/* gcc/loop-unroll.c -- RTL loop unrolling pass.  */
/* Loop unrolling.
   Copyright (C) 2002-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "profile.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "params.h"
#include "expr.h"
#include "hash-table.h"
#include "recog.h"
#include "target.h"
#include "dumpfile.h"
/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain is caused by greater sequentiality
   of code, better code to optimize for further passes and in some cases
   by fewer testings of exit conditions.  The main problem is code growth,
   that impacts performance negatively due to effect of caches.

   What we do:

   -- unrolling of loops that roll constant times; this is almost always
      win, as we get rid of exit condition tests.
   -- unrolling of loops that roll number of times that we can compute
      in runtime; we also get rid of exit condition tests here, but there
      is the extra expense for calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see comments at
   appropriate function below.

   There is a lot of parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way how to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance in order of several %.  */
77 /* Information about induction variables to split. */
79 struct iv_to_split
81 rtx_insn *insn; /* The insn in that the induction variable occurs. */
82 rtx orig_var; /* The variable (register) for the IV before split. */
83 rtx base_var; /* The variable on that the values in the further
84 iterations are based. */
85 rtx step; /* Step of the induction variable. */
86 struct iv_to_split *next; /* Next entry in walking order. */
89 /* Information about accumulators to expand. */
91 struct var_to_expand
93 rtx_insn *insn; /* The insn in that the variable expansion occurs. */
94 rtx reg; /* The accumulator which is expanded. */
95 vec<rtx> var_expansions; /* The copies of the accumulator which is expanded. */
96 struct var_to_expand *next; /* Next entry in walking order. */
97 enum rtx_code op; /* The type of the accumulation - addition, subtraction
98 or multiplication. */
99 int expansion_count; /* Count the number of expansions generated so far. */
100 int reuse_expansion; /* The expansion we intend to reuse to expand
101 the accumulator. If REUSE_EXPANSION is 0 reuse
102 the original accumulator. Else use
103 var_expansions[REUSE_EXPANSION - 1]. */
106 /* Hashtable helper for iv_to_split. */
108 struct iv_split_hasher : typed_free_remove <iv_to_split>
110 typedef iv_to_split value_type;
111 typedef iv_to_split compare_type;
112 static inline hashval_t hash (const value_type *);
113 static inline bool equal (const value_type *, const compare_type *);
117 /* A hash function for information about insns to split. */
119 inline hashval_t
120 iv_split_hasher::hash (const value_type *ivts)
122 return (hashval_t) INSN_UID (ivts->insn);
125 /* An equality functions for information about insns to split. */
127 inline bool
128 iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
130 return i1->insn == i2->insn;
133 /* Hashtable helper for iv_to_split. */
135 struct var_expand_hasher : typed_free_remove <var_to_expand>
137 typedef var_to_expand value_type;
138 typedef var_to_expand compare_type;
139 static inline hashval_t hash (const value_type *);
140 static inline bool equal (const value_type *, const compare_type *);
143 /* Return a hash for VES. */
145 inline hashval_t
146 var_expand_hasher::hash (const value_type *ves)
148 return (hashval_t) INSN_UID (ves->insn);
151 /* Return true if I1 and I2 refer to the same instruction. */
153 inline bool
154 var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
156 return i1->insn == i2->insn;
159 /* Information about optimization applied in
160 the unrolled loop. */
162 struct opt_info
164 hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
165 split. */
166 struct iv_to_split *iv_to_split_head; /* The first iv to split. */
167 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */
168 hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
169 insns with accumulators to expand. */
170 struct var_to_expand *var_to_expand_head; /* The first var to expand. */
171 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */
172 unsigned first_new_block; /* The first basic block that was
173 duplicated. */
174 basic_block loop_exit; /* The loop exit basic block. */
175 basic_block loop_preheader; /* The loop preheader basic block. */
178 static void decide_unroll_stupid (struct loop *, int);
179 static void decide_unroll_constant_iterations (struct loop *, int);
180 static void decide_unroll_runtime_iterations (struct loop *, int);
181 static void unroll_loop_stupid (struct loop *);
182 static void decide_unrolling (int);
183 static void unroll_loop_constant_iterations (struct loop *);
184 static void unroll_loop_runtime_iterations (struct loop *);
185 static struct opt_info *analyze_insns_in_loop (struct loop *);
186 static void opt_info_start_duplication (struct opt_info *);
187 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
188 static void free_opt_info (struct opt_info *);
189 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
190 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
191 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
192 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
193 static void insert_var_expansion_initialization (struct var_to_expand *,
194 basic_block);
195 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
196 basic_block);
197 static rtx get_expansion (struct var_to_expand *);
199 /* Emit a message summarizing the unroll that will be
200 performed for LOOP, along with the loop's location LOCUS, if
201 appropriate given the dump or -fopt-info settings. */
203 static void
204 report_unroll (struct loop *loop, location_t locus)
206 int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
208 if (loop->lpt_decision.decision == LPT_NONE)
209 return;
211 if (!dump_enabled_p ())
212 return;
214 dump_printf_loc (report_flags, locus,
215 "loop unrolled %d times",
216 loop->lpt_decision.times);
217 if (profile_info)
218 dump_printf (report_flags,
219 " (header execution count %d)",
220 (int)loop->header->count);
222 dump_printf (report_flags, "\n");
225 /* Decide whether unroll loops and how much. */
226 static void
227 decide_unrolling (int flags)
229 struct loop *loop;
231 /* Scan the loops, inner ones first. */
232 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
234 loop->lpt_decision.decision = LPT_NONE;
235 location_t locus = get_loop_location (loop);
237 if (dump_enabled_p ())
238 dump_printf_loc (TDF_RTL, locus,
239 ";; *** Considering loop %d at BB %d for "
240 "unrolling ***\n",
241 loop->num, loop->header->index);
243 /* Do not peel cold areas. */
244 if (optimize_loop_for_size_p (loop))
246 if (dump_file)
247 fprintf (dump_file, ";; Not considering loop, cold area\n");
248 continue;
251 /* Can the loop be manipulated? */
252 if (!can_duplicate_loop_p (loop))
254 if (dump_file)
255 fprintf (dump_file,
256 ";; Not considering loop, cannot duplicate\n");
257 continue;
260 /* Skip non-innermost loops. */
261 if (loop->inner)
263 if (dump_file)
264 fprintf (dump_file, ";; Not considering loop, is not innermost\n");
265 continue;
268 loop->ninsns = num_loop_insns (loop);
269 loop->av_ninsns = average_num_loop_insns (loop);
271 /* Try transformations one by one in decreasing order of
272 priority. */
274 decide_unroll_constant_iterations (loop, flags);
275 if (loop->lpt_decision.decision == LPT_NONE)
276 decide_unroll_runtime_iterations (loop, flags);
277 if (loop->lpt_decision.decision == LPT_NONE)
278 decide_unroll_stupid (loop, flags);
280 report_unroll (loop, locus);
284 /* Unroll LOOPS. */
285 void
286 unroll_loops (int flags)
288 struct loop *loop;
289 bool changed = false;
291 /* Now decide rest of unrolling. */
292 decide_unrolling (flags);
294 /* Scan the loops, inner ones first. */
295 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
297 /* And perform the appropriate transformations. */
298 switch (loop->lpt_decision.decision)
300 case LPT_UNROLL_CONSTANT:
301 unroll_loop_constant_iterations (loop);
302 changed = true;
303 break;
304 case LPT_UNROLL_RUNTIME:
305 unroll_loop_runtime_iterations (loop);
306 changed = true;
307 break;
308 case LPT_UNROLL_STUPID:
309 unroll_loop_stupid (loop);
310 changed = true;
311 break;
312 case LPT_NONE:
313 break;
314 default:
315 gcc_unreachable ();
319 if (changed)
321 calculate_dominance_info (CDI_DOMINATORS);
322 fix_loop_structure (NULL);
325 iv_analysis_done ();
328 /* Check whether exit of the LOOP is at the end of loop body. */
330 static bool
331 loop_exit_at_end_p (struct loop *loop)
333 struct niter_desc *desc = get_simple_loop_desc (loop);
334 rtx_insn *insn;
336 /* We should never have conditional in latch block. */
337 gcc_assert (desc->in_edge->dest != loop->header);
339 if (desc->in_edge->dest != loop->latch)
340 return false;
342 /* Check that the latch is empty. */
343 FOR_BB_INSNS (loop->latch, insn)
345 if (INSN_P (insn) && active_insn_p (insn))
346 return false;
349 return true;
352 /* Decide whether to unroll LOOP iterating constant number of times
353 and how much. */
355 static void
356 decide_unroll_constant_iterations (struct loop *loop, int flags)
358 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
359 struct niter_desc *desc;
360 widest_int iterations;
362 if (!(flags & UAP_UNROLL))
364 /* We were not asked to, just return back silently. */
365 return;
368 if (dump_file)
369 fprintf (dump_file,
370 "\n;; Considering unrolling loop with constant "
371 "number of iterations\n");
373 /* nunroll = total number of copies of the original loop body in
374 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
375 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
376 nunroll_by_av
377 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
378 if (nunroll > nunroll_by_av)
379 nunroll = nunroll_by_av;
380 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
381 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
383 if (targetm.loop_unroll_adjust)
384 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
386 /* Skip big loops. */
387 if (nunroll <= 1)
389 if (dump_file)
390 fprintf (dump_file, ";; Not considering loop, is too big\n");
391 return;
394 /* Check for simple loops. */
395 desc = get_simple_loop_desc (loop);
397 /* Check number of iterations. */
398 if (!desc->simple_p || !desc->const_iter || desc->assumptions)
400 if (dump_file)
401 fprintf (dump_file,
402 ";; Unable to prove that the loop iterates constant times\n");
403 return;
406 /* Check whether the loop rolls enough to consider.
407 Consult also loop bounds and profile; in the case the loop has more
408 than one exit it may well loop less than determined maximal number
409 of iterations. */
410 if (desc->niter < 2 * nunroll
411 || ((get_estimated_loop_iterations (loop, &iterations)
412 || get_max_loop_iterations (loop, &iterations))
413 && wi::ltu_p (iterations, 2 * nunroll)))
415 if (dump_file)
416 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
417 return;
420 /* Success; now compute number of iterations to unroll. We alter
421 nunroll so that as few as possible copies of loop body are
422 necessary, while still not decreasing the number of unrollings
423 too much (at most by 1). */
424 best_copies = 2 * nunroll + 10;
426 i = 2 * nunroll + 2;
427 if (i - 1 >= desc->niter)
428 i = desc->niter - 2;
430 for (; i >= nunroll - 1; i--)
432 unsigned exit_mod = desc->niter % (i + 1);
434 if (!loop_exit_at_end_p (loop))
435 n_copies = exit_mod + i + 1;
436 else if (exit_mod != (unsigned) i
437 || desc->noloop_assumptions != NULL_RTX)
438 n_copies = exit_mod + i + 2;
439 else
440 n_copies = i + 1;
442 if (n_copies < best_copies)
444 best_copies = n_copies;
445 best_unroll = i;
449 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
450 loop->lpt_decision.times = best_unroll;
453 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
454 The transformation does this:
456 for (i = 0; i < 102; i++)
457 body;
459 ==> (LOOP->LPT_DECISION.TIMES == 3)
461 i = 0;
462 body; i++;
463 body; i++;
464 while (i < 102)
466 body; i++;
467 body; i++;
468 body; i++;
469 body; i++;
472 static void
473 unroll_loop_constant_iterations (struct loop *loop)
475 unsigned HOST_WIDE_INT niter;
476 unsigned exit_mod;
477 sbitmap wont_exit;
478 unsigned i;
479 edge e;
480 unsigned max_unroll = loop->lpt_decision.times;
481 struct niter_desc *desc = get_simple_loop_desc (loop);
482 bool exit_at_end = loop_exit_at_end_p (loop);
483 struct opt_info *opt_info = NULL;
484 bool ok;
486 niter = desc->niter;
488 /* Should not get here (such loop should be peeled instead). */
489 gcc_assert (niter > max_unroll + 1);
491 exit_mod = niter % (max_unroll + 1);
493 wont_exit = sbitmap_alloc (max_unroll + 1);
494 bitmap_ones (wont_exit);
496 auto_vec<edge> remove_edges;
497 if (flag_split_ivs_in_unroller
498 || flag_variable_expansion_in_unroller)
499 opt_info = analyze_insns_in_loop (loop);
501 if (!exit_at_end)
503 /* The exit is not at the end of the loop; leave exit test
504 in the first copy, so that the loops that start with test
505 of exit condition have continuous body after unrolling. */
507 if (dump_file)
508 fprintf (dump_file, ";; Condition at beginning of loop.\n");
510 /* Peel exit_mod iterations. */
511 bitmap_clear_bit (wont_exit, 0);
512 if (desc->noloop_assumptions)
513 bitmap_clear_bit (wont_exit, 1);
515 if (exit_mod)
517 opt_info_start_duplication (opt_info);
518 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
519 exit_mod,
520 wont_exit, desc->out_edge,
521 &remove_edges,
522 DLTHE_FLAG_UPDATE_FREQ
523 | (opt_info && exit_mod > 1
524 ? DLTHE_RECORD_COPY_NUMBER
525 : 0));
526 gcc_assert (ok);
528 if (opt_info && exit_mod > 1)
529 apply_opt_in_copies (opt_info, exit_mod, false, false);
531 desc->noloop_assumptions = NULL_RTX;
532 desc->niter -= exit_mod;
533 loop->nb_iterations_upper_bound -= exit_mod;
534 if (loop->any_estimate
535 && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
536 loop->nb_iterations_estimate -= exit_mod;
537 else
538 loop->any_estimate = false;
541 bitmap_set_bit (wont_exit, 1);
543 else
545 /* Leave exit test in last copy, for the same reason as above if
546 the loop tests the condition at the end of loop body. */
548 if (dump_file)
549 fprintf (dump_file, ";; Condition at end of loop.\n");
551 /* We know that niter >= max_unroll + 2; so we do not need to care of
552 case when we would exit before reaching the loop. So just peel
553 exit_mod + 1 iterations. */
554 if (exit_mod != max_unroll
555 || desc->noloop_assumptions)
557 bitmap_clear_bit (wont_exit, 0);
558 if (desc->noloop_assumptions)
559 bitmap_clear_bit (wont_exit, 1);
561 opt_info_start_duplication (opt_info);
562 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
563 exit_mod + 1,
564 wont_exit, desc->out_edge,
565 &remove_edges,
566 DLTHE_FLAG_UPDATE_FREQ
567 | (opt_info && exit_mod > 0
568 ? DLTHE_RECORD_COPY_NUMBER
569 : 0));
570 gcc_assert (ok);
572 if (opt_info && exit_mod > 0)
573 apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
575 desc->niter -= exit_mod + 1;
576 loop->nb_iterations_upper_bound -= exit_mod + 1;
577 if (loop->any_estimate
578 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
579 loop->nb_iterations_estimate -= exit_mod + 1;
580 else
581 loop->any_estimate = false;
582 desc->noloop_assumptions = NULL_RTX;
584 bitmap_set_bit (wont_exit, 0);
585 bitmap_set_bit (wont_exit, 1);
588 bitmap_clear_bit (wont_exit, max_unroll);
591 /* Now unroll the loop. */
593 opt_info_start_duplication (opt_info);
594 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
595 max_unroll,
596 wont_exit, desc->out_edge,
597 &remove_edges,
598 DLTHE_FLAG_UPDATE_FREQ
599 | (opt_info
600 ? DLTHE_RECORD_COPY_NUMBER
601 : 0));
602 gcc_assert (ok);
604 if (opt_info)
606 apply_opt_in_copies (opt_info, max_unroll, true, true);
607 free_opt_info (opt_info);
610 free (wont_exit);
612 if (exit_at_end)
614 basic_block exit_block = get_bb_copy (desc->in_edge->src);
615 /* Find a new in and out edge; they are in the last copy we have made. */
617 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
619 desc->out_edge = EDGE_SUCC (exit_block, 0);
620 desc->in_edge = EDGE_SUCC (exit_block, 1);
622 else
624 desc->out_edge = EDGE_SUCC (exit_block, 1);
625 desc->in_edge = EDGE_SUCC (exit_block, 0);
629 desc->niter /= max_unroll + 1;
630 loop->nb_iterations_upper_bound
631 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
632 if (loop->any_estimate)
633 loop->nb_iterations_estimate
634 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
635 desc->niter_expr = GEN_INT (desc->niter);
637 /* Remove the edges. */
638 FOR_EACH_VEC_ELT (remove_edges, i, e)
639 remove_path (e);
641 if (dump_file)
642 fprintf (dump_file,
643 ";; Unrolled loop %d times, constant # of iterations %i insns\n",
644 max_unroll, num_loop_insns (loop));
647 /* Decide whether to unroll LOOP iterating runtime computable number of times
648 and how much. */
649 static void
650 decide_unroll_runtime_iterations (struct loop *loop, int flags)
652 unsigned nunroll, nunroll_by_av, i;
653 struct niter_desc *desc;
654 widest_int iterations;
656 if (!(flags & UAP_UNROLL))
658 /* We were not asked to, just return back silently. */
659 return;
662 if (dump_file)
663 fprintf (dump_file,
664 "\n;; Considering unrolling loop with runtime "
665 "computable number of iterations\n");
667 /* nunroll = total number of copies of the original loop body in
668 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
669 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
670 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
671 if (nunroll > nunroll_by_av)
672 nunroll = nunroll_by_av;
673 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
674 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
676 if (targetm.loop_unroll_adjust)
677 nunroll = targetm.loop_unroll_adjust (nunroll, loop);
679 /* Skip big loops. */
680 if (nunroll <= 1)
682 if (dump_file)
683 fprintf (dump_file, ";; Not considering loop, is too big\n");
684 return;
687 /* Check for simple loops. */
688 desc = get_simple_loop_desc (loop);
690 /* Check simpleness. */
691 if (!desc->simple_p || desc->assumptions)
693 if (dump_file)
694 fprintf (dump_file,
695 ";; Unable to prove that the number of iterations "
696 "can be counted in runtime\n");
697 return;
700 if (desc->const_iter)
702 if (dump_file)
703 fprintf (dump_file, ";; Loop iterates constant times\n");
704 return;
707 /* Check whether the loop rolls. */
708 if ((get_estimated_loop_iterations (loop, &iterations)
709 || get_max_loop_iterations (loop, &iterations))
710 && wi::ltu_p (iterations, 2 * nunroll))
712 if (dump_file)
713 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
714 return;
717 /* Success; now force nunroll to be power of 2, as we are unable to
718 cope with overflows in computation of number of iterations. */
719 for (i = 1; 2 * i <= nunroll; i *= 2)
720 continue;
722 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
723 loop->lpt_decision.times = i - 1;
726 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
727 returns the newly created block. If INSNS is NULL_RTX, nothing is changed
728 and NULL is returned instead. */
730 basic_block
731 split_edge_and_insert (edge e, rtx_insn *insns)
733 basic_block bb;
735 if (!insns)
736 return NULL;
737 bb = split_edge (e);
738 emit_insn_after (insns, BB_END (bb));
740 /* ??? We used to assume that INSNS can contain control flow insns, and
741 that we had to try to find sub basic blocks in BB to maintain a valid
742 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB
743 and call break_superblocks when going out of cfglayout mode. But it
744 turns out that this never happens; and that if it does ever happen,
745 the verify_flow_info at the end of the RTL loop passes would fail.
747 There are two reasons why we expected we could have control flow insns
748 in INSNS. The first is when a comparison has to be done in parts, and
749 the second is when the number of iterations is computed for loops with
750 the number of iterations known at runtime. In both cases, test cases
751 to get control flow in INSNS appear to be impossible to construct:
753 * If do_compare_rtx_and_jump needs several branches to do comparison
754 in a mode that needs comparison by parts, we cannot analyze the
755 number of iterations of the loop, and we never get to unrolling it.
757 * The code in expand_divmod that was suspected to cause creation of
758 branching code seems to be only accessed for signed division. The
759 divisions used by # of iterations analysis are always unsigned.
760 Problems might arise on architectures that emits branching code
761 for some operations that may appear in the unroller (especially
762 for division), but we have no such architectures.
764 Considering all this, it was decided that we should for now assume
765 that INSNS can in theory contain control flow insns, but in practice
766 it never does. So we don't handle the theoretical case, and should
767 a real failure ever show up, we have a pretty good clue for how to
768 fix it. */
770 return bb;
773 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
774 true, with probability PROB. If CINSN is not NULL, it is the insn to copy
775 in order to create a jump. */
777 static rtx_insn *
778 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
779 rtx_insn *cinsn)
781 rtx_insn *seq, *jump;
782 rtx cond;
783 machine_mode mode;
785 mode = GET_MODE (op0);
786 if (mode == VOIDmode)
787 mode = GET_MODE (op1);
789 start_sequence ();
790 if (GET_MODE_CLASS (mode) == MODE_CC)
792 /* A hack -- there seems to be no easy generic way how to make a
793 conditional jump from a ccmode comparison. */
794 gcc_assert (cinsn);
795 cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
796 gcc_assert (GET_CODE (cond) == comp);
797 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
798 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
799 emit_jump_insn (copy_insn (PATTERN (cinsn)));
800 jump = get_last_insn ();
801 gcc_assert (JUMP_P (jump));
802 JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
803 LABEL_NUSES (JUMP_LABEL (jump))++;
804 redirect_jump (jump, label, 0);
806 else
808 gcc_assert (!cinsn);
810 op0 = force_operand (op0, NULL_RTX);
811 op1 = force_operand (op1, NULL_RTX);
812 do_compare_rtx_and_jump (op0, op1, comp, 0,
813 mode, NULL_RTX, NULL_RTX, label, -1);
814 jump = get_last_insn ();
815 gcc_assert (JUMP_P (jump));
816 JUMP_LABEL (jump) = label;
817 LABEL_NUSES (label)++;
819 add_int_reg_note (jump, REG_BR_PROB, prob);
821 seq = get_insns ();
822 end_sequence ();
824 return seq;
827 /* Unroll LOOP for which we are able to count number of iterations in runtime
828 LOOP->LPT_DECISION.TIMES times. The transformation does this (with some
829 extra care for case n < 0):
831 for (i = 0; i < n; i++)
832 body;
834 ==> (LOOP->LPT_DECISION.TIMES == 3)
836 i = 0;
837 mod = n % 4;
839 switch (mod)
841 case 3:
842 body; i++;
843 case 2:
844 body; i++;
845 case 1:
846 body; i++;
847 case 0: ;
850 while (i < n)
852 body; i++;
853 body; i++;
854 body; i++;
855 body; i++;
858 static void
859 unroll_loop_runtime_iterations (struct loop *loop)
861 rtx old_niter, niter, tmp;
862 rtx_insn *init_code, *branch_code;
863 unsigned i, j, p;
864 basic_block preheader, *body, swtch, ezc_swtch;
865 sbitmap wont_exit;
866 int may_exit_copy;
867 unsigned n_peel;
868 edge e;
869 bool extra_zero_check, last_may_exit;
870 unsigned max_unroll = loop->lpt_decision.times;
871 struct niter_desc *desc = get_simple_loop_desc (loop);
872 bool exit_at_end = loop_exit_at_end_p (loop);
873 struct opt_info *opt_info = NULL;
874 bool ok;
876 if (flag_split_ivs_in_unroller
877 || flag_variable_expansion_in_unroller)
878 opt_info = analyze_insns_in_loop (loop);
880 /* Remember blocks whose dominators will have to be updated. */
881 auto_vec<basic_block> dom_bbs;
883 body = get_loop_body (loop);
884 for (i = 0; i < loop->num_nodes; i++)
886 vec<basic_block> ldom;
887 basic_block bb;
889 ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
890 FOR_EACH_VEC_ELT (ldom, j, bb)
891 if (!flow_bb_inside_loop_p (loop, bb))
892 dom_bbs.safe_push (bb);
894 ldom.release ();
896 free (body);
898 if (!exit_at_end)
900 /* Leave exit in first copy (for explanation why see comment in
901 unroll_loop_constant_iterations). */
902 may_exit_copy = 0;
903 n_peel = max_unroll - 1;
904 extra_zero_check = true;
905 last_may_exit = false;
907 else
909 /* Leave exit in last copy (for explanation why see comment in
910 unroll_loop_constant_iterations). */
911 may_exit_copy = max_unroll;
912 n_peel = max_unroll;
913 extra_zero_check = false;
914 last_may_exit = true;
917 /* Get expression for number of iterations. */
918 start_sequence ();
919 old_niter = niter = gen_reg_rtx (desc->mode);
920 tmp = force_operand (copy_rtx (desc->niter_expr), niter);
921 if (tmp != niter)
922 emit_move_insn (niter, tmp);
924 /* Count modulo by ANDing it with max_unroll; we use the fact that
925 the number of unrollings is a power of two, and thus this is correct
926 even if there is overflow in the computation. */
927 niter = expand_simple_binop (desc->mode, AND,
928 niter, gen_int_mode (max_unroll, desc->mode),
929 NULL_RTX, 0, OPTAB_LIB_WIDEN);
931 init_code = get_insns ();
932 end_sequence ();
933 unshare_all_rtl_in_chain (init_code);
935 /* Precondition the loop. */
936 split_edge_and_insert (loop_preheader_edge (loop), init_code);
938 auto_vec<edge> remove_edges;
940 wont_exit = sbitmap_alloc (max_unroll + 2);
942 /* Peel the first copy of loop body (almost always we must leave exit test
943 here; the only exception is when we have extra zero check and the number
944 of iterations is reliable. Also record the place of (possible) extra
945 zero check. */
946 bitmap_clear (wont_exit);
947 if (extra_zero_check
948 && !desc->noloop_assumptions)
949 bitmap_set_bit (wont_exit, 1);
950 ezc_swtch = loop_preheader_edge (loop)->src;
951 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
952 1, wont_exit, desc->out_edge,
953 &remove_edges,
954 DLTHE_FLAG_UPDATE_FREQ);
955 gcc_assert (ok);
957 /* Record the place where switch will be built for preconditioning. */
958 swtch = split_edge (loop_preheader_edge (loop));
960 for (i = 0; i < n_peel; i++)
962 /* Peel the copy. */
963 bitmap_clear (wont_exit);
964 if (i != n_peel - 1 || !last_may_exit)
965 bitmap_set_bit (wont_exit, 1);
966 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
967 1, wont_exit, desc->out_edge,
968 &remove_edges,
969 DLTHE_FLAG_UPDATE_FREQ);
970 gcc_assert (ok);
972 /* Create item for switch. */
973 j = n_peel - i - (extra_zero_check ? 0 : 1);
974 p = REG_BR_PROB_BASE / (i + 2);
976 preheader = split_edge (loop_preheader_edge (loop));
977 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
978 block_label (preheader), p,
979 NULL);
981 /* We rely on the fact that the compare and jump cannot be optimized out,
982 and hence the cfg we create is correct. */
983 gcc_assert (branch_code != NULL_RTX);
985 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
986 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
987 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
988 e = make_edge (swtch, preheader,
989 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
990 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
991 e->probability = p;
994 if (extra_zero_check)
996 /* Add branch for zero iterations. */
997 p = REG_BR_PROB_BASE / (max_unroll + 1);
998 swtch = ezc_swtch;
999 preheader = split_edge (loop_preheader_edge (loop));
1000 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1001 block_label (preheader), p,
1002 NULL);
1003 gcc_assert (branch_code != NULL_RTX);
1005 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1006 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1007 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1008 e = make_edge (swtch, preheader,
1009 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1010 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
1011 e->probability = p;
1014 /* Recount dominators for outer blocks. */
1015 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1017 /* And unroll loop. */
1019 bitmap_ones (wont_exit);
1020 bitmap_clear_bit (wont_exit, may_exit_copy);
1021 opt_info_start_duplication (opt_info);
1023 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1024 max_unroll,
1025 wont_exit, desc->out_edge,
1026 &remove_edges,
1027 DLTHE_FLAG_UPDATE_FREQ
1028 | (opt_info
1029 ? DLTHE_RECORD_COPY_NUMBER
1030 : 0));
1031 gcc_assert (ok);
1033 if (opt_info)
1035 apply_opt_in_copies (opt_info, max_unroll, true, true);
1036 free_opt_info (opt_info);
1039 free (wont_exit);
1041 if (exit_at_end)
1043 basic_block exit_block = get_bb_copy (desc->in_edge->src);
1044 /* Find a new in and out edge; they are in the last copy we have
1045 made. */
1047 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1049 desc->out_edge = EDGE_SUCC (exit_block, 0);
1050 desc->in_edge = EDGE_SUCC (exit_block, 1);
1052 else
1054 desc->out_edge = EDGE_SUCC (exit_block, 1);
1055 desc->in_edge = EDGE_SUCC (exit_block, 0);
1059 /* Remove the edges. */
1060 FOR_EACH_VEC_ELT (remove_edges, i, e)
1061 remove_path (e);
1063 /* We must be careful when updating the number of iterations due to
1064 preconditioning and the fact that the value must be valid at entry
1065 of the loop. After passing through the above code, we see that
1066 the correct new number of iterations is this: */
1067 gcc_assert (!desc->const_iter);
1068 desc->niter_expr =
1069 simplify_gen_binary (UDIV, desc->mode, old_niter,
1070 gen_int_mode (max_unroll + 1, desc->mode));
1071 loop->nb_iterations_upper_bound
1072 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1073 if (loop->any_estimate)
1074 loop->nb_iterations_estimate
1075 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1076 if (exit_at_end)
1078 desc->niter_expr =
1079 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1080 desc->noloop_assumptions = NULL_RTX;
1081 --loop->nb_iterations_upper_bound;
1082 if (loop->any_estimate
1083 && loop->nb_iterations_estimate != 0)
1084 --loop->nb_iterations_estimate;
1085 else
1086 loop->any_estimate = false;
1089 if (dump_file)
1090 fprintf (dump_file,
1091 ";; Unrolled loop %d times, counting # of iterations "
1092 "in runtime, %i insns\n",
1093 max_unroll, num_loop_insns (loop));
/* Decide whether to unroll LOOP stupidly and how much.  On success the
   decision is recorded in LOOP->lpt_decision (LPT_UNROLL_STUPID plus the
   number of extra copies); on failure the function simply returns without
   touching the decision.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  /* Give the target a chance to override the computed factor.  */
  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  A simple loop without assumptions is handled by the
     exact/runtime unrolling strategies instead of the "stupid" one.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  The loop computes the largest power of
     two that is <= nunroll.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  /* Gather IV-splitting / accumulator-expansion opportunities only when the
     corresponding flags request them.  */
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* All bits clear: every copy may exit the loop (this is "stupid"
     unrolling -- the exit test is kept in each copy).  */
  wont_exit = sbitmap_alloc (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial assumptions
	 for a loop to be really simple.  We could update the counts, but the
	 problem is that we are unable to decide which exit will be taken
	 (not really true in case the number of iterations is constant,
	 but no one will do anything with this information, so we do not
	 worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}
/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  NOTE(review): *DEBUG_USES is incremented, not reset here --
   callers are expected to pass it zero-initialized.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      /* The break below only leaves the inner insn walk; scanning the
	 remaining blocks is harmless since count_ref > 1 already decides
	 the result.  */
      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}
/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  Each such
   debug insn has its location replaced by "unknown" so the debugger does
   not see stale values once REG is expanded; stops early after DEBUG_USES
   resets.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of accumulator.
   Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
   information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  /* Only single-set insns of the form dest = dest OP x are candidates.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      /* Reassociating FP accumulation is only valid under
	 -fassociative-math.  */
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we can not
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using; which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation will yield wrong
	 results for (x = something - x) thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   type chains by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  /* A zero step or a step in a narrower mode than the extend mode is not
     a candidate for splitting.  */
  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
/* Determines which of insns in LOOP can be optimized.
   Return a OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  /* Variable expansion needs a single non-complex exit where the partial
     accumulators can be combined.  */
  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      /* Only analyze blocks that are executed on every iteration
	 (i.e. dominate the latch).  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      /* Insert into both the hash table (for lookup by insn) and
		 the ordered list (for deterministic iteration).  */
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}
1610 /* Called just before loop duplication. Records start of duplicated area
1611 to OPT_INFO. */
1613 static void
1614 opt_info_start_duplication (struct opt_info *opt_info)
1616 if (opt_info)
1617 opt_info->first_new_block = last_basic_block_for_fn (cfun);
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, initialization is done in the original loop body,
     which is copy number 0, so the delta is simply the copy number.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the copy in which the initialization occurs has
     number 1, and the original loop body (number 0) comes last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
/* Allocate basic variable for the induction variable chain.  The new
   pseudo takes the mode of the IV insn's source expression; it is filled
   in later by insert_base_initialization.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}
/* Insert initialization of basic variable of IVTS before INSN, taking
   the initial value from INSN.  Emits "base_var = SET_SRC (INSN)" as a
   (possibly multi-insn) sequence immediately before INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}
/* Replace the use of induction variable described in IVTS in INSN
   by base variable + DELTA * step.  Tries three strategies in order:
   direct replacement, replacement via a fresh register, and finally
   recreating the whole assignment from scratch.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  /* Replace INSN entirely with the freshly built sequence.  */
  emit_insn_before (seq, insn);
  delete_insn (insn);
}
1736 /* Return one expansion of the accumulator recorded in struct VE. */
1738 static rtx
1739 get_expansion (struct var_to_expand *ve)
1741 rtx reg;
1743 if (ve->reuse_expansion == 0)
1744 reg = ve->reg;
1745 else
1746 reg = ve->var_expansions[ve->reuse_expansion - 1];
1748 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1749 ve->reuse_expansion = 0;
1750 else
1751 ve->reuse_expansion++;
1753 return reg;
/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  The new register is recorded in VE only if the
   replacement actually validated (apply_change_group succeeded).  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}
/* Initialize the variable expansions in loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor sign zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero; the
   final result of sum should be -zero thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      /* Multiplicative accumulators start at 1, the identity element.  */
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}
/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  The combined value is folded back into VE->reg so that
   code after the loop sees the full accumulated result.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  /* Emit after the block's NOTE_INSN_BASIC_BLOCK so the sequence lands
     inside PLACE, past any leading notes/labels.  */
  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  /* An insn carries at most one REG_EQUAL/REG_EQUIV note, so we can stop
     at the first split IV mentioned in it.  */
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
	remove_note (insn, note);
	return;
      }
}
/* Apply loop optimizations in loop copies using the
   data which gathered during the unrolling.  Structure
   OPT_INFO record that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
   the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  /* First pass: rewrite the freshly duplicated blocks, walking each copied
     insn in lock-step with the corresponding insn of its original block.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_INSN_P (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  /* Advance the original-block cursor past non-insns and debug
	     label bindings so it stays aligned with INSN.  */
	  while (!INSN_P (orig_insn)
		 || (DEBUG_INSN_P (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find them as originals of the blocks
     in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }
	}
    }
}
/* Release OPT_INFO.  Frees both hash tables, every recorded expansion
   vector, and finally the struct itself.  The iv_to_split / var_to_expand
   entries themselves are owned by the hash tables.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}