gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "alias.h"
  26 #include "symtab.h"
  27 #include "tree.h"
  28 #include "hard-reg-set.h"
  29 #include "obstack.h"
  30 #include "profile.h"
  31 #include "predict.h"
  32 #include "function.h"
  33 #include "dominance.h"
  34 #include "cfg.h"
  35 #include "cfgrtl.h"
  36 #include "basic-block.h"
  37 #include "cfgloop.h"
  38 #include "params.h"
  39 #include "insn-codes.h"
  40 #include "optabs.h"
  41 #include "flags.h"
  42 #include "insn-config.h"
  43 #include "expmed.h"
  44 #include "dojump.h"
  45 #include "explow.h"
  46 #include "calls.h"
  47 #include "emit-rtl.h"
  48 #include "varasm.h"
  49 #include "stmt.h"
  50 #include "expr.h"
  51 #include "recog.h"
  52 #include "target.h"
  53 #include "dumpfile.h"
  54
  55 /* This pass performs loop unrolling.  We only perform this
  56    optimization on innermost loops (with single exception) because
  57    the impact on performance is greatest here, and we want to avoid
  58    unnecessary code size growth.  The gain is caused by greater sequentiality
  59    of code, better code to optimize for further passes and in some cases
  60    by fewer testings of exit conditions.  The main problem is code growth,
  61    that impacts performance negatively due to effect of caches.
  62
  63    What we do:
  64
  65    -- unrolling of loops that roll constant times; this is almost always
  66       win, as we get rid of exit condition tests.
  67    -- unrolling of loops that roll number of times that we can compute
  68       in runtime; we also get rid of exit condition tests here, but there
  69       is the extra expense for calculating the number of iterations
  70    -- simple unrolling of remaining loops; this is performed only if we
  71       are asked to, as the gain is questionable in this case and often
  72       it may even slow down the code
  73    For more detailed descriptions of each of those, see the comments at
  74    the appropriate functions below.
  75
  76    There are a lot of parameters (defined and described in params.def) that
  77    control how much we unroll.
  78
  79    ??? A great problem is that we don't have a good way to determine
  80    how many times we should unroll the loop; the experiments I have made
  81    showed that this choice may affect performance on the order of several %.
  82    */
  83
  84 /* Information about induction variables to split.  */
  85
  86 struct iv_to_split
  87 {
  88   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  89   rtx orig_var;         /* The variable (register) for the IV before split.  */
  90   rtx base_var;         /* The variable on that the values in the further
  91                            iterations are based.  */
  92   rtx step;             /* Step of the induction variable.  */
  93   struct iv_to_split *next; /* Next entry in walking order.  */
  94 };
  95
  96 /* Information about accumulators to expand.  */
  97
  98 struct var_to_expand
  99 {
 100   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
 101   rtx reg;                         /* The accumulator which is expanded.  */
 102   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
 103   struct var_to_expand *next;      /* Next entry in walking order.  */
 104   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
 105                                       or multiplication.  */
 106   int expansion_count;             /* Count the number of expansions generated so far.  */
 107   int reuse_expansion;             /* The expansion we intend to reuse to expand
 108                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 109                                       the original accumulator.  Else use
 110                                       var_expansions[REUSE_EXPANSION - 1].  */
 111 };
 112
 113 /* Hashtable helper for iv_to_split.  */
 114
 115 struct iv_split_hasher : free_ptr_hash <iv_to_split>
 116 {
 117   static inline hashval_t hash (const iv_to_split *);
 118   static inline bool equal (const iv_to_split *, const iv_to_split *);
 119 };
 120
 121
 122 /* A hash function for information about insns to split.  */
 123
 124 inline hashval_t
 125 iv_split_hasher::hash (const iv_to_split *ivts)
 126 {
 127   return (hashval_t) INSN_UID (ivts->insn);
 128 }
 129
 130 /* An equality functions for information about insns to split.  */
 131
 132 inline bool
 133 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
 134 {
 135   return i1->insn == i2->insn;
 136 }
 137
 138 /* Hashtable helper for iv_to_split.  */
 139
 140 struct var_expand_hasher : free_ptr_hash <var_to_expand>
 141 {
 142   static inline hashval_t hash (const var_to_expand *);
 143   static inline bool equal (const var_to_expand *, const var_to_expand *);
 144 };
 145
 146 /* Return a hash for VES.  */
 147
 148 inline hashval_t
 149 var_expand_hasher::hash (const var_to_expand *ves)
 150 {
 151   return (hashval_t) INSN_UID (ves->insn);
 152 }
 153
 154 /* Return true if I1 and I2 refer to the same instruction.  */
 155
 156 inline bool
 157 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
 158 {
 159   return i1->insn == i2->insn;
 160 }
 161
 162 /* Information about optimization applied in
 163    the unrolled loop.  */
 164
 165 struct opt_info
 166 {
 167   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 168                                                   split.  */
 169   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 170   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 171   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 172                                         insns with accumulators to expand.  */
 173   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 174   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 175   unsigned first_new_block;        /* The first basic block that was
 176                                       duplicated.  */
 177   basic_block loop_exit;           /* The loop exit basic block.  */
 178   basic_block loop_preheader;      /* The loop preheader basic block.  */
 179 };
 180
 181 static void decide_unroll_stupid (struct loop *, int);
 182 static void decide_unroll_constant_iterations (struct loop *, int);
 183 static void decide_unroll_runtime_iterations (struct loop *, int);
 184 static void unroll_loop_stupid (struct loop *);
 185 static void decide_unrolling (int);
 186 static void unroll_loop_constant_iterations (struct loop *);
 187 static void unroll_loop_runtime_iterations (struct loop *);
 188 static struct opt_info *analyze_insns_in_loop (struct loop *);
 189 static void opt_info_start_duplication (struct opt_info *);
 190 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 191 static void free_opt_info (struct opt_info *);
 192 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
 193 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
 194 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 195 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 196 static void insert_var_expansion_initialization (struct var_to_expand *,
 197                                                  basic_block);
 198 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 199                                              basic_block);
 200 static rtx get_expansion (struct var_to_expand *);
 201
 202 /* Emit a message summarizing the unroll that will be
 203    performed for LOOP, along with the loop's location LOCUS, if
 204    appropriate given the dump or -fopt-info settings.  */
 205
 206 static void
 207 report_unroll (struct loop *loop, location_t locus)
 208 {
 209   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
 210
 211   if (loop->lpt_decision.decision == LPT_NONE)
 212     return;
 213
 214   if (!dump_enabled_p ())
 215     return;
 216
 217   dump_printf_loc (report_flags, locus,
 218                    "loop unrolled %d times",
 219                    loop->lpt_decision.times);
 220   if (profile_info)
 221     dump_printf (report_flags,
 222                  " (header execution count %d)",
 223                  (int)loop->header->count);
 224
 225   dump_printf (report_flags, "\n");
 226 }
 227
 228 /* Decide whether unroll loops and how much.  */
 229 static void
 230 decide_unrolling (int flags)
 231 {
 232   struct loop *loop;
 233
 234   /* Scan the loops, inner ones first.  */
 235   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 236     {
 237       loop->lpt_decision.decision = LPT_NONE;
 238       location_t locus = get_loop_location (loop);
 239
 240       if (dump_enabled_p ())
 241         dump_printf_loc (TDF_RTL, locus,
 242                          ";; *** Considering loop %d at BB %d for "
 243                          "unrolling ***\n",
 244                          loop->num, loop->header->index);
 245
 246       /* Do not peel cold areas.  */
 247       if (optimize_loop_for_size_p (loop))
 248         {
 249           if (dump_file)
 250             fprintf (dump_file, ";; Not considering loop, cold area\n");
 251           continue;
 252         }
 253
 254       /* Can the loop be manipulated?  */
 255       if (!can_duplicate_loop_p (loop))
 256         {
 257           if (dump_file)
 258             fprintf (dump_file,
 259                      ";; Not considering loop, cannot duplicate\n");
 260           continue;
 261         }
 262
 263       /* Skip non-innermost loops.  */
 264       if (loop->inner)
 265         {
 266           if (dump_file)
 267             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 268           continue;
 269         }
 270
 271       loop->ninsns = num_loop_insns (loop);
 272       loop->av_ninsns = average_num_loop_insns (loop);
 273
 274       /* Try transformations one by one in decreasing order of
 275          priority.  */
 276
 277       decide_unroll_constant_iterations (loop, flags);
 278       if (loop->lpt_decision.decision == LPT_NONE)
 279         decide_unroll_runtime_iterations (loop, flags);
 280       if (loop->lpt_decision.decision == LPT_NONE)
 281         decide_unroll_stupid (loop, flags);
 282
 283       report_unroll (loop, locus);
 284     }
 285 }
 286
 287 /* Unroll LOOPS.  */
 288 void
 289 unroll_loops (int flags)
 290 {
 291   struct loop *loop;
 292   bool changed = false;
 293
 294   /* Now decide rest of unrolling.  */
 295   decide_unrolling (flags);
 296
 297   /* Scan the loops, inner ones first.  */
 298   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 299     {
 300       /* And perform the appropriate transformations.  */
 301       switch (loop->lpt_decision.decision)
 302         {
 303         case LPT_UNROLL_CONSTANT:
 304           unroll_loop_constant_iterations (loop);
 305           changed = true;
 306           break;
 307         case LPT_UNROLL_RUNTIME:
 308           unroll_loop_runtime_iterations (loop);
 309           changed = true;
 310           break;
 311         case LPT_UNROLL_STUPID:
 312           unroll_loop_stupid (loop);
 313           changed = true;
 314           break;
 315         case LPT_NONE:
 316           break;
 317         default:
 318           gcc_unreachable ();
 319         }
 320     }
 321
 322     if (changed)
 323       {
 324         calculate_dominance_info (CDI_DOMINATORS);
 325         fix_loop_structure (NULL);
 326       }
 327
 328   iv_analysis_done ();
 329 }
 330
 331 /* Check whether exit of the LOOP is at the end of loop body.  */
 332
 333 static bool
 334 loop_exit_at_end_p (struct loop *loop)
 335 {
 336   struct niter_desc *desc = get_simple_loop_desc (loop);
 337   rtx_insn *insn;
 338
 339   /* We should never have conditional in latch block.  */
 340   gcc_assert (desc->in_edge->dest != loop->header);
 341
 342   if (desc->in_edge->dest != loop->latch)
 343     return false;
 344
 345   /* Check that the latch is empty.  */
 346   FOR_BB_INSNS (loop->latch, insn)
 347     {
 348       if (INSN_P (insn) && active_insn_p (insn))
 349         return false;
 350     }
 351
 352   return true;
 353 }
 354
 355 /* Decide whether to unroll LOOP iterating constant number of times
 356    and how much.  */
 357
 358 static void
 359 decide_unroll_constant_iterations (struct loop *loop, int flags)
 360 {
 361   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 362   struct niter_desc *desc;
 363   widest_int iterations;
 364
 365   if (!(flags & UAP_UNROLL))
 366     {
 367       /* We were not asked to, just return back silently.  */
 368       return;
 369     }
 370
 371   if (dump_file)
 372     fprintf (dump_file,
 373              "\n;; Considering unrolling loop with constant "
 374              "number of iterations\n");
 375
 376   /* nunroll = total number of copies of the original loop body in
 377      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 378   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 379   nunroll_by_av
 380     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 381   if (nunroll > nunroll_by_av)
 382     nunroll = nunroll_by_av;
 383   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 384     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 385
 386   if (targetm.loop_unroll_adjust)
 387     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 388
 389   /* Skip big loops.  */
 390   if (nunroll <= 1)
 391     {
 392       if (dump_file)
 393         fprintf (dump_file, ";; Not considering loop, is too big\n");
 394       return;
 395     }
 396
 397   /* Check for simple loops.  */
 398   desc = get_simple_loop_desc (loop);
 399
 400   /* Check number of iterations.  */
 401   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 402     {
 403       if (dump_file)
 404         fprintf (dump_file,
 405                  ";; Unable to prove that the loop iterates constant times\n");
 406       return;
 407     }
 408
 409   /* Check whether the loop rolls enough to consider.
 410      Consult also loop bounds and profile; in the case the loop has more
 411      than one exit it may well loop less than determined maximal number
 412      of iterations.  */
 413   if (desc->niter < 2 * nunroll
 414       || ((get_estimated_loop_iterations (loop, &iterations)
 415            || get_max_loop_iterations (loop, &iterations))
 416           && wi::ltu_p (iterations, 2 * nunroll)))
 417     {
 418       if (dump_file)
 419         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 420       return;
 421     }
 422
 423   /* Success; now compute number of iterations to unroll.  We alter
 424      nunroll so that as few as possible copies of loop body are
 425      necessary, while still not decreasing the number of unrollings
 426      too much (at most by 1).  */
 427   best_copies = 2 * nunroll + 10;
 428
 429   i = 2 * nunroll + 2;
 430   if (i - 1 >= desc->niter)
 431     i = desc->niter - 2;
 432
 433   for (; i >= nunroll - 1; i--)
 434     {
 435       unsigned exit_mod = desc->niter % (i + 1);
 436
 437       if (!loop_exit_at_end_p (loop))
 438         n_copies = exit_mod + i + 1;
 439       else if (exit_mod != (unsigned) i
 440                || desc->noloop_assumptions != NULL_RTX)
 441         n_copies = exit_mod + i + 2;
 442       else
 443         n_copies = i + 1;
 444
 445       if (n_copies < best_copies)
 446         {
 447           best_copies = n_copies;
 448           best_unroll = i;
 449         }
 450     }
 451
 452   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 453   loop->lpt_decision.times = best_unroll;
 454 }
 455
 456 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 457    The transformation does this:
 458
 459    for (i = 0; i < 102; i++)
 460      body;
 461
 462    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 463
 464    i = 0;
 465    body; i++;
 466    body; i++;
 467    while (i < 102)
 468      {
 469        body; i++;
 470        body; i++;
 471        body; i++;
 472        body; i++;
 473      }
        
        In the example 102 % (3 + 1) == 2 iterations are peeled in front of
        the loop; this is the case where the exit test is not at the end of
        the loop body.  When the exit test is at the end of the body,
        NITER % (TIMES + 1) + 1 iterations are peeled instead, unless that
        count would be TIMES + 1 and there are no noloop assumptions, in
        which case nothing is peeled.

        Besides changing the CFG, the function updates the iteration counts
        recorded in the simple loop description and in LOOP itself.
 474   */
 475 static void
 476 unroll_loop_constant_iterations (struct loop *loop)
 477 {
 478   unsigned HOST_WIDE_INT niter;
 479   unsigned exit_mod;
 480   sbitmap wont_exit;
 481   unsigned i;
 482   edge e;
 483   unsigned max_unroll = loop->lpt_decision.times;
 484   struct niter_desc *desc = get_simple_loop_desc (loop);
 485   bool exit_at_end = loop_exit_at_end_p (loop);
 486   struct opt_info *opt_info = NULL;
 487   bool ok;
 488
 489   niter = desc->niter;
 490
 491   /* Should not get here (such loop should be peeled instead).  */
 492   gcc_assert (niter > max_unroll + 1);
 493
       /* Number of iterations left over when NITER is divided by the number
          of body copies in the unrolled loop.  */
 494   exit_mod = niter % (max_unroll + 1);
 495
       /* WONT_EXIT holds MAX_UNROLL + 1 bits, all initially set; bits are
          cleared below before each duplication.  NOTE(review): presumably
          a set bit tells duplicate_loop_to_header_edge that the
          corresponding copy cannot take the exit -- confirm there.  */
 496   wont_exit = sbitmap_alloc (max_unroll + 1);
 497   bitmap_ones (wont_exit);
 498
 499   auto_vec<edge> remove_edges;
       /* OPT_INFO stays NULL unless one of the two unroller
          optimizations is enabled.  */
 500   if (flag_split_ivs_in_unroller
 501       || flag_variable_expansion_in_unroller)
 502     opt_info = analyze_insns_in_loop (loop);
 503
 504   if (!exit_at_end)
 505     {
 506       /* The exit is not at the end of the loop; leave exit test
 507          in the first copy, so that the loops that start with test
 508          of exit condition have continuous body after unrolling.  */
 509
 510       if (dump_file)
 511         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 512
 513       /* Peel exit_mod iterations.  */
 514       bitmap_clear_bit (wont_exit, 0);
 515       if (desc->noloop_assumptions)
 516         bitmap_clear_bit (wont_exit, 1);
 517
 518       if (exit_mod)
 519         {
 520           opt_info_start_duplication (opt_info);
 521           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 522                                               exit_mod,
 523                                               wont_exit, desc->out_edge,
 524                                               &remove_edges,
 525                                               DLTHE_FLAG_UPDATE_FREQ
 526                                               | (opt_info && exit_mod > 1
 527                                                  ? DLTHE_RECORD_COPY_NUMBER
 528                                                    : 0));
 529           gcc_assert (ok);
 530
 531           if (opt_info && exit_mod > 1)
 532             apply_opt_in_copies (opt_info, exit_mod, false, false);
 533
               /* EXIT_MOD iterations were peeled, so take them off the
                  iteration counts recorded for the loop.  The estimate is
                  dropped when it is smaller than EXIT_MOD.  */
 534           desc->noloop_assumptions = NULL_RTX;
 535           desc->niter -= exit_mod;
 536           loop->nb_iterations_upper_bound -= exit_mod;
 537           if (loop->any_estimate
 538               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 539             loop->nb_iterations_estimate -= exit_mod;
 540           else
 541             loop->any_estimate = false;
 542         }
 543
           /* Bit 1 may have been cleared above because of the noloop
              assumptions; set it again before the unrolling proper.  Bit 0
              stays cleared.  */
 544       bitmap_set_bit (wont_exit, 1);
 545     }
 546   else
 547     {
 548       /* Leave exit test in last copy, for the same reason as above if
 549          the loop tests the condition at the end of loop body.  */
 550
 551       if (dump_file)
 552         fprintf (dump_file, ";; Condition at end of loop.\n");
 553
 554       /* We know that niter >= max_unroll + 2, so we do not need to care about
 555          the case when we would exit before reaching the loop.  So just peel
 556          exit_mod + 1 iterations.  */
 557       if (exit_mod != max_unroll
 558           || desc->noloop_assumptions)
 559         {
 560           bitmap_clear_bit (wont_exit, 0);
 561           if (desc->noloop_assumptions)
 562             bitmap_clear_bit (wont_exit, 1);
 563
 564           opt_info_start_duplication (opt_info);
 565           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 566                                               exit_mod + 1,
 567                                               wont_exit, desc->out_edge,
 568                                               &remove_edges,
 569                                               DLTHE_FLAG_UPDATE_FREQ
 570                                               | (opt_info && exit_mod > 0
 571                                                  ? DLTHE_RECORD_COPY_NUMBER
 572                                                    : 0));
 573           gcc_assert (ok);
 574
 575           if (opt_info && exit_mod > 0)
 576             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 577
               /* EXIT_MOD + 1 iterations were peeled, so take them off the
                  iteration counts recorded for the loop.  The estimate is
                  dropped when it is smaller than that.  */
 578           desc->niter -= exit_mod + 1;
 579           loop->nb_iterations_upper_bound -= exit_mod + 1;
 580           if (loop->any_estimate
 581               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 582             loop->nb_iterations_estimate -= exit_mod + 1;
 583           else
 584             loop->any_estimate = false;
 585           desc->noloop_assumptions = NULL_RTX;
 586
               /* Restore the bits cleared for the peeling.  */
 587           bitmap_set_bit (wont_exit, 0);
 588           bitmap_set_bit (wont_exit, 1);
 589         }
 590
           /* Only the bit with index MAX_UNROLL is cleared for the
              unrolling proper.  */
 591       bitmap_clear_bit (wont_exit, max_unroll);
 592     }
 593
 594   /* Now unroll the loop.  */
 595
 596   opt_info_start_duplication (opt_info);
 597   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 598                                       max_unroll,
 599                                       wont_exit, desc->out_edge,
 600                                       &remove_edges,
 601                                       DLTHE_FLAG_UPDATE_FREQ
 602                                       | (opt_info
 603                                          ? DLTHE_RECORD_COPY_NUMBER
 604                                            : 0));
 605   gcc_assert (ok);
 606
 607   if (opt_info)
 608     {
 609       apply_opt_in_copies (opt_info, max_unroll, true, true);
 610       free_opt_info (opt_info);
 611     }
 612
 613   free (wont_exit);
 614
 615   if (exit_at_end)
 616     {
 617       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 618       /* Find a new in and out edge; they are in the last copy we have made.  */
 619
           /* Of the two successor edges of EXIT_BLOCK, the one leading to
              the destination of the old out edge is the new out edge; the
              other one is the new in edge.  */
 620       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 621         {
 622           desc->out_edge = EDGE_SUCC (exit_block, 0);
 623           desc->in_edge = EDGE_SUCC (exit_block, 1);
 624         }
 625       else
 626         {
 627           desc->out_edge = EDGE_SUCC (exit_block, 1);
 628           desc->in_edge = EDGE_SUCC (exit_block, 0);
 629         }
 630     }
 631
       /* Each iteration of the unrolled loop runs MAX_UNROLL + 1 copies of
          the body, so scale the recorded iteration counts down.  */
 632   desc->niter /= max_unroll + 1;
 633   loop->nb_iterations_upper_bound
 634     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 635   if (loop->any_estimate)
 636     loop->nb_iterations_estimate
 637       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 638   desc->niter_expr = GEN_INT (desc->niter);
 639
 640   /* Remove the edges collected in REMOVE_EDGES, which was passed to the
          duplication calls above.  */
 641   FOR_EACH_VEC_ELT (remove_edges, i, e)
 642     remove_path (e);
 643
 644   if (dump_file)
 645     fprintf (dump_file,
 646              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 647              max_unroll, num_loop_insns (loop));
 648 }
 649
 650 /* Decide whether to unroll LOOP iterating runtime computable number of times
 651    and how much.  */
 652 static void
 653 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 654 {
 655   unsigned nunroll, nunroll_by_av, i;
 656   struct niter_desc *desc;
 657   widest_int iterations;
 658
 659   if (!(flags & UAP_UNROLL))
 660     {
 661       /* We were not asked to, just return back silently.  */
 662       return;
 663     }
 664
 665   if (dump_file)
 666     fprintf (dump_file,
 667              "\n;; Considering unrolling loop with runtime "
 668              "computable number of iterations\n");
 669
 670   /* nunroll = total number of copies of the original loop body in
 671      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 672   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 673   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 674   if (nunroll > nunroll_by_av)
 675     nunroll = nunroll_by_av;
 676   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 677     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 678
 679   if (targetm.loop_unroll_adjust)
 680     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 681
 682   /* Skip big loops.  */
 683   if (nunroll <= 1)
 684     {
 685       if (dump_file)
 686         fprintf (dump_file, ";; Not considering loop, is too big\n");
 687       return;
 688     }
 689
 690   /* Check for simple loops.  */
 691   desc = get_simple_loop_desc (loop);
 692
 693   /* Check simpleness.  */
 694   if (!desc->simple_p || desc->assumptions)
 695     {
 696       if (dump_file)
 697         fprintf (dump_file,
 698                  ";; Unable to prove that the number of iterations "
 699                  "can be counted in runtime\n");
 700       return;
 701     }
 702
 703   if (desc->const_iter)
 704     {
 705       if (dump_file)
 706         fprintf (dump_file, ";; Loop iterates constant times\n");
 707       return;
 708     }
 709
 710   /* Check whether the loop rolls.  */
 711   if ((get_estimated_loop_iterations (loop, &iterations)
 712        || get_max_loop_iterations (loop, &iterations))
 713       && wi::ltu_p (iterations, 2 * nunroll))
 714     {
 715       if (dump_file)
 716         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 717       return;
 718     }
 719
 720   /* Success; now force nunroll to be power of 2, as we are unable to
 721      cope with overflows in computation of number of iterations.  */
 722   for (i = 1; 2 * i <= nunroll; i *= 2)
 723     continue;
 724
 725   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 726   loop->lpt_decision.times = i - 1;
 727 }
 728
 729 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 730    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 731    and NULL is returned instead.  */
 732
 733 basic_block
 734 split_edge_and_insert (edge e, rtx_insn *insns)
 735 {
 736   basic_block bb;
 737
 738   if (!insns)
 739     return NULL;
 740   bb = split_edge (e);
 741   emit_insn_after (insns, BB_END (bb));
 742
 743   /* ??? We used to assume that INSNS can contain control flow insns, and
 744      that we had to try to find sub basic blocks in BB to maintain a valid
 745      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 746      and call break_superblocks when going out of cfglayout mode.  But it
 747      turns out that this never happens; and that if it does ever happen,
 748      the verify_flow_info at the end of the RTL loop passes would fail.
 749
 750      There are two reasons why we expected we could have control flow insns
 751      in INSNS.  The first is when a comparison has to be done in parts, and
 752      the second is when the number of iterations is computed for loops with
 753      the number of iterations known at runtime.  In both cases, test cases
 754      to get control flow in INSNS appear to be impossible to construct:
 755
 756       * If do_compare_rtx_and_jump needs several branches to do comparison
 757         in a mode that needs comparison by parts, we cannot analyze the
 758         number of iterations of the loop, and we never get to unrolling it.
 759
 760       * The code in expand_divmod that was suspected to cause creation of
 761         branching code seems to be only accessed for signed division.  The
 762         divisions used by # of iterations analysis are always unsigned.
 763         Problems might arise on architectures that emits branching code
 764         for some operations that may appear in the unroller (especially
 765         for division), but we have no such architectures.
 766
 767      Considering all this, it was decided that we should for now assume
 768      that INSNS can in theory contain control flow insns, but in practice
 769      it never does.  So we don't handle the theoretical case, and should
 770      a real failure ever show up, we have a pretty good clue for how to
 771      fix it.  */
 772
 773   return bb;
 774 }
 775
 776 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 777    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 778    in order to create a jump.  */
 779
 780 static rtx_insn *
 781 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
 782                       rtx_code_label *label, int prob, rtx_insn *cinsn)
 783 {
 784   rtx_insn *seq;
 785   rtx_jump_insn *jump;
 786   rtx cond;
 787   machine_mode mode;
 788
 789   mode = GET_MODE (op0);
 790   if (mode == VOIDmode)
 791     mode = GET_MODE (op1);
 792
 793   start_sequence ();
 794   if (GET_MODE_CLASS (mode) == MODE_CC)
 795     {
 796       /* A hack -- there seems to be no easy generic way how to make a
 797          conditional jump from a ccmode comparison.  */
 798       gcc_assert (cinsn);
 799       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 800       gcc_assert (GET_CODE (cond) == comp);
 801       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 802       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 803       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 804       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 805       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 806       LABEL_NUSES (JUMP_LABEL (jump))++;
 807       redirect_jump (jump, label, 0);
 808     }
 809   else
 810     {
 811       gcc_assert (!cinsn);
 812
 813       op0 = force_operand (op0, NULL_RTX);
 814       op1 = force_operand (op1, NULL_RTX);
 815       do_compare_rtx_and_jump (op0, op1, comp, 0,
 816                                mode, NULL_RTX, NULL, label, -1);
 817       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 818       jump->set_jump_target (label);
 819       LABEL_NUSES (label)++;
 820     }
 821   add_int_reg_note (jump, REG_BR_PROB, prob);
 822
 823   seq = get_insns ();
 824   end_sequence ();
 825
 826   return seq;
 827 }
 828
/* Unroll LOOP for which we are able to count number of iterations in runtime
   LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
   extra care for case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }

   The remainder is computed by ANDing the iteration count with
   LOOP->LPT_DECISION.TIMES, so the code relies on TIMES + 1 being a power
   of two.  On return the niter descriptor of LOOP and its recorded
   iteration bounds/estimates are updated to describe the unrolled loop.
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  /* Collect IV-splitting / accumulator-expansion candidates only when one
     of the corresponding optimizations is enabled.  */
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  /* These are the blocks outside LOOP that are dominated by some block
     of LOOP.  */
  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
        if (!flow_bb_inside_loop_p (loop, bb))
          dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
         unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
         unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  /* OLD_NITER keeps the register holding the full iteration count; NITER
     is overwritten below with the remainder.  */
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
                               niter, gen_int_mode (max_unroll, desc->mode),
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  /* Edges collected by the duplication calls; they are removed near the
     end of this function.  */
  auto_vec<edge> remove_edges;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of loop body (almost always we must leave exit test
     here; the only exception is when we have extra zero check and the number
     of iterations is reliable).  Also record the place of (possible) extra
     zero check.  */
  bitmap_clear (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    bitmap_set_bit (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                      1, wont_exit, desc->out_edge,
                                      &remove_edges,
                                      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  Bit 1 of WONT_EXIT is set for every peeled copy
         except the last one when LAST_MAY_EXIT holds.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
        bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                          1, wont_exit, desc->out_edge,
                                          &remove_edges,
                                          DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  J is the remainder value that dispatches
         to the copy just peeled; P is the probability given to that
         branch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
                                          block_label (preheader), p,
                                          NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
         and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      /* Put the test into a new block on the edge entering SWTCH and wire
         its taken edge to PREHEADER.  */
      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  The test is inserted after the
         block recorded in EZC_SWTCH before the first copy was peeled.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
                                          block_label (preheader), p,
                                          NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  Only the copy numbered MAY_EXIT_COPY has its bit
     cleared in WONT_EXIT; all other bits are set.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      max_unroll,
                                      wont_exit, desc->out_edge,
                                      &remove_edges,
                                      DLTHE_FLAG_UPDATE_FREQ
                                      | (opt_info
                                         ? DLTHE_RECORD_COPY_NUMBER
                                           : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
         made.  The successor of EXIT_BLOCK that leads to the old exit
         destination becomes the out edge, the other one the in edge.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
        {
          desc->out_edge = EDGE_SUCC (exit_block, 0);
          desc->in_edge = EDGE_SUCC (exit_block, 1);
        }
      else
        {
          desc->out_edge = EDGE_SUCC (exit_block, 1);
          desc->in_edge = EDGE_SUCC (exit_block, 0);
        }
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
                         gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (exit_at_end)
    {
      /* With the exit at the end, the expression, the upper bound and the
         estimate are each decremented by one; an estimate that is absent
         or already zero is marked as unavailable instead.  */
      desc->niter_expr =
        simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
          && loop->nb_iterations_estimate != 0)
        --loop->nb_iterations_estimate;
      else
        loop->any_estimate = false;
    }

  if (dump_file)
    fprintf (dump_file,
             ";; Unrolled loop %d times, counting # of iterations "
             "in runtime, %i insns\n",
             max_unroll, num_loop_insns (loop));
}
1097
1098 /* Decide whether to unroll LOOP stupidly and how much.  */
1099 static void
1100 decide_unroll_stupid (struct loop *loop, int flags)
1101 {
1102   unsigned nunroll, nunroll_by_av, i;
1103   struct niter_desc *desc;
1104   widest_int iterations;
1105
1106   if (!(flags & UAP_UNROLL_ALL))
1107     {
1108       /* We were not asked to, just return back silently.  */
1109       return;
1110     }
1111
1112   if (dump_file)
1113     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1114
1115   /* nunroll = total number of copies of the original loop body in
1116      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1117   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1118   nunroll_by_av
1119     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1120   if (nunroll > nunroll_by_av)
1121     nunroll = nunroll_by_av;
1122   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1123     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1124
1125   if (targetm.loop_unroll_adjust)
1126     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1127
1128   /* Skip big loops.  */
1129   if (nunroll <= 1)
1130     {
1131       if (dump_file)
1132         fprintf (dump_file, ";; Not considering loop, is too big\n");
1133       return;
1134     }
1135
1136   /* Check for simple loops.  */
1137   desc = get_simple_loop_desc (loop);
1138
1139   /* Check simpleness.  */
1140   if (desc->simple_p && !desc->assumptions)
1141     {
1142       if (dump_file)
1143         fprintf (dump_file, ";; The loop is simple\n");
1144       return;
1145     }
1146
1147   /* Do not unroll loops with branches inside -- it increases number
1148      of mispredicts.
1149      TODO: this heuristic needs tunning; call inside the loop body
1150      is also relatively good reason to not unroll.  */
1151   if (num_loop_branches (loop) > 1)
1152     {
1153       if (dump_file)
1154         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1155       return;
1156     }
1157
1158   /* Check whether the loop rolls.  */
1159   if ((get_estimated_loop_iterations (loop, &iterations)
1160        || get_max_loop_iterations (loop, &iterations))
1161       && wi::ltu_p (iterations, 2 * nunroll))
1162     {
1163       if (dump_file)
1164         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1165       return;
1166     }
1167
1168   /* Success.  Now force nunroll to be power of 2, as it seems that this
1169      improves results (partially because of better alignments, partially
1170      because of some dark magic).  */
1171   for (i = 1; 2 * i <= nunroll; i *= 2)
1172     continue;
1173
1174   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1175   loop->lpt_decision.times = i - 1;
1176 }
1177
1178 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1179
1180    while (cond)
1181      body;
1182
1183    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1184
1185    while (cond)
1186      {
1187        body;
1188        if (!cond) break;
1189        body;
1190        if (!cond) break;
1191        body;
1192        if (!cond) break;
1193        body;
1194      }
1195    */
1196 static void
1197 unroll_loop_stupid (struct loop *loop)
1198 {
1199   sbitmap wont_exit;
1200   unsigned nunroll = loop->lpt_decision.times;
1201   struct niter_desc *desc = get_simple_loop_desc (loop);
1202   struct opt_info *opt_info = NULL;
1203   bool ok;
1204
1205   if (flag_split_ivs_in_unroller
1206       || flag_variable_expansion_in_unroller)
1207     opt_info = analyze_insns_in_loop (loop);
1208
1209
1210   wont_exit = sbitmap_alloc (nunroll + 1);
1211   bitmap_clear (wont_exit);
1212   opt_info_start_duplication (opt_info);
1213
1214   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1215                                       nunroll, wont_exit,
1216                                       NULL, NULL,
1217                                       DLTHE_FLAG_UPDATE_FREQ
1218                                       | (opt_info
1219                                          ? DLTHE_RECORD_COPY_NUMBER
1220                                            : 0));
1221   gcc_assert (ok);
1222
1223   if (opt_info)
1224     {
1225       apply_opt_in_copies (opt_info, nunroll, true, true);
1226       free_opt_info (opt_info);
1227     }
1228
1229   free (wont_exit);
1230
1231   if (desc->simple_p)
1232     {
1233       /* We indeed may get here provided that there are nontrivial assumptions
1234          for a loop to be really simple.  We could update the counts, but the
1235          problem is that we are unable to decide which exit will be taken
1236          (not really true in case the number of iterations is constant,
1237          but no one will do anything with this information, so we do not
1238          worry about it).  */
1239       desc->simple_p = false;
1240     }
1241
1242   if (dump_file)
1243     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1244              nunroll, num_loop_insns (loop));
1245 }
1246
1247 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1248    Set *DEBUG_USES to the number of debug insns that reference the
1249    variable.  */
1250
1251 static bool
1252 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1253                                   int *debug_uses)
1254 {
1255   basic_block *body, bb;
1256   unsigned i;
1257   int count_ref = 0;
1258   rtx_insn *insn;
1259
1260   body = get_loop_body (loop);
1261   for (i = 0; i < loop->num_nodes; i++)
1262     {
1263       bb = body[i];
1264
1265       FOR_BB_INSNS (bb, insn)
1266         if (!rtx_referenced_p (reg, insn))
1267           continue;
1268         else if (DEBUG_INSN_P (insn))
1269           ++*debug_uses;
1270         else if (++count_ref > 1)
1271           break;
1272     }
1273   free (body);
1274   return (count_ref  == 1);
1275 }
1276
1277 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1278
1279 static void
1280 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1281 {
1282   basic_block *body, bb;
1283   unsigned i;
1284   rtx_insn *insn;
1285
1286   body = get_loop_body (loop);
1287   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1288     {
1289       bb = body[i];
1290
1291       FOR_BB_INSNS (bb, insn)
1292         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1293           continue;
1294         else
1295           {
1296             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1297                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1298             if (!--debug_uses)
1299               break;
1300           }
1301     }
1302   free (body);
1303 }
1304
1305 /* Determine whether INSN contains an accumulator
1306    which can be expanded into separate copies,
1307    one for each copy of the LOOP body.
1308
1309    for (i = 0 ; i < n; i++)
1310      sum += a[i];
1311
1312    ==>
1313
1314    sum += a[i]
1315    ....
1316    i = i+1;
1317    sum1 += a[i]
1318    ....
1319    i = i+1
1320    sum2 += a[i];
1321    ....
1322
1323    Return NULL if INSN contains no opportunity for expansion of accumulator.
1324    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1325    information and return a pointer to it.
1326 */
1327
1328 static struct var_to_expand *
1329 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1330 {
1331   rtx set, dest, src;
1332   struct var_to_expand *ves;
1333   unsigned accum_pos;
1334   enum rtx_code code;
1335   int debug_uses = 0;
1336
1337   set = single_set (insn);
1338   if (!set)
1339     return NULL;
1340
1341   dest = SET_DEST (set);
1342   src = SET_SRC (set);
1343   code = GET_CODE (src);
1344
1345   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1346     return NULL;
1347
1348   if (FLOAT_MODE_P (GET_MODE (dest)))
1349     {
1350       if (!flag_associative_math)
1351         return NULL;
1352       /* In the case of FMA, we're also changing the rounding.  */
1353       if (code == FMA && !flag_unsafe_math_optimizations)
1354         return NULL;
1355     }
1356
1357   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1358      in MD.  But if there is no optab to generate the insn, we can not
1359      perform the variable expansion.  This can happen if an MD provides
1360      an insn but not a named pattern to generate it, for example to avoid
1361      producing code that needs additional mode switches like for x87/mmx.
1362
1363      So we check have_insn_for which looks for an optab for the operation
1364      in SRC.  If it doesn't exist, we can't perform the expansion even
1365      though INSN is valid.  */
1366   if (!have_insn_for (code, GET_MODE (src)))
1367     return NULL;
1368
1369   if (!REG_P (dest)
1370       && !(GET_CODE (dest) == SUBREG
1371            && REG_P (SUBREG_REG (dest))))
1372     return NULL;
1373
1374   /* Find the accumulator use within the operation.  */
1375   if (code == FMA)
1376     {
1377       /* We only support accumulation via FMA in the ADD position.  */
1378       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1379         return NULL;
1380       accum_pos = 2;
1381     }
1382   else if (rtx_equal_p (dest, XEXP (src, 0)))
1383     accum_pos = 0;
1384   else if (rtx_equal_p (dest, XEXP (src, 1)))
1385     {
1386       /* The method of expansion that we are using; which includes the
1387          initialization of the expansions with zero and the summation of
1388          the expansions at the end of the computation will yield wrong
1389          results for (x = something - x) thus avoid using it in that case.  */
1390       if (code == MINUS)
1391         return NULL;
1392       accum_pos = 1;
1393     }
1394   else
1395     return NULL;
1396
1397   /* It must not otherwise be used.  */
1398   if (code == FMA)
1399     {
1400       if (rtx_referenced_p (dest, XEXP (src, 0))
1401           || rtx_referenced_p (dest, XEXP (src, 1)))
1402         return NULL;
1403     }
1404   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1405     return NULL;
1406
1407   /* It must be used in exactly one insn.  */
1408   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1409     return NULL;
1410
1411   if (dump_file)
1412     {
1413       fprintf (dump_file, "\n;; Expanding Accumulator ");
1414       print_rtl (dump_file, dest);
1415       fprintf (dump_file, "\n");
1416     }
1417
1418   if (debug_uses)
1419     /* Instead of resetting the debug insns, we could replace each
1420        debug use in the loop with the sum or product of all expanded
1421        accummulators.  Since we'll only know of all expansions at the
1422        end, we'd have to keep track of which vars_to_expand a debug
1423        insn in the loop references, take note of each copy of the
1424        debug insn during unrolling, and when it's all done, compute
1425        the sum or product of each variable and adjust the original
1426        debug insn and each copy thereof.  What a pain!  */
1427     reset_debug_uses_in_loop (loop, dest, debug_uses);
1428
1429   /* Record the accumulator to expand.  */
1430   ves = XNEW (struct var_to_expand);
1431   ves->insn = insn;
1432   ves->reg = copy_rtx (dest);
1433   ves->var_expansions.create (1);
1434   ves->next = NULL;
1435   ves->op = GET_CODE (src);
1436   ves->expansion_count = 0;
1437   ves->reuse_expansion = 0;
1438   return ves;
1439 }
1440
1441 /* Determine whether there is an induction variable in INSN that
1442    we would like to split during unrolling.
1443
1444    I.e. replace
1445
1446    i = i + 1;
1447    ...
1448    i = i + 1;
1449    ...
1450    i = i + 1;
1451    ...
1452
1453    type chains by
1454
1455    i0 = i + 1
1456    ...
1457    i = i0 + 1
1458    ...
1459    i = i0 + 2
1460    ...
1461
1462    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1463    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1464    pointer to it.  */
1465
1466 static struct iv_to_split *
1467 analyze_iv_to_split_insn (rtx_insn *insn)
1468 {
1469   rtx set, dest;
1470   struct rtx_iv iv;
1471   struct iv_to_split *ivts;
1472   bool ok;
1473
1474   /* For now we just split the basic induction variables.  Later this may be
1475      extended for example by selecting also addresses of memory references.  */
1476   set = single_set (insn);
1477   if (!set)
1478     return NULL;
1479
1480   dest = SET_DEST (set);
1481   if (!REG_P (dest))
1482     return NULL;
1483
1484   if (!biv_p (insn, dest))
1485     return NULL;
1486
1487   ok = iv_analyze_result (insn, dest, &iv);
1488
1489   /* This used to be an assert under the assumption that if biv_p returns
1490      true that iv_analyze_result must also return true.  However, that
1491      assumption is not strictly correct as evidenced by pr25569.
1492
1493      Returning NULL when iv_analyze_result returns false is safe and
1494      avoids the problems in pr25569 until the iv_analyze_* routines
1495      can be fixed, which is apparently hard and time consuming
1496      according to their author.  */
1497   if (! ok)
1498     return NULL;
1499
1500   if (iv.step == const0_rtx
1501       || iv.mode != iv.extend_mode)
1502     return NULL;
1503
1504   /* Record the insn to split.  */
1505   ivts = XNEW (struct iv_to_split);
1506   ivts->insn = insn;
1507   ivts->orig_var = dest;
1508   ivts->base_var = NULL_RTX;
1509   ivts->step = iv.step;
1510   ivts->next = NULL;
1511
1512   return ivts;
1513 }
1514
1515 /* Determines which of insns in LOOP can be optimized.
1516    Return a OPT_INFO struct with the relevant hash tables filled
1517    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1518    is undefined for the return value.  */
1519
1520 static struct opt_info *
1521 analyze_insns_in_loop (struct loop *loop)
1522 {
1523   basic_block *body, bb;
1524   unsigned i;
1525   struct opt_info *opt_info = XCNEW (struct opt_info);
1526   rtx_insn *insn;
1527   struct iv_to_split *ivts = NULL;
1528   struct var_to_expand *ves = NULL;
1529   iv_to_split **slot1;
1530   var_to_expand **slot2;
1531   vec<edge> edges = get_loop_exit_edges (loop);
1532   edge exit;
1533   bool can_apply = false;
1534
1535   iv_analysis_loop_init (loop);
1536
1537   body = get_loop_body (loop);
1538
1539   if (flag_split_ivs_in_unroller)
1540     {
1541       opt_info->insns_to_split
1542         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1543       opt_info->iv_to_split_head = NULL;
1544       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1545     }
1546
1547   /* Record the loop exit bb and loop preheader before the unrolling.  */
1548   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1549
1550   if (edges.length () == 1)
1551     {
1552       exit = edges[0];
1553       if (!(exit->flags & EDGE_COMPLEX))
1554         {
1555           opt_info->loop_exit = split_edge (exit);
1556           can_apply = true;
1557         }
1558     }
1559
1560   if (flag_variable_expansion_in_unroller
1561       && can_apply)
1562     {
1563       opt_info->insns_with_var_to_expand
1564         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1565       opt_info->var_to_expand_head = NULL;
1566       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1567     }
1568
1569   for (i = 0; i < loop->num_nodes; i++)
1570     {
1571       bb = body[i];
1572       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1573         continue;
1574
1575       FOR_BB_INSNS (bb, insn)
1576       {
1577         if (!INSN_P (insn))
1578           continue;
1579
1580         if (opt_info->insns_to_split)
1581           ivts = analyze_iv_to_split_insn (insn);
1582
1583         if (ivts)
1584           {
1585             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1586             gcc_assert (*slot1 == NULL);
1587             *slot1 = ivts;
1588             *opt_info->iv_to_split_tail = ivts;
1589             opt_info->iv_to_split_tail = &ivts->next;
1590             continue;
1591           }
1592
1593         if (opt_info->insns_with_var_to_expand)
1594           ves = analyze_insn_to_expand_var (loop, insn);
1595
1596         if (ves)
1597           {
1598             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1599             gcc_assert (*slot2 == NULL);
1600             *slot2 = ves;
1601             *opt_info->var_to_expand_tail = ves;
1602             opt_info->var_to_expand_tail = &ves->next;
1603           }
1604       }
1605     }
1606
1607   edges.release ();
1608   free (body);
1609   return opt_info;
1610 }
1611
1612 /* Called just before loop duplication.  Records start of duplicated area
1613    to OPT_INFO.  */
1614
1615 static void
1616 opt_info_start_duplication (struct opt_info *opt_info)
1617 {
1618   if (opt_info)
1619     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1620 }
1621
/* Return the number of iterations between the initialization of the base
   variable and the copy numbered N_COPY.  N_COPIES is the total number of
   newly created copies.  UNROLLING is true if we are unrolling (not
   peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization is done in the original loop body
     (number 0), so the distance is the copy number itself.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the copy in which the initialization occurs has number 1
     and the original loop (number 0) comes last.  */
  return n_copy ? n_copy - 1 : n_copies;
}
1646
1647 /* Allocate basic variable for the induction variable chain.  */
1648
1649 static void
1650 allocate_basic_variable (struct iv_to_split *ivts)
1651 {
1652   rtx expr = SET_SRC (single_set (ivts->insn));
1653
1654   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1655 }
1656
1657 /* Insert initialization of basic variable of IVTS before INSN, taking
1658    the initial value from INSN.  */
1659
1660 static void
1661 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1662 {
1663   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1664   rtx_insn *seq;
1665
1666   start_sequence ();
1667   expr = force_operand (expr, ivts->base_var);
1668   if (expr != ivts->base_var)
1669     emit_move_insn (ivts->base_var, expr);
1670   seq = get_insns ();
1671   end_sequence ();
1672
1673   emit_insn_before (seq, insn);
1674 }
1675
1676 /* Replace the use of induction variable described in IVTS in INSN
1677    by base variable + DELTA * step.  */
1678
1679 static void
1680 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1681 {
1682   rtx expr, *loc, incr, var;
1683   rtx_insn *seq;
1684   machine_mode mode = GET_MODE (ivts->base_var);
1685   rtx src, dest, set;
1686
1687   /* Construct base + DELTA * step.  */
1688   if (!delta)
1689     expr = ivts->base_var;
1690   else
1691     {
1692       incr = simplify_gen_binary (MULT, mode,
1693                                   ivts->step, gen_int_mode (delta, mode));
1694       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1695                                   ivts->base_var, incr);
1696     }
1697
1698   /* Figure out where to do the replacement.  */
1699   loc = &SET_SRC (single_set (insn));
1700
1701   /* If we can make the replacement right away, we're done.  */
1702   if (validate_change (insn, loc, expr, 0))
1703     return;
1704
1705   /* Otherwise, force EXPR into a register and try again.  */
1706   start_sequence ();
1707   var = gen_reg_rtx (mode);
1708   expr = force_operand (expr, var);
1709   if (expr != var)
1710     emit_move_insn (var, expr);
1711   seq = get_insns ();
1712   end_sequence ();
1713   emit_insn_before (seq, insn);
1714
1715   if (validate_change (insn, loc, var, 0))
1716     return;
1717
1718   /* The last chance.  Try recreating the assignment in insn
1719      completely from scratch.  */
1720   set = single_set (insn);
1721   gcc_assert (set);
1722
1723   start_sequence ();
1724   *loc = var;
1725   src = copy_rtx (SET_SRC (set));
1726   dest = copy_rtx (SET_DEST (set));
1727   src = force_operand (src, dest);
1728   if (src != dest)
1729     emit_move_insn (dest, src);
1730   seq = get_insns ();
1731   end_sequence ();
1732
1733   emit_insn_before (seq, insn);
1734   delete_insn (insn);
1735 }
1736
1737
1738 /* Return one expansion of the accumulator recorded in struct VE.  */
1739
1740 static rtx
1741 get_expansion (struct var_to_expand *ve)
1742 {
1743   rtx reg;
1744
1745   if (ve->reuse_expansion == 0)
1746     reg = ve->reg;
1747   else
1748     reg = ve->var_expansions[ve->reuse_expansion - 1];
1749
1750   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1751     ve->reuse_expansion = 0;
1752   else
1753     ve->reuse_expansion++;
1754
1755   return reg;
1756 }
1757
1758
1759 /* Given INSN replace the uses of the accumulator recorded in VE
1760    with a new register.  */
1761
1762 static void
1763 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1764 {
1765   rtx new_reg, set;
1766   bool really_new_expansion = false;
1767
1768   set = single_set (insn);
1769   gcc_assert (set);
1770
1771   /* Generate a new register only if the expansion limit has not been
1772      reached.  Else reuse an already existing expansion.  */
1773   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1774     {
1775       really_new_expansion = true;
1776       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1777     }
1778   else
1779     new_reg = get_expansion (ve);
1780
1781   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1782   if (apply_change_group ())
1783     if (really_new_expansion)
1784       {
1785         ve->var_expansions.safe_push (new_reg);
1786         ve->expansion_count++;
1787       }
1788 }
1789
1790 /* Initialize the variable expansions in loop preheader.  PLACE is the
1791    loop-preheader basic block where the initialization of the
1792    expansions should take place.  The expansions are initialized with
1793    (-0) when the operation is plus or minus to honor sign zero.  This
1794    way we can prevent cases where the sign of the final result is
1795    effected by the sign of the expansion.  Here is an example to
1796    demonstrate this:
1797
1798    for (i = 0 ; i < n; i++)
1799      sum += something;
1800
1801    ==>
1802
1803    sum += something
1804    ....
1805    i = i+1;
1806    sum1 += something
1807    ....
1808    i = i+1
1809    sum2 += something;
1810    ....
1811
1812    When SUM is initialized with -zero and SOMETHING is also -zero; the
1813    final result of sum should be -zero thus the expansions sum1 and sum2
1814    should be initialized with -zero as well (otherwise we will get +zero
1815    as the final result).  */
1816
1817 static void
1818 insert_var_expansion_initialization (struct var_to_expand *ve,
1819                                      basic_block place)
1820 {
1821   rtx_insn *seq;
1822   rtx var, zero_init;
1823   unsigned i;
1824   machine_mode mode = GET_MODE (ve->reg);
1825   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1826
1827   if (ve->var_expansions.length () == 0)
1828     return;
1829
1830   start_sequence ();
1831   switch (ve->op)
1832     {
1833     case FMA:
1834       /* Note that we only accumulate FMA via the ADD operand.  */
1835     case PLUS:
1836     case MINUS:
1837       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1838         {
1839           if (honor_signed_zero_p)
1840             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1841           else
1842             zero_init = CONST0_RTX (mode);
1843           emit_move_insn (var, zero_init);
1844         }
1845       break;
1846
1847     case MULT:
1848       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1849         {
1850           zero_init = CONST1_RTX (GET_MODE (var));
1851           emit_move_insn (var, zero_init);
1852         }
1853       break;
1854
1855     default:
1856       gcc_unreachable ();
1857     }
1858
1859   seq = get_insns ();
1860   end_sequence ();
1861
1862   emit_insn_after (seq, BB_END (place));
1863 }
1864
1865 /* Combine the variable expansions at the loop exit.  PLACE is the
1866    loop exit basic block where the summation of the expansions should
1867    take place.  */
1868
1869 static void
1870 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1871 {
1872   rtx sum = ve->reg;
1873   rtx expr, var;
1874   rtx_insn *seq, *insn;
1875   unsigned i;
1876
1877   if (ve->var_expansions.length () == 0)
1878     return;
1879
1880   start_sequence ();
1881   switch (ve->op)
1882     {
1883     case FMA:
1884       /* Note that we only accumulate FMA via the ADD operand.  */
1885     case PLUS:
1886     case MINUS:
1887       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1888         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1889       break;
1890
1891     case MULT:
1892       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1893         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1894       break;
1895
1896     default:
1897       gcc_unreachable ();
1898     }
1899
1900   expr = force_operand (sum, ve->reg);
1901   if (expr != ve->reg)
1902     emit_move_insn (ve->reg, expr);
1903   seq = get_insns ();
1904   end_sequence ();
1905
1906   insn = BB_HEAD (place);
1907   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1908     insn = NEXT_INSN (insn);
1909
1910   emit_insn_after (seq, insn);
1911 }
1912
1913 /* Strip away REG_EQUAL notes for IVs we're splitting.
1914
1915    Updating REG_EQUAL notes for IVs we split is tricky: We
1916    cannot tell until after unrolling, DF-rescanning, and liveness
1917    updating, whether an EQ_USE is reached by the split IV while
1918    the IV reg is still live.  See PR55006.
1919
1920    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1921    because RTL loop-iv requires us to defer rescanning insns and
1922    any notes attached to them.  So resort to old techniques...  */
1923
1924 static void
1925 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1926 {
1927   struct iv_to_split *ivts;
1928   rtx note = find_reg_equal_equiv_note (insn);
1929   if (! note)
1930     return;
1931   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1932     if (reg_mentioned_p (ivts->orig_var, note))
1933       {
1934         remove_note (insn, note);
1935         return;
1936       }
1937 }
1938
/* Apply loop optimizations in the loop copies, using the data that
   was gathered before the unrolling.  Structure OPT_INFO records that
   data.  Two optimizations are applied here: splitting of induction
   variables (OPT_INFO->insns_to_split) and expansion of accumulator
   variables (OPT_INFO->insns_with_var_to_expand).

   N_COPIES is passed on to determine_split_iv_delta together with the
   position of each block in the copy sequence.
   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original
   body of the loop (as it should happen in complete unrolling, but not
   in ordinary peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
                     unsigned n_copies, bool unrolling,
                     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  /* Look-up keys for the two hash tables; only their insn field is
     set in this function.  */
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  /* First pass: visit every basic block numbered from
     OPT_INFO->first_new_block upwards and walk its insns in step with
     the insns of the block it was copied from.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
         duplicate_loop_to_header_edge.  It is cleared once it has been
         consumed.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
                                        unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
        {
          /* Ignore anything that is not an insn, and debug insns whose
             variable location decl is a LABEL_DECL.  */
          if (!INSN_P (insn)
              || (DEBUG_INSN_P (insn)
                  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
            continue;

          /* Advance ORIG_INSN with the same filter, so that it ends up
             on the insn of the original block that pairs with INSN.  */
          while (!INSN_P (orig_insn)
                 || (DEBUG_INSN_P (orig_insn)
                     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
                         == LABEL_DECL)))
            orig_insn = NEXT_INSN (orig_insn);

          /* Both tables are searched by the original insn.  */
          ivts_templ.insn = orig_insn;
          ve_templ.insn = orig_insn;

          /* Apply splitting iv optimization.  */
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, insn);

              ivts = opt_info->insns_to_split->find (&ivts_templ);

              if (ivts)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));

                  /* A zero delta means the base initialization is
                     emitted for this copy before the IV is split.  */
                  if (!delta)
                    insert_base_initialization (ivts, insn);
                  split_iv (ivts, insn, delta);
                }
            }
          /* Apply variable expansion optimization.  This is done only
             when unrolling, never when peeling.  */
          if (unrolling && opt_info->insns_with_var_to_expand)
            {
              ves = (struct var_to_expand *)
                opt_info->insns_with_var_to_expand->find (&ve_templ);
              if (ves)
                {
                  gcc_assert (GET_CODE (PATTERN (insn))
                              == GET_CODE (PATTERN (orig_insn)));
                  expand_var_during_unrolling (ves, insn);
                }
            }
          orig_insn = NEXT_INSN (orig_insn);
        }
    }

  /* Everything below touches the original loop body or its
     surroundings.  */
  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
        combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find its blocks as originals
     of the blocks in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  Only IV splitting is
     applied in this pass.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
        continue;

      /* The original body is treated as position 0 of the copy
         sequence.  */
      delta = determine_split_iv_delta (0, n_copies, unrolling);
      /* NOTE(review): the walk starts at the head of ORIG_BB but stops
         after the end of BB (the copy), not after the end of ORIG_BB --
         confirm that this is intended.  NEXT is fetched before the insn
         is processed.  */
      for (orig_insn = BB_HEAD (orig_bb);
           orig_insn != NEXT_INSN (BB_END (bb));
           orig_insn = next)
        {
          next = NEXT_INSN (orig_insn);

          if (!INSN_P (orig_insn))
            continue;

          ivts_templ.insn = orig_insn;
          if (opt_info->insns_to_split)
            {
              maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

              ivts = (struct iv_to_split *)
                opt_info->insns_to_split->find (&ivts_templ);
              if (ivts)
                {
                  if (!delta)
                    insert_base_initialization (ivts, orig_insn);
                  split_iv (ivts, orig_insn, delta);
                  continue;
                }
            }

        }
    }
}
2084
2085 /* Release OPT_INFO.  */
2086
2087 static void
2088 free_opt_info (struct opt_info *opt_info)
2089 {
2090   delete opt_info->insns_to_split;
2091   opt_info->insns_to_split = NULL;
2092   if (opt_info->insns_with_var_to_expand)
2093     {
2094       struct var_to_expand *ves;
2095
2096       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2097         ves->var_expansions.release ();
2098       delete opt_info->insns_with_var_to_expand;
2099       opt_info->insns_with_var_to_expand = NULL;
2100     }
2101   free (opt_info);
2102 }