gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2020 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "target.h"
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "cfghooks.h"
  28 #include "memmodel.h"
  29 #include "optabs.h"
  30 #include "emit-rtl.h"
  31 #include "recog.h"
  32 #include "profile.h"
  33 #include "cfgrtl.h"
  34 #include "cfgloop.h"
  35 #include "dojump.h"
  36 #include "expr.h"
  37 #include "dumpfile.h"
  38
  39 /* This pass performs loop unrolling.  We only perform this
  40    optimization on innermost loops (with single exception) because
  41    the impact on performance is greatest here, and we want to avoid
  42    unnecessary code size growth.  The gain is caused by greater sequentiality
  43    of code, better code to optimize for further passes and in some cases
  44    by fewer testings of exit conditions.  The main problem is code growth,
  45    that impacts performance negatively due to effect of caches.
  46
  47    What we do:
  48
  49    -- unrolling of loops that roll constant times; this is almost always
  50       win, as we get rid of exit condition tests.
  51    -- unrolling of loops that roll number of times that we can compute
  52       in runtime; we also get rid of exit condition tests here, but there
  53       is the extra expense for calculating the number of iterations
  54    -- simple unrolling of remaining loops; this is performed only if we
  55       are asked to, as the gain is questionable in this case and often
  56       it may even slow down the code
  57    For more detailed descriptions of each of those, see comments at
  58    appropriate function below.
  59
  60    There is a lot of parameters (defined and described in params.def) that
  61    control how much we unroll.
  62
  63    ??? A great problem is that we don't have a good way how to determine
  64    how many times we should unroll the loop; the experiments I have made
  65    showed that this choice may affect performance in order of several %.
  66    */
  67
  68 /* Information about induction variables to split.  */
  69
  70 struct iv_to_split
  71 {
  72   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  73   rtx orig_var;         /* The variable (register) for the IV before split.  */
  74   rtx base_var;         /* The variable on that the values in the further
  75                            iterations are based.  */
  76   rtx step;             /* Step of the induction variable.  */
  77   struct iv_to_split *next; /* Next entry in walking order.  */
  78 };
  79
  80 /* Information about accumulators to expand.  */
  81
  82 struct var_to_expand
  83 {
  84   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
  85   rtx reg;                         /* The accumulator which is expanded.  */
  86   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  87   struct var_to_expand *next;      /* Next entry in walking order.  */
  88   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  89                                       or multiplication.  */
  90   int expansion_count;             /* Count the number of expansions generated so far.  */
  91   int reuse_expansion;             /* The expansion we intend to reuse to expand
  92                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  93                                       the original accumulator.  Else use
  94                                       var_expansions[REUSE_EXPANSION - 1].  */
  95 };
  96
  97 /* Hashtable helper for iv_to_split.  */
  98
  99 struct iv_split_hasher : free_ptr_hash <iv_to_split>
 100 {
 101   static inline hashval_t hash (const iv_to_split *);
 102   static inline bool equal (const iv_to_split *, const iv_to_split *);
 103 };
 104
 105
 106 /* A hash function for information about insns to split.  */
 107
 108 inline hashval_t
 109 iv_split_hasher::hash (const iv_to_split *ivts)
 110 {
 111   return (hashval_t) INSN_UID (ivts->insn);
 112 }
 113
 114 /* An equality functions for information about insns to split.  */
 115
 116 inline bool
 117 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
 118 {
 119   return i1->insn == i2->insn;
 120 }
 121
 122 /* Hashtable helper for iv_to_split.  */
 123
 124 struct var_expand_hasher : free_ptr_hash <var_to_expand>
 125 {
 126   static inline hashval_t hash (const var_to_expand *);
 127   static inline bool equal (const var_to_expand *, const var_to_expand *);
 128 };
 129
 130 /* Return a hash for VES.  */
 131
 132 inline hashval_t
 133 var_expand_hasher::hash (const var_to_expand *ves)
 134 {
 135   return (hashval_t) INSN_UID (ves->insn);
 136 }
 137
 138 /* Return true if I1 and I2 refer to the same instruction.  */
 139
 140 inline bool
 141 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
 142 {
 143   return i1->insn == i2->insn;
 144 }
 145
 146 /* Information about optimization applied in
 147    the unrolled loop.  */
 148
 149 struct opt_info
 150 {
 151   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 152                                                   split.  */
 153   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 154   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 155   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 156                                         insns with accumulators to expand.  */
 157   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 158   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 159   unsigned first_new_block;        /* The first basic block that was
 160                                       duplicated.  */
 161   basic_block loop_exit;           /* The loop exit basic block.  */
 162   basic_block loop_preheader;      /* The loop preheader basic block.  */
 163 };
 164
 165 static void decide_unroll_stupid (class loop *, int);
 166 static void decide_unroll_constant_iterations (class loop *, int);
 167 static void decide_unroll_runtime_iterations (class loop *, int);
 168 static void unroll_loop_stupid (class loop *);
 169 static void decide_unrolling (int);
 170 static void unroll_loop_constant_iterations (class loop *);
 171 static void unroll_loop_runtime_iterations (class loop *);
 172 static struct opt_info *analyze_insns_in_loop (class loop *);
 173 static void opt_info_start_duplication (struct opt_info *);
 174 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 175 static void free_opt_info (struct opt_info *);
 176 static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *);
 177 static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *);
 178 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 179 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 180 static void insert_var_expansion_initialization (struct var_to_expand *,
 181                                                  basic_block);
 182 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 183                                              basic_block);
 184 static rtx get_expansion (struct var_to_expand *);
 185
 186 /* Emit a message summarizing the unroll that will be
 187    performed for LOOP, along with the loop's location LOCUS, if
 188    appropriate given the dump or -fopt-info settings.  */
 189
 190 static void
 191 report_unroll (class loop *loop, dump_location_t locus)
 192 {
 193   dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 194
 195   if (loop->lpt_decision.decision == LPT_NONE)
 196     return;
 197
 198   if (!dump_enabled_p ())
 199     return;
 200
 201   dump_metadata_t metadata (report_flags, locus.get_impl_location ());
 202   dump_printf_loc (metadata, locus.get_user_location (),
 203                    "loop unrolled %d times",
 204                    loop->lpt_decision.times);
 205   if (profile_info && loop->header->count.initialized_p ())
 206     dump_printf (metadata,
 207                  " (header execution count %d)",
 208                  (int)loop->header->count.to_gcov_type ());
 209
 210   dump_printf (metadata, "\n");
 211 }
 212
 213 /* Decide whether unroll loops and how much.  */
 214 static void
 215 decide_unrolling (int flags)
 216 {
 217   class loop *loop;
 218
 219   /* Scan the loops, inner ones first.  */
 220   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 221     {
 222       loop->lpt_decision.decision = LPT_NONE;
 223       dump_user_location_t locus = get_loop_location (loop);
 224
 225       if (dump_enabled_p ())
 226         dump_printf_loc (MSG_NOTE, locus,
 227                          "considering unrolling loop %d at BB %d\n",
 228                          loop->num, loop->header->index);
 229
 230       if (loop->unroll == 1)
 231         {
 232           if (dump_file)
 233             fprintf (dump_file,
 234                      ";; Not unrolling loop, user didn't want it unrolled\n");
 235           continue;
 236         }
 237
 238       /* Do not peel cold areas.  */
 239       if (optimize_loop_for_size_p (loop))
 240         {
 241           if (dump_file)
 242             fprintf (dump_file, ";; Not considering loop, cold area\n");
 243           continue;
 244         }
 245
 246       /* Can the loop be manipulated?  */
 247       if (!can_duplicate_loop_p (loop))
 248         {
 249           if (dump_file)
 250             fprintf (dump_file,
 251                      ";; Not considering loop, cannot duplicate\n");
 252           continue;
 253         }
 254
 255       /* Skip non-innermost loops.  */
 256       if (loop->inner)
 257         {
 258           if (dump_file)
 259             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 260           continue;
 261         }
 262
 263       loop->ninsns = num_loop_insns (loop);
 264       loop->av_ninsns = average_num_loop_insns (loop);
 265
 266       /* Try transformations one by one in decreasing order of priority.  */
 267       decide_unroll_constant_iterations (loop, flags);
 268       if (loop->lpt_decision.decision == LPT_NONE)
 269         decide_unroll_runtime_iterations (loop, flags);
 270       if (loop->lpt_decision.decision == LPT_NONE)
 271         decide_unroll_stupid (loop, flags);
 272
 273       report_unroll (loop, locus);
 274     }
 275 }
 276
 277 /* Unroll LOOPS.  */
 278 void
 279 unroll_loops (int flags)
 280 {
 281   class loop *loop;
 282   bool changed = false;
 283
 284   /* Now decide rest of unrolling.  */
 285   decide_unrolling (flags);
 286
 287   /* Scan the loops, inner ones first.  */
 288   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 289     {
 290       /* And perform the appropriate transformations.  */
 291       switch (loop->lpt_decision.decision)
 292         {
 293         case LPT_UNROLL_CONSTANT:
 294           unroll_loop_constant_iterations (loop);
 295           changed = true;
 296           break;
 297         case LPT_UNROLL_RUNTIME:
 298           unroll_loop_runtime_iterations (loop);
 299           changed = true;
 300           break;
 301         case LPT_UNROLL_STUPID:
 302           unroll_loop_stupid (loop);
 303           changed = true;
 304           break;
 305         case LPT_NONE:
 306           break;
 307         default:
 308           gcc_unreachable ();
 309         }
 310     }
 311
 312     if (changed)
 313       {
 314         calculate_dominance_info (CDI_DOMINATORS);
 315         fix_loop_structure (NULL);
 316       }
 317
 318   iv_analysis_done ();
 319 }
 320
 321 /* Check whether exit of the LOOP is at the end of loop body.  */
 322
 323 static bool
 324 loop_exit_at_end_p (class loop *loop)
 325 {
 326   class niter_desc *desc = get_simple_loop_desc (loop);
 327   rtx_insn *insn;
 328
 329   /* We should never have conditional in latch block.  */
 330   gcc_assert (desc->in_edge->dest != loop->header);
 331
 332   if (desc->in_edge->dest != loop->latch)
 333     return false;
 334
 335   /* Check that the latch is empty.  */
 336   FOR_BB_INSNS (loop->latch, insn)
 337     {
 338       if (INSN_P (insn) && active_insn_p (insn))
 339         return false;
 340     }
 341
 342   return true;
 343 }
 344
 345 /* Decide whether to unroll LOOP iterating constant number of times
 346    and how much.  */
 347
 348 static void
 349 decide_unroll_constant_iterations (class loop *loop, int flags)
 350 {
 351   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 352   class niter_desc *desc;
 353   widest_int iterations;
 354
 355   /* If we were not asked to unroll this loop, just return back silently.  */
 356   if (!(flags & UAP_UNROLL) && !loop->unroll)
 357     return;
 358
 359   if (dump_enabled_p ())
 360     dump_printf (MSG_NOTE,
 361                  "considering unrolling loop with constant "
 362                  "number of iterations\n");
 363
 364   /* nunroll = total number of copies of the original loop body in
 365      unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
 366   nunroll = param_max_unrolled_insns / loop->ninsns;
 367   nunroll_by_av
 368     = param_max_average_unrolled_insns / loop->av_ninsns;
 369   if (nunroll > nunroll_by_av)
 370     nunroll = nunroll_by_av;
 371   if (nunroll > (unsigned) param_max_unroll_times)
 372     nunroll = param_max_unroll_times;
 373
 374   if (targetm.loop_unroll_adjust)
 375     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 376
 377   /* Skip big loops.  */
 378   if (nunroll <= 1)
 379     {
 380       if (dump_file)
 381         fprintf (dump_file, ";; Not considering loop, is too big\n");
 382       return;
 383     }
 384
 385   /* Check for simple loops.  */
 386   desc = get_simple_loop_desc (loop);
 387
 388   /* Check number of iterations.  */
 389   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 390     {
 391       if (dump_file)
 392         fprintf (dump_file,
 393                  ";; Unable to prove that the loop iterates constant times\n");
 394       return;
 395     }
 396
 397   /* Check for an explicit unrolling factor.  */
 398   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
 399     {
 400       /* However we cannot unroll completely at the RTL level a loop with
 401          constant number of iterations; it should have been peeled instead.  */
 402       if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
 403         {
 404           if (dump_file)
 405             fprintf (dump_file, ";; Loop should have been peeled\n");
 406         }
 407       else
 408         {
 409           loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 410           loop->lpt_decision.times = loop->unroll - 1;
 411         }
 412       return;
 413     }
 414
 415   /* Check whether the loop rolls enough to consider.
 416      Consult also loop bounds and profile; in the case the loop has more
 417      than one exit it may well loop less than determined maximal number
 418      of iterations.  */
 419   if (desc->niter < 2 * nunroll
 420       || ((get_estimated_loop_iterations (loop, &iterations)
 421            || get_likely_max_loop_iterations (loop, &iterations))
 422           && wi::ltu_p (iterations, 2 * nunroll)))
 423     {
 424       if (dump_file)
 425         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 426       return;
 427     }
 428
 429   /* Success; now compute number of iterations to unroll.  We alter
 430      nunroll so that as few as possible copies of loop body are
 431      necessary, while still not decreasing the number of unrollings
 432      too much (at most by 1).  */
 433   best_copies = 2 * nunroll + 10;
 434
 435   i = 2 * nunroll + 2;
 436   if (i > desc->niter - 2)
 437     i = desc->niter - 2;
 438
 439   for (; i >= nunroll - 1; i--)
 440     {
 441       unsigned exit_mod = desc->niter % (i + 1);
 442
 443       if (!loop_exit_at_end_p (loop))
 444         n_copies = exit_mod + i + 1;
 445       else if (exit_mod != (unsigned) i
 446                || desc->noloop_assumptions != NULL_RTX)
 447         n_copies = exit_mod + i + 2;
 448       else
 449         n_copies = i + 1;
 450
 451       if (n_copies < best_copies)
 452         {
 453           best_copies = n_copies;
 454           best_unroll = i;
 455         }
 456     }
 457
 458   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 459   loop->lpt_decision.times = best_unroll;
 460 }
 461
 462 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 463    The transformation does this:
 464
 465    for (i = 0; i < 102; i++)
 466      body;
 467
 468    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 469
 470    i = 0;
 471    body; i++;
 472    body; i++;
 473    while (i < 102)
 474      {
 475        body; i++;
 476        body; i++;
 477        body; i++;
 478        body; i++;
 479      }
 480   */
 481 static void
 482 unroll_loop_constant_iterations (class loop *loop)
 483 {
 484   unsigned HOST_WIDE_INT niter;
 485   unsigned exit_mod;
 486   unsigned i;
 487   edge e;
 488   unsigned max_unroll = loop->lpt_decision.times;
 489   class niter_desc *desc = get_simple_loop_desc (loop);
 490   bool exit_at_end = loop_exit_at_end_p (loop);
 491   struct opt_info *opt_info = NULL;
 492   bool ok;
 493
 494   niter = desc->niter;
 495
 496   /* Should not get here (such loop should be peeled instead).  */
 497   gcc_assert (niter > max_unroll + 1);
 498
 499   exit_mod = niter % (max_unroll + 1);
 500
 501   auto_sbitmap wont_exit (max_unroll + 2);
 502   bitmap_ones (wont_exit);
 503
 504   auto_vec<edge> remove_edges;
 505   if (flag_split_ivs_in_unroller
 506       || flag_variable_expansion_in_unroller)
 507     opt_info = analyze_insns_in_loop (loop);
 508
 509   if (!exit_at_end)
 510     {
 511       /* The exit is not at the end of the loop; leave exit test
 512          in the first copy, so that the loops that start with test
 513          of exit condition have continuous body after unrolling.  */
 514
 515       if (dump_file)
 516         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 517
 518       /* Peel exit_mod iterations.  */
 519       bitmap_clear_bit (wont_exit, 0);
 520       if (desc->noloop_assumptions)
 521         bitmap_clear_bit (wont_exit, 1);
 522
 523       if (exit_mod)
 524         {
 525           opt_info_start_duplication (opt_info);
 526           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 527                                               exit_mod,
 528                                               wont_exit, desc->out_edge,
 529                                               &remove_edges,
 530                                               DLTHE_FLAG_UPDATE_FREQ
 531                                               | (opt_info && exit_mod > 1
 532                                                  ? DLTHE_RECORD_COPY_NUMBER
 533                                                    : 0));
 534           gcc_assert (ok);
 535
 536           if (opt_info && exit_mod > 1)
 537             apply_opt_in_copies (opt_info, exit_mod, false, false);
 538
 539           desc->noloop_assumptions = NULL_RTX;
 540           desc->niter -= exit_mod;
 541           loop->nb_iterations_upper_bound -= exit_mod;
 542           if (loop->any_estimate
 543               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 544             loop->nb_iterations_estimate -= exit_mod;
 545           else
 546             loop->any_estimate = false;
 547           if (loop->any_likely_upper_bound
 548               && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
 549             loop->nb_iterations_likely_upper_bound -= exit_mod;
 550           else
 551             loop->any_likely_upper_bound = false;
 552         }
 553
 554       bitmap_set_bit (wont_exit, 1);
 555     }
 556   else
 557     {
 558       /* Leave exit test in last copy, for the same reason as above if
 559          the loop tests the condition at the end of loop body.  */
 560
 561       if (dump_file)
 562         fprintf (dump_file, ";; Condition at end of loop.\n");
 563
 564       /* We know that niter >= max_unroll + 2; so we do not need to care of
 565          case when we would exit before reaching the loop.  So just peel
 566          exit_mod + 1 iterations.  */
 567       if (exit_mod != max_unroll
 568           || desc->noloop_assumptions)
 569         {
 570           bitmap_clear_bit (wont_exit, 0);
 571           if (desc->noloop_assumptions)
 572             bitmap_clear_bit (wont_exit, 1);
 573
 574           opt_info_start_duplication (opt_info);
 575           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 576                                               exit_mod + 1,
 577                                               wont_exit, desc->out_edge,
 578                                               &remove_edges,
 579                                               DLTHE_FLAG_UPDATE_FREQ
 580                                               | (opt_info && exit_mod > 0
 581                                                  ? DLTHE_RECORD_COPY_NUMBER
 582                                                    : 0));
 583           gcc_assert (ok);
 584
 585           if (opt_info && exit_mod > 0)
 586             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 587
 588           desc->niter -= exit_mod + 1;
 589           loop->nb_iterations_upper_bound -= exit_mod + 1;
 590           if (loop->any_estimate
 591               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 592             loop->nb_iterations_estimate -= exit_mod + 1;
 593           else
 594             loop->any_estimate = false;
 595           if (loop->any_likely_upper_bound
 596               && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
 597             loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
 598           else
 599             loop->any_likely_upper_bound = false;
 600           desc->noloop_assumptions = NULL_RTX;
 601
 602           bitmap_set_bit (wont_exit, 0);
 603           bitmap_set_bit (wont_exit, 1);
 604         }
 605
 606       bitmap_clear_bit (wont_exit, max_unroll);
 607     }
 608
 609   /* Now unroll the loop.  */
 610
 611   opt_info_start_duplication (opt_info);
 612   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 613                                       max_unroll,
 614                                       wont_exit, desc->out_edge,
 615                                       &remove_edges,
 616                                       DLTHE_FLAG_UPDATE_FREQ
 617                                       | (opt_info
 618                                          ? DLTHE_RECORD_COPY_NUMBER
 619                                            : 0));
 620   gcc_assert (ok);
 621
 622   if (opt_info)
 623     {
 624       apply_opt_in_copies (opt_info, max_unroll, true, true);
 625       free_opt_info (opt_info);
 626     }
 627
 628   if (exit_at_end)
 629     {
 630       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 631       /* Find a new in and out edge; they are in the last copy we have made.  */
 632
 633       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 634         {
 635           desc->out_edge = EDGE_SUCC (exit_block, 0);
 636           desc->in_edge = EDGE_SUCC (exit_block, 1);
 637         }
 638       else
 639         {
 640           desc->out_edge = EDGE_SUCC (exit_block, 1);
 641           desc->in_edge = EDGE_SUCC (exit_block, 0);
 642         }
 643     }
 644
 645   desc->niter /= max_unroll + 1;
 646   loop->nb_iterations_upper_bound
 647     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 648   if (loop->any_estimate)
 649     loop->nb_iterations_estimate
 650       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 651   if (loop->any_likely_upper_bound)
 652     loop->nb_iterations_likely_upper_bound
 653       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
 654   desc->niter_expr = gen_int_mode (desc->niter, desc->mode);
 655
 656   /* Remove the edges.  */
 657   FOR_EACH_VEC_ELT (remove_edges, i, e)
 658     remove_path (e);
 659
 660   if (dump_file)
 661     fprintf (dump_file,
 662              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 663              max_unroll, num_loop_insns (loop));
 664 }
 665
 666 /* Decide whether to unroll LOOP iterating runtime computable number of times
 667    and how much.  */
 668 static void
 669 decide_unroll_runtime_iterations (class loop *loop, int flags)
 670 {
 671   unsigned nunroll, nunroll_by_av, i;
 672   class niter_desc *desc;
 673   widest_int iterations;
 674
 675   /* If we were not asked to unroll this loop, just return back silently.  */
 676   if (!(flags & UAP_UNROLL) && !loop->unroll)
 677     return;
 678
 679   if (dump_enabled_p ())
 680     dump_printf (MSG_NOTE,
 681                  "considering unrolling loop with runtime-"
 682                  "computable number of iterations\n");
 683
 684   /* nunroll = total number of copies of the original loop body in
 685      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 686   nunroll = param_max_unrolled_insns / loop->ninsns;
 687   nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
 688   if (nunroll > nunroll_by_av)
 689     nunroll = nunroll_by_av;
 690   if (nunroll > (unsigned) param_max_unroll_times)
 691     nunroll = param_max_unroll_times;
 692
 693   if (targetm.loop_unroll_adjust)
 694     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 695
 696   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
 697     nunroll = loop->unroll;
 698
 699   /* Skip big loops.  */
 700   if (nunroll <= 1)
 701     {
 702       if (dump_file)
 703         fprintf (dump_file, ";; Not considering loop, is too big\n");
 704       return;
 705     }
 706
 707   /* Check for simple loops.  */
 708   desc = get_simple_loop_desc (loop);
 709
 710   /* Check simpleness.  */
 711   if (!desc->simple_p || desc->assumptions)
 712     {
 713       if (dump_file)
 714         fprintf (dump_file,
 715                  ";; Unable to prove that the number of iterations "
 716                  "can be counted in runtime\n");
 717       return;
 718     }
 719
 720   if (desc->const_iter)
 721     {
 722       if (dump_file)
 723         fprintf (dump_file, ";; Loop iterates constant times\n");
 724       return;
 725     }
 726
 727   /* Check whether the loop rolls.  */
 728   if ((get_estimated_loop_iterations (loop, &iterations)
 729        || get_likely_max_loop_iterations (loop, &iterations))
 730       && wi::ltu_p (iterations, 2 * nunroll))
 731     {
 732       if (dump_file)
 733         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 734       return;
 735     }
 736
 737   /* Success; now force nunroll to be power of 2, as code-gen
 738      requires it, we are unable to cope with overflows in
 739      computation of number of iterations.  */
 740   for (i = 1; 2 * i <= nunroll; i *= 2)
 741     continue;
 742
 743   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 744   loop->lpt_decision.times = i - 1;
 745 }
 746
 747 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 748    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 749    and NULL is returned instead.  */
 750
 751 basic_block
 752 split_edge_and_insert (edge e, rtx_insn *insns)
 753 {
 754   basic_block bb;
 755
 756   if (!insns)
 757     return NULL;
 758   bb = split_edge (e);
 759   emit_insn_after (insns, BB_END (bb));
 760
 761   /* ??? We used to assume that INSNS can contain control flow insns, and
 762      that we had to try to find sub basic blocks in BB to maintain a valid
 763      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 764      and call break_superblocks when going out of cfglayout mode.  But it
 765      turns out that this never happens; and that if it does ever happen,
 766      the verify_flow_info at the end of the RTL loop passes would fail.
 767
 768      There are two reasons why we expected we could have control flow insns
 769      in INSNS.  The first is when a comparison has to be done in parts, and
 770      the second is when the number of iterations is computed for loops with
 771      the number of iterations known at runtime.  In both cases, test cases
 772      to get control flow in INSNS appear to be impossible to construct:
 773
 774       * If do_compare_rtx_and_jump needs several branches to do comparison
 775         in a mode that needs comparison by parts, we cannot analyze the
 776         number of iterations of the loop, and we never get to unrolling it.
 777
 778       * The code in expand_divmod that was suspected to cause creation of
 779         branching code seems to be only accessed for signed division.  The
 780         divisions used by # of iterations analysis are always unsigned.
 781         Problems might arise on architectures that emits branching code
 782         for some operations that may appear in the unroller (especially
 783         for division), but we have no such architectures.
 784
 785      Considering all this, it was decided that we should for now assume
 786      that INSNS can in theory contain control flow insns, but in practice
 787      it never does.  So we don't handle the theoretical case, and should
 788      a real failure ever show up, we have a pretty good clue for how to
 789      fix it.  */
 790
 791   return bb;
 792 }
 793
 794 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 795    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 796    in order to create a jump.  */
 797
 798 static rtx_insn *
 799 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
 800                       rtx_code_label *label, profile_probability prob,
 801                       rtx_insn *cinsn)
 802 {
 803   rtx_insn *seq;
 804   rtx_jump_insn *jump;
 805   rtx cond;
 806   machine_mode mode;
 807
 808   mode = GET_MODE (op0);
 809   if (mode == VOIDmode)
 810     mode = GET_MODE (op1);
 811
 812   start_sequence ();
 813   if (GET_MODE_CLASS (mode) == MODE_CC)
 814     {
 815       /* A hack -- there seems to be no easy generic way how to make a
 816          conditional jump from a ccmode comparison.  */
 817       gcc_assert (cinsn);
 818       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 819       gcc_assert (GET_CODE (cond) == comp);
 820       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 821       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 822       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 823       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 824       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 825       LABEL_NUSES (JUMP_LABEL (jump))++;
 826       redirect_jump (jump, label, 0);
 827     }
 828   else
 829     {
 830       gcc_assert (!cinsn);
 831
 832       op0 = force_operand (op0, NULL_RTX);
 833       op1 = force_operand (op1, NULL_RTX);
 834       do_compare_rtx_and_jump (op0, op1, comp, 0,
 835                                mode, NULL_RTX, NULL, label,
 836                                profile_probability::uninitialized ());
 837       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 838       jump->set_jump_target (label);
 839       LABEL_NUSES (label)++;
 840     }
 841   if (prob.initialized_p ())
 842     add_reg_br_prob_note (jump, prob);
 843
 844   seq = get_insns ();
 845   end_sequence ();
 846
 847   return seq;
 848 }
 849
 850 /* Unroll LOOP for which we are able to count number of iterations in
 851    runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
 852    power of two.  The transformation does this (with some extra care
 853    for case n < 0):
 854
 855    for (i = 0; i < n; i++)
 856      body;
 857
 858    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 859
 860    i = 0;
 861    mod = n % 4;
 862
 863    switch (mod)
 864      {
 865        case 3:
 866          body; i++;
 867        case 2:
 868          body; i++;
 869        case 1:
 870          body; i++;
 871        case 0: ;
 872      }
 873
 874    while (i < n)
 875      {
 876        body; i++;
 877        body; i++;
 878        body; i++;
 879        body; i++;
 880      }
 881    */
 882 static void
 883 unroll_loop_runtime_iterations (class loop *loop)
 884 {
 885   rtx old_niter, niter, tmp;
 886   rtx_insn *init_code, *branch_code;
 887   unsigned i, j;
 888   profile_probability p;
 889   basic_block preheader, *body, swtch, ezc_swtch = NULL;
 890   int may_exit_copy;
 891   profile_count iter_count, new_count;
 892   unsigned n_peel;
 893   edge e;
 894   bool extra_zero_check, last_may_exit;
 895   unsigned max_unroll = loop->lpt_decision.times;
 896   class niter_desc *desc = get_simple_loop_desc (loop);
 897   bool exit_at_end = loop_exit_at_end_p (loop);
 898   struct opt_info *opt_info = NULL;
 899   bool ok;
 900
 901   if (flag_split_ivs_in_unroller
 902       || flag_variable_expansion_in_unroller)
 903     opt_info = analyze_insns_in_loop (loop);
 904
 905   /* Remember blocks whose dominators will have to be updated.  */
 906   auto_vec<basic_block> dom_bbs;
 907
 908   body = get_loop_body (loop);
 909   for (i = 0; i < loop->num_nodes; i++)
 910     {
 911       vec<basic_block> ldom;
 912       basic_block bb;
 913
 914       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
 915       FOR_EACH_VEC_ELT (ldom, j, bb)
 916         if (!flow_bb_inside_loop_p (loop, bb))
 917           dom_bbs.safe_push (bb);
 918
 919       ldom.release ();
 920     }
 921   free (body);
 922
 923   if (!exit_at_end)
 924     {
 925       /* Leave exit in first copy (for explanation why see comment in
 926          unroll_loop_constant_iterations).  */
 927       may_exit_copy = 0;
 928       n_peel = max_unroll - 1;
 929       extra_zero_check = true;
 930       last_may_exit = false;
 931     }
 932   else
 933     {
 934       /* Leave exit in last copy (for explanation why see comment in
 935          unroll_loop_constant_iterations).  */
 936       may_exit_copy = max_unroll;
 937       n_peel = max_unroll;
 938       extra_zero_check = false;
 939       last_may_exit = true;
 940     }
 941
 942   /* Get expression for number of iterations.  */
 943   start_sequence ();
 944   old_niter = niter = gen_reg_rtx (desc->mode);
 945   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 946   if (tmp != niter)
 947     emit_move_insn (niter, tmp);
 948
 949   /* For loops that exit at end and whose number of iterations is reliable,
 950      add one to niter to account for first pass through loop body before
 951      reaching exit test. */
 952   if (exit_at_end && !desc->noloop_assumptions)
 953     {
 954       niter = expand_simple_binop (desc->mode, PLUS,
 955                                    niter, const1_rtx,
 956                                    NULL_RTX, 0, OPTAB_LIB_WIDEN);
 957       old_niter = niter;
 958     }
 959
 960   /* Count modulo by ANDing it with max_unroll; we use the fact that
 961      the number of unrollings is a power of two, and thus this is correct
 962      even if there is overflow in the computation.  */
 963   niter = expand_simple_binop (desc->mode, AND,
 964                                niter, gen_int_mode (max_unroll, desc->mode),
 965                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 966
 967   init_code = get_insns ();
 968   end_sequence ();
 969   unshare_all_rtl_in_chain (init_code);
 970
 971   /* Precondition the loop.  */
 972   split_edge_and_insert (loop_preheader_edge (loop), init_code);
 973
 974   auto_vec<edge> remove_edges;
 975
 976   auto_sbitmap wont_exit (max_unroll + 2);
 977
 978   if (extra_zero_check || desc->noloop_assumptions)
 979     {
 980       /* Peel the first copy of loop body.  Leave the exit test if the number
 981          of iterations is not reliable.  Also record the place of the extra zero
 982          check.  */
 983       bitmap_clear (wont_exit);
 984       if (!desc->noloop_assumptions)
 985         bitmap_set_bit (wont_exit, 1);
 986       ezc_swtch = loop_preheader_edge (loop)->src;
 987       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 988                                           1, wont_exit, desc->out_edge,
 989                                           &remove_edges,
 990                                           DLTHE_FLAG_UPDATE_FREQ);
 991       gcc_assert (ok);
 992     }
 993
 994   /* Record the place where switch will be built for preconditioning.  */
 995   swtch = split_edge (loop_preheader_edge (loop));
 996
 997   /* Compute count increments for each switch block and initialize
 998      innermost switch block.  Switch blocks and peeled loop copies are built
 999      from innermost outward.  */
1000   iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
1001   swtch->count = new_count;
1002
1003   for (i = 0; i < n_peel; i++)
1004     {
1005       /* Peel the copy.  */
1006       bitmap_clear (wont_exit);
1007       if (i != n_peel - 1 || !last_may_exit)
1008         bitmap_set_bit (wont_exit, 1);
1009       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
1010                                           1, wont_exit, desc->out_edge,
1011                                           &remove_edges,
1012                                           DLTHE_FLAG_UPDATE_FREQ);
1013       gcc_assert (ok);
1014
1015       /* Create item for switch.  */
1016       j = n_peel - i - (extra_zero_check ? 0 : 1);
1017       p = profile_probability::always ().apply_scale (1, i + 2);
1018
1019       preheader = split_edge (loop_preheader_edge (loop));
1020       /* Add in count of edge from switch block.  */
1021       preheader->count += iter_count;
1022       branch_code = compare_and_jump_seq (copy_rtx (niter),
1023                                           gen_int_mode (j, desc->mode), EQ,
1024                                           block_label (preheader), p, NULL);
1025
1026       /* We rely on the fact that the compare and jump cannot be optimized out,
1027          and hence the cfg we create is correct.  */
1028       gcc_assert (branch_code != NULL_RTX);
1029
1030       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1031       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1032       single_succ_edge (swtch)->probability = p.invert ();
1033       new_count += iter_count;
1034       swtch->count = new_count;
1035       e = make_edge (swtch, preheader,
1036                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1037       e->probability = p;
1038     }
1039
1040   if (extra_zero_check)
1041     {
1042       /* Add branch for zero iterations.  */
1043       p = profile_probability::always ().apply_scale (1, max_unroll + 1);
1044       swtch = ezc_swtch;
1045       preheader = split_edge (loop_preheader_edge (loop));
1046       /* Recompute count adjustments since initial peel copy may
1047          have exited and reduced those values that were computed above.  */
1048       iter_count = swtch->count.apply_scale (1, max_unroll + 1);
1049       /* Add in count of edge from switch block.  */
1050       preheader->count += iter_count;
1051       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1052                                           block_label (preheader), p,
1053                                           NULL);
1054       gcc_assert (branch_code != NULL_RTX);
1055
1056       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1057       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1058       single_succ_edge (swtch)->probability = p.invert ();
1059       e = make_edge (swtch, preheader,
1060                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1061       e->probability = p;
1062     }
1063
1064   /* Recount dominators for outer blocks.  */
1065   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1066
1067   /* And unroll loop.  */
1068
1069   bitmap_ones (wont_exit);
1070   bitmap_clear_bit (wont_exit, may_exit_copy);
1071   opt_info_start_duplication (opt_info);
1072
1073   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1074                                       max_unroll,
1075                                       wont_exit, desc->out_edge,
1076                                       &remove_edges,
1077                                       DLTHE_FLAG_UPDATE_FREQ
1078                                       | (opt_info
1079                                          ? DLTHE_RECORD_COPY_NUMBER
1080                                            : 0));
1081   gcc_assert (ok);
1082
1083   if (opt_info)
1084     {
1085       apply_opt_in_copies (opt_info, max_unroll, true, true);
1086       free_opt_info (opt_info);
1087     }
1088
1089   if (exit_at_end)
1090     {
1091       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1092       /* Find a new in and out edge; they are in the last copy we have
1093          made.  */
1094
1095       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1096         {
1097           desc->out_edge = EDGE_SUCC (exit_block, 0);
1098           desc->in_edge = EDGE_SUCC (exit_block, 1);
1099         }
1100       else
1101         {
1102           desc->out_edge = EDGE_SUCC (exit_block, 1);
1103           desc->in_edge = EDGE_SUCC (exit_block, 0);
1104         }
1105     }
1106
1107   /* Remove the edges.  */
1108   FOR_EACH_VEC_ELT (remove_edges, i, e)
1109     remove_path (e);
1110
1111   /* We must be careful when updating the number of iterations due to
1112      preconditioning and the fact that the value must be valid at entry
1113      of the loop.  After passing through the above code, we see that
1114      the correct new number of iterations is this:  */
1115   gcc_assert (!desc->const_iter);
1116   desc->niter_expr =
1117     simplify_gen_binary (UDIV, desc->mode, old_niter,
1118                          gen_int_mode (max_unroll + 1, desc->mode));
1119   loop->nb_iterations_upper_bound
1120     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1121   if (loop->any_estimate)
1122     loop->nb_iterations_estimate
1123       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1124   if (loop->any_likely_upper_bound)
1125     loop->nb_iterations_likely_upper_bound
1126       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
1127   if (exit_at_end)
1128     {
1129       desc->niter_expr =
1130         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1131       desc->noloop_assumptions = NULL_RTX;
1132       --loop->nb_iterations_upper_bound;
1133       if (loop->any_estimate
1134           && loop->nb_iterations_estimate != 0)
1135         --loop->nb_iterations_estimate;
1136       else
1137         loop->any_estimate = false;
1138       if (loop->any_likely_upper_bound
1139           && loop->nb_iterations_likely_upper_bound != 0)
1140         --loop->nb_iterations_likely_upper_bound;
1141       else
1142         loop->any_likely_upper_bound = false;
1143     }
1144
1145   if (dump_file)
1146     fprintf (dump_file,
1147              ";; Unrolled loop %d times, counting # of iterations "
1148              "in runtime, %i insns\n",
1149              max_unroll, num_loop_insns (loop));
1150 }
1151
1152 /* Decide whether to unroll LOOP stupidly and how much.  */
1153 static void
1154 decide_unroll_stupid (class loop *loop, int flags)
1155 {
1156   unsigned nunroll, nunroll_by_av, i;
1157   class niter_desc *desc;
1158   widest_int iterations;
1159
1160   /* If we were not asked to unroll this loop, just return back silently.  */
1161   if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
1162     return;
1163
1164   if (dump_enabled_p ())
1165     dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");
1166
1167   /* nunroll = total number of copies of the original loop body in
1168      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1169   nunroll = param_max_unrolled_insns / loop->ninsns;
1170   nunroll_by_av
1171     = param_max_average_unrolled_insns / loop->av_ninsns;
1172   if (nunroll > nunroll_by_av)
1173     nunroll = nunroll_by_av;
1174   if (nunroll > (unsigned) param_max_unroll_times)
1175     nunroll = param_max_unroll_times;
1176
1177   if (targetm.loop_unroll_adjust)
1178     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1179
1180   if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
1181     nunroll = loop->unroll;
1182
1183   /* Skip big loops.  */
1184   if (nunroll <= 1)
1185     {
1186       if (dump_file)
1187         fprintf (dump_file, ";; Not considering loop, is too big\n");
1188       return;
1189     }
1190
1191   /* Check for simple loops.  */
1192   desc = get_simple_loop_desc (loop);
1193
1194   /* Check simpleness.  */
1195   if (desc->simple_p && !desc->assumptions)
1196     {
1197       if (dump_file)
1198         fprintf (dump_file, ";; Loop is simple\n");
1199       return;
1200     }
1201
1202   /* Do not unroll loops with branches inside -- it increases number
1203      of mispredicts.
1204      TODO: this heuristic needs tunning; call inside the loop body
1205      is also relatively good reason to not unroll.  */
1206   if (num_loop_branches (loop) > 1)
1207     {
1208       if (dump_file)
1209         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1210       return;
1211     }
1212
1213   /* Check whether the loop rolls.  */
1214   if ((get_estimated_loop_iterations (loop, &iterations)
1215        || get_likely_max_loop_iterations (loop, &iterations))
1216       && wi::ltu_p (iterations, 2 * nunroll))
1217     {
1218       if (dump_file)
1219         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1220       return;
1221     }
1222
1223   /* Success.  Now force nunroll to be power of 2, as it seems that this
1224      improves results (partially because of better alignments, partially
1225      because of some dark magic).  */
1226   for (i = 1; 2 * i <= nunroll; i *= 2)
1227     continue;
1228
1229   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1230   loop->lpt_decision.times = i - 1;
1231 }
1232
1233 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1234
1235    while (cond)
1236      body;
1237
1238    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1239
1240    while (cond)
1241      {
1242        body;
1243        if (!cond) break;
1244        body;
1245        if (!cond) break;
1246        body;
1247        if (!cond) break;
1248        body;
1249      }
1250    */
1251 static void
1252 unroll_loop_stupid (class loop *loop)
1253 {
1254   unsigned nunroll = loop->lpt_decision.times;
1255   class niter_desc *desc = get_simple_loop_desc (loop);
1256   struct opt_info *opt_info = NULL;
1257   bool ok;
1258
1259   if (flag_split_ivs_in_unroller
1260       || flag_variable_expansion_in_unroller)
1261     opt_info = analyze_insns_in_loop (loop);
1262
1263   auto_sbitmap wont_exit (nunroll + 1);
1264   bitmap_clear (wont_exit);
1265   opt_info_start_duplication (opt_info);
1266
1267   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1268                                       nunroll, wont_exit,
1269                                       NULL, NULL,
1270                                       DLTHE_FLAG_UPDATE_FREQ
1271                                       | (opt_info
1272                                          ? DLTHE_RECORD_COPY_NUMBER
1273                                            : 0));
1274   gcc_assert (ok);
1275
1276   if (opt_info)
1277     {
1278       apply_opt_in_copies (opt_info, nunroll, true, true);
1279       free_opt_info (opt_info);
1280     }
1281
1282   if (desc->simple_p)
1283     {
1284       /* We indeed may get here provided that there are nontrivial assumptions
1285          for a loop to be really simple.  We could update the counts, but the
1286          problem is that we are unable to decide which exit will be taken
1287          (not really true in case the number of iterations is constant,
1288          but no one will do anything with this information, so we do not
1289          worry about it).  */
1290       desc->simple_p = false;
1291     }
1292
1293   if (dump_file)
1294     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1295              nunroll, num_loop_insns (loop));
1296 }
1297
1298 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1299    Set *DEBUG_USES to the number of debug insns that reference the
1300    variable.  */
1301
1302 static bool
1303 referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg,
1304                                   int *debug_uses)
1305 {
1306   basic_block *body, bb;
1307   unsigned i;
1308   int count_ref = 0;
1309   rtx_insn *insn;
1310
1311   body = get_loop_body (loop);
1312   for (i = 0; i < loop->num_nodes; i++)
1313     {
1314       bb = body[i];
1315
1316       FOR_BB_INSNS (bb, insn)
1317         if (!rtx_referenced_p (reg, insn))
1318           continue;
1319         else if (DEBUG_INSN_P (insn))
1320           ++*debug_uses;
1321         else if (++count_ref > 1)
1322           break;
1323     }
1324   free (body);
1325   return (count_ref  == 1);
1326 }
1327
1328 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1329
1330 static void
1331 reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
1332 {
1333   basic_block *body, bb;
1334   unsigned i;
1335   rtx_insn *insn;
1336
1337   body = get_loop_body (loop);
1338   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1339     {
1340       bb = body[i];
1341
1342       FOR_BB_INSNS (bb, insn)
1343         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1344           continue;
1345         else
1346           {
1347             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1348                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1349             if (!--debug_uses)
1350               break;
1351           }
1352     }
1353   free (body);
1354 }
1355
1356 /* Determine whether INSN contains an accumulator
1357    which can be expanded into separate copies,
1358    one for each copy of the LOOP body.
1359
1360    for (i = 0 ; i < n; i++)
1361      sum += a[i];
1362
1363    ==>
1364
1365    sum += a[i]
1366    ....
1367    i = i+1;
1368    sum1 += a[i]
1369    ....
1370    i = i+1
1371    sum2 += a[i];
1372    ....
1373
1374    Return NULL if INSN contains no opportunity for expansion of accumulator.
1375    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1376    information and return a pointer to it.
1377 */
1378
1379 static struct var_to_expand *
1380 analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn)
1381 {
1382   rtx set, dest, src;
1383   struct var_to_expand *ves;
1384   unsigned accum_pos;
1385   enum rtx_code code;
1386   int debug_uses = 0;
1387
1388   set = single_set (insn);
1389   if (!set)
1390     return NULL;
1391
1392   dest = SET_DEST (set);
1393   src = SET_SRC (set);
1394   code = GET_CODE (src);
1395
1396   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1397     return NULL;
1398
1399   if (FLOAT_MODE_P (GET_MODE (dest)))
1400     {
1401       if (!flag_associative_math)
1402         return NULL;
1403       /* In the case of FMA, we're also changing the rounding.  */
1404       if (code == FMA && !flag_unsafe_math_optimizations)
1405         return NULL;
1406     }
1407
1408   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1409      in MD.  But if there is no optab to generate the insn, we cannot
1410      perform the variable expansion.  This can happen if an MD provides
1411      an insn but not a named pattern to generate it, for example to avoid
1412      producing code that needs additional mode switches like for x87/mmx.
1413
1414      So we check have_insn_for which looks for an optab for the operation
1415      in SRC.  If it doesn't exist, we can't perform the expansion even
1416      though INSN is valid.  */
1417   if (!have_insn_for (code, GET_MODE (src)))
1418     return NULL;
1419
1420   if (!REG_P (dest)
1421       && !(GET_CODE (dest) == SUBREG
1422            && REG_P (SUBREG_REG (dest))))
1423     return NULL;
1424
1425   /* Find the accumulator use within the operation.  */
1426   if (code == FMA)
1427     {
1428       /* We only support accumulation via FMA in the ADD position.  */
1429       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1430         return NULL;
1431       accum_pos = 2;
1432     }
1433   else if (rtx_equal_p (dest, XEXP (src, 0)))
1434     accum_pos = 0;
1435   else if (rtx_equal_p (dest, XEXP (src, 1)))
1436     {
1437       /* The method of expansion that we are using; which includes the
1438          initialization of the expansions with zero and the summation of
1439          the expansions at the end of the computation will yield wrong
1440          results for (x = something - x) thus avoid using it in that case.  */
1441       if (code == MINUS)
1442         return NULL;
1443       accum_pos = 1;
1444     }
1445   else
1446     return NULL;
1447
1448   /* It must not otherwise be used.  */
1449   if (code == FMA)
1450     {
1451       if (rtx_referenced_p (dest, XEXP (src, 0))
1452           || rtx_referenced_p (dest, XEXP (src, 1)))
1453         return NULL;
1454     }
1455   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1456     return NULL;
1457
1458   /* It must be used in exactly one insn.  */
1459   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1460     return NULL;
1461
1462   if (dump_file)
1463     {
1464       fprintf (dump_file, "\n;; Expanding Accumulator ");
1465       print_rtl (dump_file, dest);
1466       fprintf (dump_file, "\n");
1467     }
1468
1469   if (debug_uses)
1470     /* Instead of resetting the debug insns, we could replace each
1471        debug use in the loop with the sum or product of all expanded
1472        accumulators.  Since we'll only know of all expansions at the
1473        end, we'd have to keep track of which vars_to_expand a debug
1474        insn in the loop references, take note of each copy of the
1475        debug insn during unrolling, and when it's all done, compute
1476        the sum or product of each variable and adjust the original
1477        debug insn and each copy thereof.  What a pain!  */
1478     reset_debug_uses_in_loop (loop, dest, debug_uses);
1479
1480   /* Record the accumulator to expand.  */
1481   ves = XNEW (struct var_to_expand);
1482   ves->insn = insn;
1483   ves->reg = copy_rtx (dest);
1484   ves->var_expansions.create (1);
1485   ves->next = NULL;
1486   ves->op = GET_CODE (src);
1487   ves->expansion_count = 0;
1488   ves->reuse_expansion = 0;
1489   return ves;
1490 }
1491
1492 /* Determine whether there is an induction variable in INSN that
1493    we would like to split during unrolling.
1494
1495    I.e. replace
1496
1497    i = i + 1;
1498    ...
1499    i = i + 1;
1500    ...
1501    i = i + 1;
1502    ...
1503
1504    type chains by
1505
1506    i0 = i + 1
1507    ...
1508    i = i0 + 1
1509    ...
1510    i = i0 + 2
1511    ...
1512
1513    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1514    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1515    pointer to it.  */
1516
1517 static struct iv_to_split *
1518 analyze_iv_to_split_insn (rtx_insn *insn)
1519 {
1520   rtx set, dest;
1521   class rtx_iv iv;
1522   struct iv_to_split *ivts;
1523   scalar_int_mode mode;
1524   bool ok;
1525
1526   /* For now we just split the basic induction variables.  Later this may be
1527      extended for example by selecting also addresses of memory references.  */
1528   set = single_set (insn);
1529   if (!set)
1530     return NULL;
1531
1532   dest = SET_DEST (set);
1533   if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
1534     return NULL;
1535
1536   if (!biv_p (insn, mode, dest))
1537     return NULL;
1538
1539   ok = iv_analyze_result (insn, dest, &iv);
1540
1541   /* This used to be an assert under the assumption that if biv_p returns
1542      true that iv_analyze_result must also return true.  However, that
1543      assumption is not strictly correct as evidenced by pr25569.
1544
1545      Returning NULL when iv_analyze_result returns false is safe and
1546      avoids the problems in pr25569 until the iv_analyze_* routines
1547      can be fixed, which is apparently hard and time consuming
1548      according to their author.  */
1549   if (! ok)
1550     return NULL;
1551
1552   if (iv.step == const0_rtx
1553       || iv.mode != iv.extend_mode)
1554     return NULL;
1555
1556   /* Record the insn to split.  */
1557   ivts = XNEW (struct iv_to_split);
1558   ivts->insn = insn;
1559   ivts->orig_var = dest;
1560   ivts->base_var = NULL_RTX;
1561   ivts->step = iv.step;
1562   ivts->next = NULL;
1563
1564   return ivts;
1565 }
1566
1567 /* Determines which of insns in LOOP can be optimized.
1568    Return a OPT_INFO struct with the relevant hash tables filled
1569    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1570    is undefined for the return value.  */
1571
1572 static struct opt_info *
1573 analyze_insns_in_loop (class loop *loop)
1574 {
1575   basic_block *body, bb;
1576   unsigned i;
1577   struct opt_info *opt_info = XCNEW (struct opt_info);
1578   rtx_insn *insn;
1579   struct iv_to_split *ivts = NULL;
1580   struct var_to_expand *ves = NULL;
1581   iv_to_split **slot1;
1582   var_to_expand **slot2;
1583   vec<edge> edges = get_loop_exit_edges (loop);
1584   edge exit;
1585   bool can_apply = false;
1586
1587   iv_analysis_loop_init (loop);
1588
1589   body = get_loop_body (loop);
1590
1591   if (flag_split_ivs_in_unroller)
1592     {
1593       opt_info->insns_to_split
1594         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1595       opt_info->iv_to_split_head = NULL;
1596       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1597     }
1598
1599   /* Record the loop exit bb and loop preheader before the unrolling.  */
1600   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1601
1602   if (edges.length () == 1)
1603     {
1604       exit = edges[0];
1605       if (!(exit->flags & EDGE_COMPLEX))
1606         {
1607           opt_info->loop_exit = split_edge (exit);
1608           can_apply = true;
1609         }
1610     }
1611
1612   if (flag_variable_expansion_in_unroller
1613       && can_apply)
1614     {
1615       opt_info->insns_with_var_to_expand
1616         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1617       opt_info->var_to_expand_head = NULL;
1618       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1619     }
1620
1621   for (i = 0; i < loop->num_nodes; i++)
1622     {
1623       bb = body[i];
1624       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1625         continue;
1626
1627       FOR_BB_INSNS (bb, insn)
1628       {
1629         if (!INSN_P (insn))
1630           continue;
1631
1632         if (opt_info->insns_to_split)
1633           ivts = analyze_iv_to_split_insn (insn);
1634
1635         if (ivts)
1636           {
1637             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1638             gcc_assert (*slot1 == NULL);
1639             *slot1 = ivts;
1640             *opt_info->iv_to_split_tail = ivts;
1641             opt_info->iv_to_split_tail = &ivts->next;
1642             continue;
1643           }
1644
1645         if (opt_info->insns_with_var_to_expand)
1646           ves = analyze_insn_to_expand_var (loop, insn);
1647
1648         if (ves)
1649           {
1650             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1651             gcc_assert (*slot2 == NULL);
1652             *slot2 = ves;
1653             *opt_info->var_to_expand_tail = ves;
1654             opt_info->var_to_expand_tail = &ves->next;
1655           }
1656       }
1657     }
1658
1659   edges.release ();
1660   free (body);
1661   return opt_info;
1662 }
1663
1664 /* Called just before loop duplication.  Records start of duplicated area
1665    to OPT_INFO.  */
1666
1667 static void
1668 opt_info_start_duplication (struct opt_info *opt_info)
1669 {
1670   if (opt_info)
1671     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1672 }
1673
1674 /* Determine the number of iterations between initialization of the base
1675    variable and the current copy (N_COPY).  N_COPIES is the total number
1676    of newly created copies.  UNROLLING is true if we are unrolling
1677    (not peeling) the loop.  */
1678
1679 static unsigned
1680 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1681 {
1682   if (unrolling)
1683     {
1684       /* If we are unrolling, initialization is done in the original loop
1685          body (number 0).  */
1686       return n_copy;
1687     }
1688   else
1689     {
1690       /* If we are peeling, the copy in that the initialization occurs has
1691          number 1.  The original loop (number 0) is the last.  */
1692       if (n_copy)
1693         return n_copy - 1;
1694       else
1695         return n_copies;
1696     }
1697 }
1698
1699 /* Allocate basic variable for the induction variable chain.  */
1700
1701 static void
1702 allocate_basic_variable (struct iv_to_split *ivts)
1703 {
1704   rtx expr = SET_SRC (single_set (ivts->insn));
1705
1706   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1707 }
1708
1709 /* Insert initialization of basic variable of IVTS before INSN, taking
1710    the initial value from INSN.  */
1711
1712 static void
1713 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1714 {
1715   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1716   rtx_insn *seq;
1717
1718   start_sequence ();
1719   expr = force_operand (expr, ivts->base_var);
1720   if (expr != ivts->base_var)
1721     emit_move_insn (ivts->base_var, expr);
1722   seq = get_insns ();
1723   end_sequence ();
1724
1725   emit_insn_before (seq, insn);
1726 }
1727
1728 /* Replace the use of induction variable described in IVTS in INSN
1729    by base variable + DELTA * step.  */
1730
1731 static void
1732 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1733 {
1734   rtx expr, *loc, incr, var;
1735   rtx_insn *seq;
1736   machine_mode mode = GET_MODE (ivts->base_var);
1737   rtx src, dest, set;
1738
1739   /* Construct base + DELTA * step.  */
1740   if (!delta)
1741     expr = ivts->base_var;
1742   else
1743     {
1744       incr = simplify_gen_binary (MULT, mode,
1745                                   copy_rtx (ivts->step),
1746                                   gen_int_mode (delta, mode));
1747       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1748                                   ivts->base_var, incr);
1749     }
1750
1751   /* Figure out where to do the replacement.  */
1752   loc = &SET_SRC (single_set (insn));
1753
1754   /* If we can make the replacement right away, we're done.  */
1755   if (validate_change (insn, loc, expr, 0))
1756     return;
1757
1758   /* Otherwise, force EXPR into a register and try again.  */
1759   start_sequence ();
1760   var = gen_reg_rtx (mode);
1761   expr = force_operand (expr, var);
1762   if (expr != var)
1763     emit_move_insn (var, expr);
1764   seq = get_insns ();
1765   end_sequence ();
1766   emit_insn_before (seq, insn);
1767
1768   if (validate_change (insn, loc, var, 0))
1769     return;
1770
1771   /* The last chance.  Try recreating the assignment in insn
1772      completely from scratch.  */
1773   set = single_set (insn);
1774   gcc_assert (set);
1775
1776   start_sequence ();
1777   *loc = var;
1778   src = copy_rtx (SET_SRC (set));
1779   dest = copy_rtx (SET_DEST (set));
1780   src = force_operand (src, dest);
1781   if (src != dest)
1782     emit_move_insn (dest, src);
1783   seq = get_insns ();
1784   end_sequence ();
1785
1786   emit_insn_before (seq, insn);
1787   delete_insn (insn);
1788 }
1789
1790
1791 /* Return one expansion of the accumulator recorded in struct VE.  */
1792
1793 static rtx
1794 get_expansion (struct var_to_expand *ve)
1795 {
1796   rtx reg;
1797
1798   if (ve->reuse_expansion == 0)
1799     reg = ve->reg;
1800   else
1801     reg = ve->var_expansions[ve->reuse_expansion - 1];
1802
1803   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1804     ve->reuse_expansion = 0;
1805   else
1806     ve->reuse_expansion++;
1807
1808   return reg;
1809 }
1810
1811
1812 /* Given INSN replace the uses of the accumulator recorded in VE
1813    with a new register.  */
1814
1815 static void
1816 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1817 {
1818   rtx new_reg, set;
1819   bool really_new_expansion = false;
1820
1821   set = single_set (insn);
1822   gcc_assert (set);
1823
1824   /* Generate a new register only if the expansion limit has not been
1825      reached.  Else reuse an already existing expansion.  */
1826   if (param_max_variable_expansions > ve->expansion_count)
1827     {
1828       really_new_expansion = true;
1829       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1830     }
1831   else
1832     new_reg = get_expansion (ve);
1833
1834   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1835   if (apply_change_group ())
1836     if (really_new_expansion)
1837       {
1838         ve->var_expansions.safe_push (new_reg);
1839         ve->expansion_count++;
1840       }
1841 }
1842
1843 /* Initialize the variable expansions in loop preheader.  PLACE is the
1844    loop-preheader basic block where the initialization of the
1845    expansions should take place.  The expansions are initialized with
1846    (-0) when the operation is plus or minus to honor sign zero.  This
1847    way we can prevent cases where the sign of the final result is
1848    effected by the sign of the expansion.  Here is an example to
1849    demonstrate this:
1850
1851    for (i = 0 ; i < n; i++)
1852      sum += something;
1853
1854    ==>
1855
1856    sum += something
1857    ....
1858    i = i+1;
1859    sum1 += something
1860    ....
1861    i = i+1
1862    sum2 += something;
1863    ....
1864
1865    When SUM is initialized with -zero and SOMETHING is also -zero; the
1866    final result of sum should be -zero thus the expansions sum1 and sum2
1867    should be initialized with -zero as well (otherwise we will get +zero
1868    as the final result).  */
1869
1870 static void
1871 insert_var_expansion_initialization (struct var_to_expand *ve,
1872                                      basic_block place)
1873 {
1874   rtx_insn *seq;
1875   rtx var, zero_init;
1876   unsigned i;
1877   machine_mode mode = GET_MODE (ve->reg);
1878   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1879
1880   if (ve->var_expansions.length () == 0)
1881     return;
1882
1883   start_sequence ();
1884   switch (ve->op)
1885     {
1886     case FMA:
1887       /* Note that we only accumulate FMA via the ADD operand.  */
1888     case PLUS:
1889     case MINUS:
1890       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1891         {
1892           if (honor_signed_zero_p)
1893             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1894           else
1895             zero_init = CONST0_RTX (mode);
1896           emit_move_insn (var, zero_init);
1897         }
1898       break;
1899
1900     case MULT:
1901       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1902         {
1903           zero_init = CONST1_RTX (GET_MODE (var));
1904           emit_move_insn (var, zero_init);
1905         }
1906       break;
1907
1908     default:
1909       gcc_unreachable ();
1910     }
1911
1912   seq = get_insns ();
1913   end_sequence ();
1914
1915   emit_insn_after (seq, BB_END (place));
1916 }
1917
1918 /* Combine the variable expansions at the loop exit.  PLACE is the
1919    loop exit basic block where the summation of the expansions should
1920    take place.  */
1921
1922 static void
1923 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1924 {
1925   rtx sum = ve->reg;
1926   rtx expr, var;
1927   rtx_insn *seq, *insn;
1928   unsigned i;
1929
1930   if (ve->var_expansions.length () == 0)
1931     return;
1932
1933   /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
1934      it both here and as the destination of the assignment.  */
1935   sum = copy_rtx (sum);
1936   start_sequence ();
1937   switch (ve->op)
1938     {
1939     case FMA:
1940       /* Note that we only accumulate FMA via the ADD operand.  */
1941     case PLUS:
1942     case MINUS:
1943       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1944         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1945       break;
1946
1947     case MULT:
1948       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1949         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1950       break;
1951
1952     default:
1953       gcc_unreachable ();
1954     }
1955
1956   expr = force_operand (sum, ve->reg);
1957   if (expr != ve->reg)
1958     emit_move_insn (ve->reg, expr);
1959   seq = get_insns ();
1960   end_sequence ();
1961
1962   insn = BB_HEAD (place);
1963   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1964     insn = NEXT_INSN (insn);
1965
1966   emit_insn_after (seq, insn);
1967 }
1968
1969 /* Strip away REG_EQUAL notes for IVs we're splitting.
1970
1971    Updating REG_EQUAL notes for IVs we split is tricky: We
1972    cannot tell until after unrolling, DF-rescanning, and liveness
1973    updating, whether an EQ_USE is reached by the split IV while
1974    the IV reg is still live.  See PR55006.
1975
1976    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1977    because RTL loop-iv requires us to defer rescanning insns and
1978    any notes attached to them.  So resort to old techniques...  */
1979
1980 static void
1981 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1982 {
1983   struct iv_to_split *ivts;
1984   rtx note = find_reg_equal_equiv_note (insn);
1985   if (! note)
1986     return;
1987   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1988     if (reg_mentioned_p (ivts->orig_var, note))
1989       {
1990         remove_note (insn, note);
1991         return;
1992       }
1993 }
1994
1995 /* Apply loop optimizations in loop copies using the
1996    data which gathered during the unrolling.  Structure
1997    OPT_INFO record that data.
1998
1999    UNROLLING is true if we unrolled (not peeled) the loop.
2000    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
2001    the loop (as it should happen in complete unrolling, but not in ordinary
2002    peeling of the loop).  */
2003
2004 static void
2005 apply_opt_in_copies (struct opt_info *opt_info,
2006                      unsigned n_copies, bool unrolling,
2007                      bool rewrite_original_loop)
2008 {
2009   unsigned i, delta;
2010   basic_block bb, orig_bb;
2011   rtx_insn *insn, *orig_insn, *next;
2012   struct iv_to_split ivts_templ, *ivts;
2013   struct var_to_expand ve_templ, *ves;
2014
2015   /* Sanity check -- we need to put initialization in the original loop
2016      body.  */
2017   gcc_assert (!unrolling || rewrite_original_loop);
2018
2019   /* Allocate the basic variables (i0).  */
2020   if (opt_info->insns_to_split)
2021     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2022       allocate_basic_variable (ivts);
2023
2024   for (i = opt_info->first_new_block;
2025        i < (unsigned) last_basic_block_for_fn (cfun);
2026        i++)
2027     {
2028       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2029       orig_bb = get_bb_original (bb);
2030
2031       /* bb->aux holds position in copy sequence initialized by
2032          duplicate_loop_to_header_edge.  */
2033       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2034                                         unrolling);
2035       bb->aux = 0;
2036       orig_insn = BB_HEAD (orig_bb);
2037       FOR_BB_INSNS_SAFE (bb, insn, next)
2038         {
2039           if (!INSN_P (insn)
2040               || (DEBUG_BIND_INSN_P (insn)
2041                   && INSN_VAR_LOCATION_DECL (insn)
2042                   && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2043             continue;
2044
2045           while (!INSN_P (orig_insn)
2046                  || (DEBUG_BIND_INSN_P (orig_insn)
2047                      && INSN_VAR_LOCATION_DECL (orig_insn)
2048                      && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2049                          == LABEL_DECL)))
2050             orig_insn = NEXT_INSN (orig_insn);
2051
2052           ivts_templ.insn = orig_insn;
2053           ve_templ.insn = orig_insn;
2054
2055           /* Apply splitting iv optimization.  */
2056           if (opt_info->insns_to_split)
2057             {
2058               maybe_strip_eq_note_for_split_iv (opt_info, insn);
2059
2060               ivts = opt_info->insns_to_split->find (&ivts_templ);
2061
2062               if (ivts)
2063                 {
2064                   gcc_assert (GET_CODE (PATTERN (insn))
2065                               == GET_CODE (PATTERN (orig_insn)));
2066
2067                   if (!delta)
2068                     insert_base_initialization (ivts, insn);
2069                   split_iv (ivts, insn, delta);
2070                 }
2071             }
2072           /* Apply variable expansion optimization.  */
2073           if (unrolling && opt_info->insns_with_var_to_expand)
2074             {
2075               ves = (struct var_to_expand *)
2076                 opt_info->insns_with_var_to_expand->find (&ve_templ);
2077               if (ves)
2078                 {
2079                   gcc_assert (GET_CODE (PATTERN (insn))
2080                               == GET_CODE (PATTERN (orig_insn)));
2081                   expand_var_during_unrolling (ves, insn);
2082                 }
2083             }
2084           orig_insn = NEXT_INSN (orig_insn);
2085         }
2086     }
2087
2088   if (!rewrite_original_loop)
2089     return;
2090
2091   /* Initialize the variable expansions in the loop preheader
2092      and take care of combining them at the loop exit.  */
2093   if (opt_info->insns_with_var_to_expand)
2094     {
2095       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2096         insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2097       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2098         combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2099     }
2100
2101   /* Rewrite also the original loop body.  Find them as originals of the blocks
2102      in the last copied iteration, i.e. those that have
2103      get_bb_copy (get_bb_original (bb)) == bb.  */
2104   for (i = opt_info->first_new_block;
2105        i < (unsigned) last_basic_block_for_fn (cfun);
2106        i++)
2107     {
2108       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2109       orig_bb = get_bb_original (bb);
2110       if (get_bb_copy (orig_bb) != bb)
2111         continue;
2112
2113       delta = determine_split_iv_delta (0, n_copies, unrolling);
2114       for (orig_insn = BB_HEAD (orig_bb);
2115            orig_insn != NEXT_INSN (BB_END (bb));
2116            orig_insn = next)
2117         {
2118           next = NEXT_INSN (orig_insn);
2119
2120           if (!INSN_P (orig_insn))
2121             continue;
2122
2123           ivts_templ.insn = orig_insn;
2124           if (opt_info->insns_to_split)
2125             {
2126               maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2127
2128               ivts = (struct iv_to_split *)
2129                 opt_info->insns_to_split->find (&ivts_templ);
2130               if (ivts)
2131                 {
2132                   if (!delta)
2133                     insert_base_initialization (ivts, orig_insn);
2134                   split_iv (ivts, orig_insn, delta);
2135                   continue;
2136                 }
2137             }
2138
2139         }
2140     }
2141 }
2142
2143 /* Release OPT_INFO.  */
2144
2145 static void
2146 free_opt_info (struct opt_info *opt_info)
2147 {
2148   delete opt_info->insns_to_split;
2149   opt_info->insns_to_split = NULL;
2150   if (opt_info->insns_with_var_to_expand)
2151     {
2152       struct var_to_expand *ves;
2153
2154       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2155         ves->var_expansions.release ();
2156       delete opt_info->insns_with_var_to_expand;
2157       opt_info->insns_with_var_to_expand = NULL;
2158     }
2159   free (opt_info);
2160 }