/* Loop unrolling.
   Copyright (C) 2002-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "memmodel.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "cfgrtl.h"
#include "cfgloop.h"
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"
/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain is caused by greater sequentiality
   of code, better code to optimize for further passes, and in some cases
   by fewer tests of exit conditions.  The main problem is code growth,
   which impacts performance negatively due to its effect on caches.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see the comments at
   the appropriate function below.

   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of several %.  */
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	/* The insn in which the induction variable occurs.  */
  rtx orig_var;		/* The variable (register) for the IV before split.  */
  rtx base_var;		/* The variable on which the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next; /* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;		/* The insn in which the variable expansion occurs.  */
  rtx reg;			/* The accumulator which is expanded.  */
  vec<rtx> var_expansions;	/* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	/* Next entry in walking order.  */
  enum rtx_code op;		/* The type of the accumulation - addition, subtraction
				   or multiplication.  */
  int expansion_count;		/* Count the number of expansions generated so far.  */
  int reuse_expansion;		/* The expansion we intend to reuse to expand
				   the accumulator.  If REUSE_EXPANSION is 0 reuse
				   the original accumulator.  Else use
				   var_expansions[REUSE_EXPANSION - 1].  */
};
/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};

/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const iv_to_split *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const var_to_expand *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
{
  return i1->insn == i2->insn;
}
/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;	/* The first basic block that was
				   duplicated.  */
  basic_block loop_exit;	/* The loop exit basic block.  */
  basic_block loop_preheader;	/* The loop preheader basic block.  */
};
static void decide_unroll_stupid (class loop *, int);
static void decide_unroll_constant_iterations (class loop *, int);
static void decide_unroll_runtime_iterations (class loop *, int);
static void unroll_loop_stupid (class loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (class loop *);
static void unroll_loop_runtime_iterations (class loop *);
static struct opt_info *analyze_insns_in_loop (class loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (class loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (class loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);
/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (class loop *loop, dump_location_t locus)
{
  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_metadata_t metadata (report_flags, locus.get_impl_location ());
  dump_printf_loc (metadata, locus.get_user_location (),
		   "loop unrolled %d times",
		   loop->lpt_decision.times);
  if (profile_info && loop->header->count.initialized_p ())
    dump_printf (metadata,
		 " (header execution count %d)",
		 (int)loop->header->count.to_gcov_type ());

  dump_printf (metadata, "\n");
}
/* Decide whether to unroll loops and how much.  */
static void
decide_unrolling (int flags)
{
  /* Scan the loops, inner ones first.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      loop->lpt_decision.decision = LPT_NONE;
      dump_user_location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, locus,
			 "considering unrolling loop %d at BB %d\n",
			 loop->num, loop->header->index);

      if (loop->unroll == 1)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not unrolling loop, user didn't want it unrolled\n");
	  continue;
	}

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of priority.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}
/* Unroll LOOPS.  */
void
unroll_loops (int flags)
{
  bool changed = false;

  /* Now decide rest of unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}
/* Check whether the exit of the LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (class loop *loop)
{
  class niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}
/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with constant "
		 "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = param_max_unrolled_insns / loop->ninsns;
  nunroll_by_av
    = param_max_average_unrolled_insns / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) param_max_unroll_times)
    nunroll = param_max_unroll_times;

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check for an explicit unrolling factor.  */
  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    {
      /* However we cannot unroll completely at the RTL level a loop with
	 constant number of iterations; it should have been peeled instead.  */
      if (desc->niter == 0 || (unsigned) loop->unroll > desc->niter - 1)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Loop should have been peeled\n");
	}
      else
	{
	  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
	  loop->lpt_decision.times = loop->unroll - 1;
	}
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in case the loop has more
     than one exit it may well loop less than the determined maximal number
     of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_likely_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of the loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i > desc->niter - 2)
    i = desc->niter - 2;
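  /* E.g. with niter == 10, the exit test at the beginning and a candidate
     factor of i + 1 == 4, we would need exit_mod == 2 peeled copies plus
     i + 1 == 4 copies in the unrolled body, i.e. n_copies == 6.  */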
  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
   The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_constant_iterations (class loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);
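  /* E.g. for the 102-iteration loop from the comment above with
     max_unroll == 3, exit_mod == 102 % 4 == 2 iterations must be peeled
     off before entering the unrolled body.  */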
  auto_sbitmap wont_exit (max_unroll + 2);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test
	 of the exit condition have a continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod;
	  else
	    loop->any_likely_upper_bound = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as above, if
	 the loop tests the condition at the end of the loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care about
	 the case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
	  else
	    loop->any_likely_upper_bound = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  desc->niter_expr = gen_int_mode (desc->niter, desc->mode);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
/* Decide whether to unroll LOOP iterating a runtime-computable number of times
   and how much.  */
static void
decide_unroll_runtime_iterations (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with runtime-"
		 "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = param_max_unrolled_insns / loop->ninsns;
  nunroll_by_av = param_max_average_unrolled_insns / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) param_max_unroll_times)
    nunroll = param_max_unroll_times;

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as code-gen requires it;
     we are unable to cope with overflows in the computation of the number
     of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;
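  /* E.g. nunroll == 6 leaves i == 4 here: i is the largest power of two
     not exceeding nunroll, so we unroll 4x and record times == 3.  */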
  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}
/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it did ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

     * If do_compare_rtx_and_jump needs several branches to do comparison
       in a mode that needs comparison by parts, we cannot analyze the
       number of iterations of the loop, and we never get to unrolling it.

     * The code in expand_divmod that was suspected to cause creation of
       branching code seems to be only accessed for signed division.  The
       divisions used by the # of iterations analysis are always unsigned.
       Problems might arise on architectures that emit branching code
       for some operations that may appear in the unroller (especially
       for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, profile_probability prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  if (prob.initialized_p ())
    add_reg_br_prob_note (jump, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
/* Unroll LOOP for which we are able to count the number of iterations at
   runtime LOOP->LPT_DECISION.TIMES times.  The times value must be a
   power of two.  The transformation does this (with some extra care
   for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (class loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i;
  profile_probability p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy;
  profile_count iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      for (basic_block bb : get_dominated_by (CDI_DOMINATORS, body[i]))
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get an expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at the end and whose number of iterations is
     reliable, add one to niter to account for the first pass through the
     loop body before reaching the exit test.  */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Count the modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
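  /* max_unroll == LOOP->LPT_DECISION.TIMES is 2^k - 1 here, so it doubles
     as the bit mask: e.g. for times == 3 this computes niter & 3,
     i.e. niter % 4, with no risk of overflow.  */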
  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of loop body.  Leave the exit test if the number
	 of iterations is not reliable.  Also record the place of the extra zero
	 check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute count increments for each switch block and initialize
     innermost switch block.  Switch blocks and peeled loop copies are built
     from innermost outward.  */
  iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
  swtch->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  */
      unsigned j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = profile_probability::always ().apply_scale (1, i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter),
					  gen_int_mode (j, desc->mode), EQ,
					  block_label (preheader), p, NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      new_count += iter_count;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = profile_probability::always ().apply_scale (1, max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute count adjustments since initial peel copy may
	 have exited and reduced those values that were computed above.  */
      iter_count = swtch->count.apply_scale (1, max_unroll + 1);
      /* Add in count of edge from switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (class loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  class niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     the unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = param_max_unrolled_insns / loop->ninsns;
  nunroll_by_av
    = param_max_average_unrolled_insns / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) param_max_unroll_times)
    nunroll = param_max_unroll_times;

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;
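  /* As in decide_unroll_runtime_iterations, e.g. nunroll == 6 leaves
     i == 4 here, so times == 3 and the unrolled body holds four copies.  */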
  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (class loop *loop)
{
  unsigned nunroll = loop->lpt_decision.times;
  class niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  auto_sbitmap wont_exit (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial assumptions
	 for a loop to be really simple.  We could update the counts, but the
	 problem is that we are unable to decide which exit will be taken
	 (not really true in case the number of iterations is constant,
	 but no one will do anything with this information, so we do not
	 worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}
/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (class loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}
/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (class loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of the
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it with
   the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (class loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation, will yield wrong
	 results for (x = something - x); thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;
   ...
   i = i + 1;
   ...
   i = i + 1;
   ...

   type chains by

   i0 = i + 1
   ...
   i = i0 + 1
   ...
   i = i0 + 2
   ...

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  class rtx_iv iv;
  struct iv_to_split *ivts;
  scalar_int_mode mode;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
    return NULL;

  if (!biv_p (insn, mode, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
/* Determine which insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (class loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  auto_vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  free (body);
  return opt_info;
}
/* Called just before loop duplication.  Records start of duplicated area
   to OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */
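/* E.g. when peeling two copies, the bodies execute in the order copy 1
   (delta 0), copy 2 (delta 1), and finally the original body number 0,
   which gets delta == N_COPIES == 2.  */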
static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}
/* Allocate basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}
/* Insert initialization of basic variable of IVTS before INSN, taking
   the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}
/* Replace the use of induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */
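/* E.g. with a unit step, the SET_SRC in the copy with delta DELTA becomes
   I0 + DELTA, yielding the chains shown above analyze_iv_to_split_insn.  */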
static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  copy_rtx (ivts->step),
				  gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}
1781 /* Return one expansion of the accumulator recorded in struct VE. */
1783 static rtx
1784 get_expansion (struct var_to_expand *ve)
1786 rtx reg;
1788 if (ve->reuse_expansion == 0)
1789 reg = ve->reg;
1790 else
1791 reg = ve->var_expansions[ve->reuse_expansion - 1];
1793 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1794 ve->reuse_expansion = 0;
1795 else
1796 ve->reuse_expansion++;
1798 return reg;
/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (param_max_variable_expansions > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}
/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero, thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}
/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */
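/* Conceptually, for an expanded PLUS (or FMA) accumulator SUM this emits
   sum = sumN + ... + sum1 + sum at the exit; for MULT the partial
   products are multiplied together instead.  */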
static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
     it both here and as the destination of the assignment.  */
  sum = copy_rtx (sum);
  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
	remove_note (insn, note);
	return;
      }
}
/* Apply loop optimizations in loop copies using the
   data gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
   the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_BIND_INSN_P (insn)
		  && INSN_VAR_LOCATION_DECL (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  while (!INSN_P (orig_insn)
		 || (DEBUG_BIND_INSN_P (orig_insn)
		     && INSN_VAR_LOCATION_DECL (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find the blocks as originals of
     the blocks in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }
	}
    }
}
/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}