gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "target.h"
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "cfghooks.h"
  28 #include "memmodel.h"
  29 #include "optabs.h"
  30 #include "emit-rtl.h"
  31 #include "recog.h"
  32 #include "profile.h"
  33 #include "cfgrtl.h"
  34 #include "cfgloop.h"
  35 #include "params.h"
  36 #include "dojump.h"
  37 #include "expr.h"
  38 #include "dumpfile.h"
  39
  40 /* This pass performs loop unrolling.  We only perform this
  41    optimization on innermost loops (with single exception) because
  42    the impact on performance is greatest here, and we want to avoid
  43    unnecessary code size growth.  The gain is caused by greater sequentiality
  44    of code, better code to optimize for further passes and in some cases
  45    by fewer testings of exit conditions.  The main problem is code growth,
  46    that impacts performance negatively due to effect of caches.
  47
  48    What we do:
  49
  50    -- unrolling of loops that roll constant times; this is almost always
  51       win, as we get rid of exit condition tests.
  52    -- unrolling of loops that roll number of times that we can compute
  53       in runtime; we also get rid of exit condition tests here, but there
  54       is the extra expense for calculating the number of iterations
  55    -- simple unrolling of remaining loops; this is performed only if we
  56       are asked to, as the gain is questionable in this case and often
  57       it may even slow down the code
  58    For more detailed descriptions of each of those, see comments at
  59    appropriate function below.
  60
   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll.
  63
   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance by several percent.
  67    */
  68
  69 /* Information about induction variables to split.  */
  70
  71 struct iv_to_split
  72 {
  73   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  74   rtx orig_var;         /* The variable (register) for the IV before split.  */
  75   rtx base_var;         /* The variable on that the values in the further
  76                            iterations are based.  */
  77   rtx step;             /* Step of the induction variable.  */
  78   struct iv_to_split *next; /* Next entry in walking order.  */
  79 };
  80
  81 /* Information about accumulators to expand.  */
  82
  83 struct var_to_expand
  84 {
  85   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
  86   rtx reg;                         /* The accumulator which is expanded.  */
  87   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  88   struct var_to_expand *next;      /* Next entry in walking order.  */
  89   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  90                                       or multiplication.  */
  91   int expansion_count;             /* Count the number of expansions generated so far.  */
  92   int reuse_expansion;             /* The expansion we intend to reuse to expand
  93                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  94                                       the original accumulator.  Else use
  95                                       var_expansions[REUSE_EXPANSION - 1].  */
  96 };
  97
  98 /* Hashtable helper for iv_to_split.  */
  99
 100 struct iv_split_hasher : free_ptr_hash <iv_to_split>
 101 {
 102   static inline hashval_t hash (const iv_to_split *);
 103   static inline bool equal (const iv_to_split *, const iv_to_split *);
 104 };
 105
 106
 107 /* A hash function for information about insns to split.  */
 108
 109 inline hashval_t
 110 iv_split_hasher::hash (const iv_to_split *ivts)
 111 {
 112   return (hashval_t) INSN_UID (ivts->insn);
 113 }
 114
 115 /* An equality functions for information about insns to split.  */
 116
 117 inline bool
 118 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
 119 {
 120   return i1->insn == i2->insn;
 121 }
 122
 123 /* Hashtable helper for iv_to_split.  */
 124
 125 struct var_expand_hasher : free_ptr_hash <var_to_expand>
 126 {
 127   static inline hashval_t hash (const var_to_expand *);
 128   static inline bool equal (const var_to_expand *, const var_to_expand *);
 129 };
 130
 131 /* Return a hash for VES.  */
 132
 133 inline hashval_t
 134 var_expand_hasher::hash (const var_to_expand *ves)
 135 {
 136   return (hashval_t) INSN_UID (ves->insn);
 137 }
 138
 139 /* Return true if I1 and I2 refer to the same instruction.  */
 140
 141 inline bool
 142 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
 143 {
 144   return i1->insn == i2->insn;
 145 }
 146
 147 /* Information about optimization applied in
 148    the unrolled loop.  */
 149
 150 struct opt_info
 151 {
 152   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 153                                                   split.  */
 154   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 155   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 156   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 157                                         insns with accumulators to expand.  */
 158   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 159   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 160   unsigned first_new_block;        /* The first basic block that was
 161                                       duplicated.  */
 162   basic_block loop_exit;           /* The loop exit basic block.  */
 163   basic_block loop_preheader;      /* The loop preheader basic block.  */
 164 };
 165
 166 static void decide_unroll_stupid (struct loop *, int);
 167 static void decide_unroll_constant_iterations (struct loop *, int);
 168 static void decide_unroll_runtime_iterations (struct loop *, int);
 169 static void unroll_loop_stupid (struct loop *);
 170 static void decide_unrolling (int);
 171 static void unroll_loop_constant_iterations (struct loop *);
 172 static void unroll_loop_runtime_iterations (struct loop *);
 173 static struct opt_info *analyze_insns_in_loop (struct loop *);
 174 static void opt_info_start_duplication (struct opt_info *);
 175 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 176 static void free_opt_info (struct opt_info *);
 177 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
 178 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
 179 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 180 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 181 static void insert_var_expansion_initialization (struct var_to_expand *,
 182                                                  basic_block);
 183 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 184                                              basic_block);
 185 static rtx get_expansion (struct var_to_expand *);
 186
 187 /* Emit a message summarizing the unroll that will be
 188    performed for LOOP, along with the loop's location LOCUS, if
 189    appropriate given the dump or -fopt-info settings.  */
 190
 191 static void
 192 report_unroll (struct loop *loop, location_t locus)
 193 {
 194   dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
 195
 196   if (loop->lpt_decision.decision == LPT_NONE)
 197     return;
 198
 199   if (!dump_enabled_p ())
 200     return;
 201
 202   dump_printf_loc (report_flags, locus,
 203                    "loop unrolled %d times",
 204                    loop->lpt_decision.times);
 205   if (profile_info && loop->header->count.initialized_p ())
 206     dump_printf (report_flags,
 207                  " (header execution count %d)",
 208                  (int)loop->header->count.to_gcov_type ());
 209
 210   dump_printf (report_flags, "\n");
 211 }
 212
 213 /* Decide whether unroll loops and how much.  */
 214 static void
 215 decide_unrolling (int flags)
 216 {
 217   struct loop *loop;
 218
 219   /* Scan the loops, inner ones first.  */
 220   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 221     {
 222       loop->lpt_decision.decision = LPT_NONE;
 223       location_t locus = get_loop_location (loop);
 224
 225       if (dump_enabled_p ())
 226         dump_printf_loc (MSG_NOTE, locus,
 227                          ";; *** Considering loop %d at BB %d for "
 228                          "unrolling ***\n",
 229                          loop->num, loop->header->index);
 230
 231       /* Do not peel cold areas.  */
 232       if (optimize_loop_for_size_p (loop))
 233         {
 234           if (dump_file)
 235             fprintf (dump_file, ";; Not considering loop, cold area\n");
 236           continue;
 237         }
 238
 239       /* Can the loop be manipulated?  */
 240       if (!can_duplicate_loop_p (loop))
 241         {
 242           if (dump_file)
 243             fprintf (dump_file,
 244                      ";; Not considering loop, cannot duplicate\n");
 245           continue;
 246         }
 247
 248       /* Skip non-innermost loops.  */
 249       if (loop->inner)
 250         {
 251           if (dump_file)
 252             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 253           continue;
 254         }
 255
 256       loop->ninsns = num_loop_insns (loop);
 257       loop->av_ninsns = average_num_loop_insns (loop);
 258
 259       /* Try transformations one by one in decreasing order of
 260          priority.  */
 261
 262       decide_unroll_constant_iterations (loop, flags);
 263       if (loop->lpt_decision.decision == LPT_NONE)
 264         decide_unroll_runtime_iterations (loop, flags);
 265       if (loop->lpt_decision.decision == LPT_NONE)
 266         decide_unroll_stupid (loop, flags);
 267
 268       report_unroll (loop, locus);
 269     }
 270 }
 271
 272 /* Unroll LOOPS.  */
 273 void
 274 unroll_loops (int flags)
 275 {
 276   struct loop *loop;
 277   bool changed = false;
 278
 279   /* Now decide rest of unrolling.  */
 280   decide_unrolling (flags);
 281
 282   /* Scan the loops, inner ones first.  */
 283   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 284     {
 285       /* And perform the appropriate transformations.  */
 286       switch (loop->lpt_decision.decision)
 287         {
 288         case LPT_UNROLL_CONSTANT:
 289           unroll_loop_constant_iterations (loop);
 290           changed = true;
 291           break;
 292         case LPT_UNROLL_RUNTIME:
 293           unroll_loop_runtime_iterations (loop);
 294           changed = true;
 295           break;
 296         case LPT_UNROLL_STUPID:
 297           unroll_loop_stupid (loop);
 298           changed = true;
 299           break;
 300         case LPT_NONE:
 301           break;
 302         default:
 303           gcc_unreachable ();
 304         }
 305     }
 306
 307     if (changed)
 308       {
 309         calculate_dominance_info (CDI_DOMINATORS);
 310         fix_loop_structure (NULL);
 311       }
 312
 313   iv_analysis_done ();
 314 }
 315
 316 /* Check whether exit of the LOOP is at the end of loop body.  */
 317
 318 static bool
 319 loop_exit_at_end_p (struct loop *loop)
 320 {
 321   struct niter_desc *desc = get_simple_loop_desc (loop);
 322   rtx_insn *insn;
 323
 324   /* We should never have conditional in latch block.  */
 325   gcc_assert (desc->in_edge->dest != loop->header);
 326
 327   if (desc->in_edge->dest != loop->latch)
 328     return false;
 329
 330   /* Check that the latch is empty.  */
 331   FOR_BB_INSNS (loop->latch, insn)
 332     {
 333       if (INSN_P (insn) && active_insn_p (insn))
 334         return false;
 335     }
 336
 337   return true;
 338 }
 339
 340 /* Decide whether to unroll LOOP iterating constant number of times
 341    and how much.  */
 342
 343 static void
 344 decide_unroll_constant_iterations (struct loop *loop, int flags)
 345 {
 346   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 347   struct niter_desc *desc;
 348   widest_int iterations;
 349
 350   if (!(flags & UAP_UNROLL))
 351     {
 352       /* We were not asked to, just return back silently.  */
 353       return;
 354     }
 355
 356   if (dump_file)
 357     fprintf (dump_file,
 358              "\n;; Considering unrolling loop with constant "
 359              "number of iterations\n");
 360
 361   /* nunroll = total number of copies of the original loop body in
 362      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 363   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 364   nunroll_by_av
 365     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 366   if (nunroll > nunroll_by_av)
 367     nunroll = nunroll_by_av;
 368   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 369     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 370
 371   if (targetm.loop_unroll_adjust)
 372     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 373
 374   /* Skip big loops.  */
 375   if (nunroll <= 1)
 376     {
 377       if (dump_file)
 378         fprintf (dump_file, ";; Not considering loop, is too big\n");
 379       return;
 380     }
 381
 382   /* Check for simple loops.  */
 383   desc = get_simple_loop_desc (loop);
 384
 385   /* Check number of iterations.  */
 386   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 387     {
 388       if (dump_file)
 389         fprintf (dump_file,
 390                  ";; Unable to prove that the loop iterates constant times\n");
 391       return;
 392     }
 393
 394   /* Check whether the loop rolls enough to consider.
 395      Consult also loop bounds and profile; in the case the loop has more
 396      than one exit it may well loop less than determined maximal number
 397      of iterations.  */
 398   if (desc->niter < 2 * nunroll
 399       || ((get_estimated_loop_iterations (loop, &iterations)
 400            || get_likely_max_loop_iterations (loop, &iterations))
 401           && wi::ltu_p (iterations, 2 * nunroll)))
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 405       return;
 406     }
 407
 408   /* Success; now compute number of iterations to unroll.  We alter
 409      nunroll so that as few as possible copies of loop body are
 410      necessary, while still not decreasing the number of unrollings
 411      too much (at most by 1).  */
 412   best_copies = 2 * nunroll + 10;
 413
 414   i = 2 * nunroll + 2;
 415   if (i - 1 >= desc->niter)
 416     i = desc->niter - 2;
 417
 418   for (; i >= nunroll - 1; i--)
 419     {
 420       unsigned exit_mod = desc->niter % (i + 1);
 421
 422       if (!loop_exit_at_end_p (loop))
 423         n_copies = exit_mod + i + 1;
 424       else if (exit_mod != (unsigned) i
 425                || desc->noloop_assumptions != NULL_RTX)
 426         n_copies = exit_mod + i + 2;
 427       else
 428         n_copies = i + 1;
 429
 430       if (n_copies < best_copies)
 431         {
 432           best_copies = n_copies;
 433           best_unroll = i;
 434         }
 435     }
 436
 437   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 438   loop->lpt_decision.times = best_unroll;
 439 }
 440
 441 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 442    The transformation does this:
 443
 444    for (i = 0; i < 102; i++)
 445      body;
 446
 447    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 448
 449    i = 0;
 450    body; i++;
 451    body; i++;
 452    while (i < 102)
 453      {
 454        body; i++;
 455        body; i++;
 456        body; i++;
 457        body; i++;
 458      }
 459   */
 460 static void
 461 unroll_loop_constant_iterations (struct loop *loop)
 462 {
 463   unsigned HOST_WIDE_INT niter;
 464   unsigned exit_mod;
 465   unsigned i;
 466   edge e;
 467   unsigned max_unroll = loop->lpt_decision.times;
 468   struct niter_desc *desc = get_simple_loop_desc (loop);
 469   bool exit_at_end = loop_exit_at_end_p (loop);
 470   struct opt_info *opt_info = NULL;
 471   bool ok;
 472
 473   niter = desc->niter;
 474
 475   /* Should not get here (such loop should be peeled instead).  */
 476   gcc_assert (niter > max_unroll + 1);
 477
 478   exit_mod = niter % (max_unroll + 1);
 479
 480   auto_sbitmap wont_exit (max_unroll + 1);
 481   bitmap_ones (wont_exit);
 482
 483   auto_vec<edge> remove_edges;
 484   if (flag_split_ivs_in_unroller
 485       || flag_variable_expansion_in_unroller)
 486     opt_info = analyze_insns_in_loop (loop);
 487
 488   if (!exit_at_end)
 489     {
 490       /* The exit is not at the end of the loop; leave exit test
 491          in the first copy, so that the loops that start with test
 492          of exit condition have continuous body after unrolling.  */
 493
 494       if (dump_file)
 495         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 496
 497       /* Peel exit_mod iterations.  */
 498       bitmap_clear_bit (wont_exit, 0);
 499       if (desc->noloop_assumptions)
 500         bitmap_clear_bit (wont_exit, 1);
 501
 502       if (exit_mod)
 503         {
 504           opt_info_start_duplication (opt_info);
 505           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 506                                               exit_mod,
 507                                               wont_exit, desc->out_edge,
 508                                               &remove_edges,
 509                                               DLTHE_FLAG_UPDATE_FREQ
 510                                               | (opt_info && exit_mod > 1
 511                                                  ? DLTHE_RECORD_COPY_NUMBER
 512                                                    : 0));
 513           gcc_assert (ok);
 514
 515           if (opt_info && exit_mod > 1)
 516             apply_opt_in_copies (opt_info, exit_mod, false, false);
 517
 518           desc->noloop_assumptions = NULL_RTX;
 519           desc->niter -= exit_mod;
 520           loop->nb_iterations_upper_bound -= exit_mod;
 521           if (loop->any_estimate
 522               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 523             loop->nb_iterations_estimate -= exit_mod;
 524           else
 525             loop->any_estimate = false;
 526           if (loop->any_likely_upper_bound
 527               && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
 528             loop->nb_iterations_likely_upper_bound -= exit_mod;
 529           else
 530             loop->any_likely_upper_bound = false;
 531         }
 532
 533       bitmap_set_bit (wont_exit, 1);
 534     }
 535   else
 536     {
 537       /* Leave exit test in last copy, for the same reason as above if
 538          the loop tests the condition at the end of loop body.  */
 539
 540       if (dump_file)
 541         fprintf (dump_file, ";; Condition at end of loop.\n");
 542
 543       /* We know that niter >= max_unroll + 2; so we do not need to care of
 544          case when we would exit before reaching the loop.  So just peel
 545          exit_mod + 1 iterations.  */
 546       if (exit_mod != max_unroll
 547           || desc->noloop_assumptions)
 548         {
 549           bitmap_clear_bit (wont_exit, 0);
 550           if (desc->noloop_assumptions)
 551             bitmap_clear_bit (wont_exit, 1);
 552
 553           opt_info_start_duplication (opt_info);
 554           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 555                                               exit_mod + 1,
 556                                               wont_exit, desc->out_edge,
 557                                               &remove_edges,
 558                                               DLTHE_FLAG_UPDATE_FREQ
 559                                               | (opt_info && exit_mod > 0
 560                                                  ? DLTHE_RECORD_COPY_NUMBER
 561                                                    : 0));
 562           gcc_assert (ok);
 563
 564           if (opt_info && exit_mod > 0)
 565             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 566
 567           desc->niter -= exit_mod + 1;
 568           loop->nb_iterations_upper_bound -= exit_mod + 1;
 569           if (loop->any_estimate
 570               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 571             loop->nb_iterations_estimate -= exit_mod + 1;
 572           else
 573             loop->any_estimate = false;
 574           if (loop->any_likely_upper_bound
 575               && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
 576             loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
 577           else
 578             loop->any_likely_upper_bound = false;
 579           desc->noloop_assumptions = NULL_RTX;
 580
 581           bitmap_set_bit (wont_exit, 0);
 582           bitmap_set_bit (wont_exit, 1);
 583         }
 584
 585       bitmap_clear_bit (wont_exit, max_unroll);
 586     }
 587
 588   /* Now unroll the loop.  */
 589
 590   opt_info_start_duplication (opt_info);
 591   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 592                                       max_unroll,
 593                                       wont_exit, desc->out_edge,
 594                                       &remove_edges,
 595                                       DLTHE_FLAG_UPDATE_FREQ
 596                                       | (opt_info
 597                                          ? DLTHE_RECORD_COPY_NUMBER
 598                                            : 0));
 599   gcc_assert (ok);
 600
 601   if (opt_info)
 602     {
 603       apply_opt_in_copies (opt_info, max_unroll, true, true);
 604       free_opt_info (opt_info);
 605     }
 606
 607   if (exit_at_end)
 608     {
 609       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 610       /* Find a new in and out edge; they are in the last copy we have made.  */
 611
 612       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 613         {
 614           desc->out_edge = EDGE_SUCC (exit_block, 0);
 615           desc->in_edge = EDGE_SUCC (exit_block, 1);
 616         }
 617       else
 618         {
 619           desc->out_edge = EDGE_SUCC (exit_block, 1);
 620           desc->in_edge = EDGE_SUCC (exit_block, 0);
 621         }
 622     }
 623
 624   desc->niter /= max_unroll + 1;
 625   loop->nb_iterations_upper_bound
 626     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 627   if (loop->any_estimate)
 628     loop->nb_iterations_estimate
 629       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 630   if (loop->any_likely_upper_bound)
 631     loop->nb_iterations_likely_upper_bound
 632       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
 633   desc->niter_expr = GEN_INT (desc->niter);
 634
 635   /* Remove the edges.  */
 636   FOR_EACH_VEC_ELT (remove_edges, i, e)
 637     remove_path (e);
 638
 639   if (dump_file)
 640     fprintf (dump_file,
 641              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 642              max_unroll, num_loop_insns (loop));
 643 }
 644
 645 /* Decide whether to unroll LOOP iterating runtime computable number of times
 646    and how much.  */
 647 static void
 648 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 649 {
 650   unsigned nunroll, nunroll_by_av, i;
 651   struct niter_desc *desc;
 652   widest_int iterations;
 653
 654   if (!(flags & UAP_UNROLL))
 655     {
 656       /* We were not asked to, just return back silently.  */
 657       return;
 658     }
 659
 660   if (dump_file)
 661     fprintf (dump_file,
 662              "\n;; Considering unrolling loop with runtime "
 663              "computable number of iterations\n");
 664
 665   /* nunroll = total number of copies of the original loop body in
 666      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 667   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 668   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 669   if (nunroll > nunroll_by_av)
 670     nunroll = nunroll_by_av;
 671   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 672     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 673
 674   if (targetm.loop_unroll_adjust)
 675     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 676
 677   /* Skip big loops.  */
 678   if (nunroll <= 1)
 679     {
 680       if (dump_file)
 681         fprintf (dump_file, ";; Not considering loop, is too big\n");
 682       return;
 683     }
 684
 685   /* Check for simple loops.  */
 686   desc = get_simple_loop_desc (loop);
 687
 688   /* Check simpleness.  */
 689   if (!desc->simple_p || desc->assumptions)
 690     {
 691       if (dump_file)
 692         fprintf (dump_file,
 693                  ";; Unable to prove that the number of iterations "
 694                  "can be counted in runtime\n");
 695       return;
 696     }
 697
 698   if (desc->const_iter)
 699     {
 700       if (dump_file)
 701         fprintf (dump_file, ";; Loop iterates constant times\n");
 702       return;
 703     }
 704
 705   /* Check whether the loop rolls.  */
 706   if ((get_estimated_loop_iterations (loop, &iterations)
 707        || get_likely_max_loop_iterations (loop, &iterations))
 708       && wi::ltu_p (iterations, 2 * nunroll))
 709     {
 710       if (dump_file)
 711         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 712       return;
 713     }
 714
 715   /* Success; now force nunroll to be power of 2, as we are unable to
 716      cope with overflows in computation of number of iterations.  */
 717   for (i = 1; 2 * i <= nunroll; i *= 2)
 718     continue;
 719
 720   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 721   loop->lpt_decision.times = i - 1;
 722 }
 723
 724 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 725    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 726    and NULL is returned instead.  */
 727
 728 basic_block
 729 split_edge_and_insert (edge e, rtx_insn *insns)
 730 {
 731   basic_block bb;
 732
 733   if (!insns)
 734     return NULL;
 735   bb = split_edge (e);
 736   emit_insn_after (insns, BB_END (bb));
 737
 738   /* ??? We used to assume that INSNS can contain control flow insns, and
 739      that we had to try to find sub basic blocks in BB to maintain a valid
 740      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 741      and call break_superblocks when going out of cfglayout mode.  But it
 742      turns out that this never happens; and that if it does ever happen,
 743      the verify_flow_info at the end of the RTL loop passes would fail.
 744
 745      There are two reasons why we expected we could have control flow insns
 746      in INSNS.  The first is when a comparison has to be done in parts, and
 747      the second is when the number of iterations is computed for loops with
 748      the number of iterations known at runtime.  In both cases, test cases
 749      to get control flow in INSNS appear to be impossible to construct:
 750
 751       * If do_compare_rtx_and_jump needs several branches to do comparison
 752         in a mode that needs comparison by parts, we cannot analyze the
 753         number of iterations of the loop, and we never get to unrolling it.
 754
 755       * The code in expand_divmod that was suspected to cause creation of
 756         branching code seems to be only accessed for signed division.  The
 757         divisions used by # of iterations analysis are always unsigned.
 758         Problems might arise on architectures that emits branching code
 759         for some operations that may appear in the unroller (especially
 760         for division), but we have no such architectures.
 761
 762      Considering all this, it was decided that we should for now assume
 763      that INSNS can in theory contain control flow insns, but in practice
 764      it never does.  So we don't handle the theoretical case, and should
 765      a real failure ever show up, we have a pretty good clue for how to
 766      fix it.  */
 767
 768   return bb;
 769 }
 770
 771 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 772    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 773    in order to create a jump.  */
 774
 775 static rtx_insn *
 776 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
 777                       rtx_code_label *label, profile_probability prob,
 778                       rtx_insn *cinsn)
 779 {
 780   rtx_insn *seq;
 781   rtx_jump_insn *jump;
 782   rtx cond;
 783   machine_mode mode;
 784
 785   mode = GET_MODE (op0);
 786   if (mode == VOIDmode)
 787     mode = GET_MODE (op1);
 788
 789   start_sequence ();
 790   if (GET_MODE_CLASS (mode) == MODE_CC)
 791     {
 792       /* A hack -- there seems to be no easy generic way how to make a
 793          conditional jump from a ccmode comparison.  */
 794       gcc_assert (cinsn);
 795       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 796       gcc_assert (GET_CODE (cond) == comp);
 797       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 798       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 799       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 800       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 801       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 802       LABEL_NUSES (JUMP_LABEL (jump))++;
 803       redirect_jump (jump, label, 0);
 804     }
 805   else
 806     {
 807       gcc_assert (!cinsn);
 808
 809       op0 = force_operand (op0, NULL_RTX);
 810       op1 = force_operand (op1, NULL_RTX);
 811       do_compare_rtx_and_jump (op0, op1, comp, 0,
 812                                mode, NULL_RTX, NULL, label,
 813                                profile_probability::uninitialized ());
 814       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 815       jump->set_jump_target (label);
 816       LABEL_NUSES (label)++;
 817     }
 818   if (prob.initialized_p ())
 819     add_reg_br_prob_note (jump, prob);
 820
 821   seq = get_insns ();
 822   end_sequence ();
 823
 824   return seq;
 825 }
 826
 827 /* Unroll LOOP for which we are able to count number of iterations in runtime
 828    LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
 829    extra care for case n < 0):
 830
 831    for (i = 0; i < n; i++)
 832      body;
 833
 834    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 835
 836    i = 0;
 837    mod = n % 4;
 838
 839    switch (mod)
 840      {
 841        case 3:
 842          body; i++;
 843        case 2:
 844          body; i++;
 845        case 1:
 846          body; i++;
 847        case 0: ;
 848      }
 849
 850    while (i < n)
 851      {
 852        body; i++;
 853        body; i++;
 854        body; i++;
 855        body; i++;
 856      }

        LOOP is rewritten in place.  Peeled copies of the body, reached
        through a chain of compare-and-jump blocks keyed on the iteration
        count masked with TIMES, are inserted ahead of it; the body is then
        duplicated TIMES more times on the latch edge, and the recorded
        iteration-count expression and bounds are divided by TIMES + 1.
 857    */
 858 static void
 859 unroll_loop_runtime_iterations (struct loop *loop)
 860 {
 861   rtx old_niter, niter, tmp;
 862   rtx_insn *init_code, *branch_code;
 863   unsigned i, j;
 864   profile_probability p;
 865   basic_block preheader, *body, swtch, ezc_swtch = NULL;
 866   int may_exit_copy, iter_freq, new_freq;
 867   profile_count iter_count, new_count;
 868   unsigned n_peel;
 869   edge e;
 870   bool extra_zero_check, last_may_exit;
 871   unsigned max_unroll = loop->lpt_decision.times;
 872   struct niter_desc *desc = get_simple_loop_desc (loop);
 873   bool exit_at_end = loop_exit_at_end_p (loop);
 874   struct opt_info *opt_info = NULL;
 875   bool ok;
 876
       /* OPT_INFO stays NULL unless IV splitting or variable expansion is
          enabled; the later uses of it all tolerate NULL.  */
 877   if (flag_split_ivs_in_unroller
 878       || flag_variable_expansion_in_unroller)
 879     opt_info = analyze_insns_in_loop (loop);
 880
 881   /* Remember blocks whose dominators will have to be updated.  */
 882   auto_vec<basic_block> dom_bbs;
 883
 884   body = get_loop_body (loop);
 885   for (i = 0; i < loop->num_nodes; i++)
 886     {
 887       vec<basic_block> ldom;
 888       basic_block bb;
 889
           /* Of the blocks get_dominated_by reports for this loop block, keep
              only those lying outside LOOP.  */
 890       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
 891       FOR_EACH_VEC_ELT (ldom, j, bb)
 892         if (!flow_bb_inside_loop_p (loop, bb))
 893           dom_bbs.safe_push (bb);
 894
 895       ldom.release ();
 896     }
 897   free (body);
 898
 899   if (!exit_at_end)
 900     {
 901       /* Leave exit in first copy (for explanation why see comment in
 902          unroll_loop_constant_iterations).  */
 903       may_exit_copy = 0;
 904       n_peel = max_unroll - 1;
 905       extra_zero_check = true;
 906       last_may_exit = false;
 907     }
 908   else
 909     {
 910       /* Leave exit in last copy (for explanation why see comment in
 911          unroll_loop_constant_iterations).  */
 912       may_exit_copy = max_unroll;
 913       n_peel = max_unroll;
 914       extra_zero_check = false;
 915       last_may_exit = true;
 916     }
 917
 918   /* Get expression for number of iterations.  */
 919   start_sequence ();
       /* OLD_NITER keeps the count before it is masked below; it is used after
          unrolling to build the new iteration-count expression.  */
 920   old_niter = niter = gen_reg_rtx (desc->mode);
 921   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 922   if (tmp != niter)
 923     emit_move_insn (niter, tmp);
 924
 925   /* For loops that exit at end and whose number of iterations is reliable,
 926      add one to niter to account for first pass through loop body before
 927      reaching exit test. */
 928   if (exit_at_end && !desc->noloop_assumptions)
 929     {
 930       niter = expand_simple_binop (desc->mode, PLUS,
 931                                    niter, const1_rtx,
 932                                    NULL_RTX, 0, OPTAB_LIB_WIDEN);
 933       old_niter = niter;
 934     }
 935
 936   /* Count modulo by ANDing it with max_unroll; we use the fact that
 937      the number of unrollings is a power of two, and thus this is correct
 938      even if there is overflow in the computation.  */
 939   niter = expand_simple_binop (desc->mode, AND,
 940                                niter, gen_int_mode (max_unroll, desc->mode),
 941                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 942
 943   init_code = get_insns ();
 944   end_sequence ();
 945   unshare_all_rtl_in_chain (init_code);
 946
 947   /* Precondition the loop.  */
 948   split_edge_and_insert (loop_preheader_edge (loop), init_code);
 949
 950   auto_vec<edge> remove_edges;
 951
       /* Bitmap handed to duplicate_loop_to_header_edge for each duplication
          below.  NOTE(review): presumably a set bit K means copy K keeps no
          exit -- confirm against duplicate_loop_to_header_edge.  */
 952   auto_sbitmap wont_exit (max_unroll + 2);
 953
 954   if (extra_zero_check || desc->noloop_assumptions)
 955     {
 956       /* Peel the first copy of loop body.  Leave the exit test if the number
 957          of iterations is not reliable.  Also record the place of the extra zero
 958          check.  */
 959       bitmap_clear (wont_exit);
 960       if (!desc->noloop_assumptions)
 961         bitmap_set_bit (wont_exit, 1);
 962       ezc_swtch = loop_preheader_edge (loop)->src;
 963       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 964                                           1, wont_exit, desc->out_edge,
 965                                           &remove_edges,
 966                                           DLTHE_FLAG_UPDATE_FREQ);
 967       gcc_assert (ok);
 968     }
 969
 970   /* Record the place where switch will be built for preconditioning.  */
 971   swtch = split_edge (loop_preheader_edge (loop));
 972
 973   /* Compute frequency/count increments for each switch block and initialize
 974      innermost switch block.  Switch blocks and peeled loop copies are built
 975      from innermost outward.  */
 976   iter_freq = new_freq = swtch->frequency / (max_unroll + 1);
 977   iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
 978   swtch->frequency = new_freq;
 979   swtch->count = new_count;
 980   single_succ_edge (swtch)->count = new_count;
 981
 982   for (i = 0; i < n_peel; i++)
 983     {
 984       /* Peel the copy.  */
 985       bitmap_clear (wont_exit);
 986       if (i != n_peel - 1 || !last_may_exit)
 987         bitmap_set_bit (wont_exit, 1);
 988       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 989                                           1, wont_exit, desc->out_edge,
 990                                           &remove_edges,
 991                                           DLTHE_FLAG_UPDATE_FREQ);
 992       gcc_assert (ok);
 993
 994       /* Create item for switch.  */
           /* J is the constant the masked iteration count is compared against
              for this item, and P, i.e. 1 / (i + 2), is the probability
              attached to the resulting jump.  */
 995       j = n_peel - i - (extra_zero_check ? 0 : 1);
 996       p = profile_probability::always ().apply_scale (1, i + 2);
 997
 998       preheader = split_edge (loop_preheader_edge (loop));
 999       /* Add in frequency/count of edge from switch block.  */
1000       preheader->frequency += iter_freq;
1001       preheader->count += iter_count;
1002       single_succ_edge (preheader)->count = preheader->count;
1003       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
1004                                           block_label (preheader), p,
1005                                           NULL);
1006
1007       /* We rely on the fact that the compare and jump cannot be optimized out,
1008          and hence the cfg we create is correct.  */
1009       gcc_assert (branch_code != NULL_RTX);
1010
           /* The test goes on the edge entering the previous switch block, so
              each new switch block precedes the ones built before it.  */
1011       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1012       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1013       single_succ_edge (swtch)->probability = p.invert ();
1014       single_succ_edge (swtch)->count = new_count;
1015       new_freq += iter_freq;
1016       new_count += iter_count;
1017       swtch->frequency = new_freq;
1018       swtch->count = new_count;
1019       e = make_edge (swtch, preheader,
1020                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1021       e->count = iter_count;
1022       e->probability = p;
1023     }
1024
1025   if (extra_zero_check)
1026     {
1027       /* Add branch for zero iterations.  */
1028       p = profile_probability::always ().apply_scale (1, max_unroll + 1);
1029       swtch = ezc_swtch;
1030       preheader = split_edge (loop_preheader_edge (loop));
1031       /* Recompute frequency/count adjustments since initial peel copy may
1032          have exited and reduced those values that were computed above.  */
1033       iter_freq = swtch->frequency / (max_unroll + 1);
1034       iter_count = swtch->count.apply_scale (1, max_unroll + 1);
1035       /* Add in frequency/count of edge from switch block.  */
1036       preheader->frequency += iter_freq;
1037       preheader->count += iter_count;
1038       single_succ_edge (preheader)->count = preheader->count;
1039       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1040                                           block_label (preheader), p,
1041                                           NULL);
1042       gcc_assert (branch_code != NULL_RTX);
1043
1044       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1045       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1046       single_succ_edge (swtch)->probability = p.invert ();
1047       single_succ_edge (swtch)->count -= iter_count;
1048       e = make_edge (swtch, preheader,
1049                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1050       e->count = iter_count;
1051       e->probability = p;
1052     }
1053
1054   /* Recount dominators for outer blocks.  */
1055   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1056
1057   /* And unroll loop.  */
1058
       /* Set every bit of WONT_EXIT except the one for MAY_EXIT_COPY.  */
1059   bitmap_ones (wont_exit);
1060   bitmap_clear_bit (wont_exit, may_exit_copy);
1061   opt_info_start_duplication (opt_info);
1062
1063   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1064                                       max_unroll,
1065                                       wont_exit, desc->out_edge,
1066                                       &remove_edges,
1067                                       DLTHE_FLAG_UPDATE_FREQ
1068                                       | (opt_info
1069                                          ? DLTHE_RECORD_COPY_NUMBER
1070                                            : 0));
1071   gcc_assert (ok);
1072
1073   if (opt_info)
1074     {
1075       apply_opt_in_copies (opt_info, max_unroll, true, true);
1076       free_opt_info (opt_info);
1077     }
1078
1079   if (exit_at_end)
1080     {
1081       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1082       /* Find a new in and out edge; they are in the last copy we have
1083          made.  */
1084
1085       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1086         {
1087           desc->out_edge = EDGE_SUCC (exit_block, 0);
1088           desc->in_edge = EDGE_SUCC (exit_block, 1);
1089         }
1090       else
1091         {
1092           desc->out_edge = EDGE_SUCC (exit_block, 1);
1093           desc->in_edge = EDGE_SUCC (exit_block, 0);
1094         }
1095     }
1096
1097   /* Remove the edges.  */
1098   FOR_EACH_VEC_ELT (remove_edges, i, e)
1099     remove_path (e);
1100
1101   /* We must be careful when updating the number of iterations due to
1102      preconditioning and the fact that the value must be valid at entry
1103      of the loop.  After passing through the above code, we see that
1104      the correct new number of iterations is this:  */
1105   gcc_assert (!desc->const_iter);
1106   desc->niter_expr =
1107     simplify_gen_binary (UDIV, desc->mode, old_niter,
1108                          gen_int_mode (max_unroll + 1, desc->mode));
       /* Scale the recorded upper bound, estimate and likely upper bound the
          same way.  */
1109   loop->nb_iterations_upper_bound
1110     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1111   if (loop->any_estimate)
1112     loop->nb_iterations_estimate
1113       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1114   if (loop->any_likely_upper_bound)
1115     loop->nb_iterations_likely_upper_bound
1116       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
1117   if (exit_at_end)
1118     {
           /* Here the expression and the upper bound are further decremented
              by one and the no-loop assumptions are cleared.  The estimate and
              the likely upper bound are decremented only when they are
              recorded and nonzero; otherwise they are marked unavailable.  */
1119       desc->niter_expr =
1120         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1121       desc->noloop_assumptions = NULL_RTX;
1122       --loop->nb_iterations_upper_bound;
1123       if (loop->any_estimate
1124           && loop->nb_iterations_estimate != 0)
1125         --loop->nb_iterations_estimate;
1126       else
1127         loop->any_estimate = false;
1128       if (loop->any_likely_upper_bound
1129           && loop->nb_iterations_likely_upper_bound != 0)
1130         --loop->nb_iterations_likely_upper_bound;
1131       else
1132         loop->any_likely_upper_bound = false;
1133     }
1134
1135   if (dump_file)
1136     fprintf (dump_file,
1137              ";; Unrolled loop %d times, counting # of iterations "
1138              "in runtime, %i insns\n",
1139              max_unroll, num_loop_insns (loop));
1140 }
1141
1142 /* Decide whether to unroll LOOP stupidly and how much.  */
1143 static void
1144 decide_unroll_stupid (struct loop *loop, int flags)
1145 {
1146   unsigned nunroll, nunroll_by_av, i;
1147   struct niter_desc *desc;
1148   widest_int iterations;
1149
1150   if (!(flags & UAP_UNROLL_ALL))
1151     {
1152       /* We were not asked to, just return back silently.  */
1153       return;
1154     }
1155
1156   if (dump_file)
1157     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1158
1159   /* nunroll = total number of copies of the original loop body in
1160      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1161   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1162   nunroll_by_av
1163     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1164   if (nunroll > nunroll_by_av)
1165     nunroll = nunroll_by_av;
1166   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1167     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1168
1169   if (targetm.loop_unroll_adjust)
1170     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1171
1172   /* Skip big loops.  */
1173   if (nunroll <= 1)
1174     {
1175       if (dump_file)
1176         fprintf (dump_file, ";; Not considering loop, is too big\n");
1177       return;
1178     }
1179
1180   /* Check for simple loops.  */
1181   desc = get_simple_loop_desc (loop);
1182
1183   /* Check simpleness.  */
1184   if (desc->simple_p && !desc->assumptions)
1185     {
1186       if (dump_file)
1187         fprintf (dump_file, ";; The loop is simple\n");
1188       return;
1189     }
1190
1191   /* Do not unroll loops with branches inside -- it increases number
1192      of mispredicts.
1193      TODO: this heuristic needs tunning; call inside the loop body
1194      is also relatively good reason to not unroll.  */
1195   if (num_loop_branches (loop) > 1)
1196     {
1197       if (dump_file)
1198         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1199       return;
1200     }
1201
1202   /* Check whether the loop rolls.  */
1203   if ((get_estimated_loop_iterations (loop, &iterations)
1204        || get_likely_max_loop_iterations (loop, &iterations))
1205       && wi::ltu_p (iterations, 2 * nunroll))
1206     {
1207       if (dump_file)
1208         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1209       return;
1210     }
1211
1212   /* Success.  Now force nunroll to be power of 2, as it seems that this
1213      improves results (partially because of better alignments, partially
1214      because of some dark magic).  */
1215   for (i = 1; 2 * i <= nunroll; i *= 2)
1216     continue;
1217
1218   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1219   loop->lpt_decision.times = i - 1;
1220 }
1221
1222 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1223
1224    while (cond)
1225      body;
1226
1227    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1228
1229    while (cond)
1230      {
1231        body;
1232        if (!cond) break;
1233        body;
1234        if (!cond) break;
1235        body;
1236        if (!cond) break;
1237        body;
1238      }
1239    */
1240 static void
1241 unroll_loop_stupid (struct loop *loop)
1242 {
1243   unsigned nunroll = loop->lpt_decision.times;
1244   struct niter_desc *desc = get_simple_loop_desc (loop);
1245   struct opt_info *opt_info = NULL;
1246   bool ok;
1247
1248   if (flag_split_ivs_in_unroller
1249       || flag_variable_expansion_in_unroller)
1250     opt_info = analyze_insns_in_loop (loop);
1251
1252   auto_sbitmap wont_exit (nunroll + 1);
1253   bitmap_clear (wont_exit);
1254   opt_info_start_duplication (opt_info);
1255
1256   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1257                                       nunroll, wont_exit,
1258                                       NULL, NULL,
1259                                       DLTHE_FLAG_UPDATE_FREQ
1260                                       | (opt_info
1261                                          ? DLTHE_RECORD_COPY_NUMBER
1262                                            : 0));
1263   gcc_assert (ok);
1264
1265   if (opt_info)
1266     {
1267       apply_opt_in_copies (opt_info, nunroll, true, true);
1268       free_opt_info (opt_info);
1269     }
1270
1271   if (desc->simple_p)
1272     {
1273       /* We indeed may get here provided that there are nontrivial assumptions
1274          for a loop to be really simple.  We could update the counts, but the
1275          problem is that we are unable to decide which exit will be taken
1276          (not really true in case the number of iterations is constant,
1277          but no one will do anything with this information, so we do not
1278          worry about it).  */
1279       desc->simple_p = false;
1280     }
1281
1282   if (dump_file)
1283     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1284              nunroll, num_loop_insns (loop));
1285 }
1286
1287 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1288    Set *DEBUG_USES to the number of debug insns that reference the
1289    variable.  */
1290
1291 static bool
1292 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1293                                   int *debug_uses)
1294 {
1295   basic_block *body, bb;
1296   unsigned i;
1297   int count_ref = 0;
1298   rtx_insn *insn;
1299
1300   body = get_loop_body (loop);
1301   for (i = 0; i < loop->num_nodes; i++)
1302     {
1303       bb = body[i];
1304
1305       FOR_BB_INSNS (bb, insn)
1306         if (!rtx_referenced_p (reg, insn))
1307           continue;
1308         else if (DEBUG_INSN_P (insn))
1309           ++*debug_uses;
1310         else if (++count_ref > 1)
1311           break;
1312     }
1313   free (body);
1314   return (count_ref  == 1);
1315 }
1316
1317 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1318
1319 static void
1320 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1321 {
1322   basic_block *body, bb;
1323   unsigned i;
1324   rtx_insn *insn;
1325
1326   body = get_loop_body (loop);
1327   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1328     {
1329       bb = body[i];
1330
1331       FOR_BB_INSNS (bb, insn)
1332         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1333           continue;
1334         else
1335           {
1336             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1337                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1338             if (!--debug_uses)
1339               break;
1340           }
1341     }
1342   free (body);
1343 }
1344
1345 /* Determine whether INSN contains an accumulator
1346    which can be expanded into separate copies,
1347    one for each copy of the LOOP body.
1348
1349    for (i = 0 ; i < n; i++)
1350      sum += a[i];
1351
1352    ==>
1353
1354    sum += a[i]
1355    ....
1356    i = i+1;
1357    sum1 += a[i]
1358    ....
1359    i = i+1
1360    sum2 += a[i];
1361    ....
1362
1363    Return NULL if INSN contains no opportunity for expansion of accumulator.
1364    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1365    information and return a pointer to it.
1366 */
1367
1368 static struct var_to_expand *
1369 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1370 {
1371   rtx set, dest, src;
1372   struct var_to_expand *ves;
1373   unsigned accum_pos;
1374   enum rtx_code code;
1375   int debug_uses = 0;
1376
1377   set = single_set (insn);
1378   if (!set)
1379     return NULL;
1380
1381   dest = SET_DEST (set);
1382   src = SET_SRC (set);
1383   code = GET_CODE (src);
1384
1385   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1386     return NULL;
1387
1388   if (FLOAT_MODE_P (GET_MODE (dest)))
1389     {
1390       if (!flag_associative_math)
1391         return NULL;
1392       /* In the case of FMA, we're also changing the rounding.  */
1393       if (code == FMA && !flag_unsafe_math_optimizations)
1394         return NULL;
1395     }
1396
1397   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1398      in MD.  But if there is no optab to generate the insn, we can not
1399      perform the variable expansion.  This can happen if an MD provides
1400      an insn but not a named pattern to generate it, for example to avoid
1401      producing code that needs additional mode switches like for x87/mmx.
1402
1403      So we check have_insn_for which looks for an optab for the operation
1404      in SRC.  If it doesn't exist, we can't perform the expansion even
1405      though INSN is valid.  */
1406   if (!have_insn_for (code, GET_MODE (src)))
1407     return NULL;
1408
1409   if (!REG_P (dest)
1410       && !(GET_CODE (dest) == SUBREG
1411            && REG_P (SUBREG_REG (dest))))
1412     return NULL;
1413
1414   /* Find the accumulator use within the operation.  */
1415   if (code == FMA)
1416     {
1417       /* We only support accumulation via FMA in the ADD position.  */
1418       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1419         return NULL;
1420       accum_pos = 2;
1421     }
1422   else if (rtx_equal_p (dest, XEXP (src, 0)))
1423     accum_pos = 0;
1424   else if (rtx_equal_p (dest, XEXP (src, 1)))
1425     {
1426       /* The method of expansion that we are using; which includes the
1427          initialization of the expansions with zero and the summation of
1428          the expansions at the end of the computation will yield wrong
1429          results for (x = something - x) thus avoid using it in that case.  */
1430       if (code == MINUS)
1431         return NULL;
1432       accum_pos = 1;
1433     }
1434   else
1435     return NULL;
1436
1437   /* It must not otherwise be used.  */
1438   if (code == FMA)
1439     {
1440       if (rtx_referenced_p (dest, XEXP (src, 0))
1441           || rtx_referenced_p (dest, XEXP (src, 1)))
1442         return NULL;
1443     }
1444   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1445     return NULL;
1446
1447   /* It must be used in exactly one insn.  */
1448   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1449     return NULL;
1450
1451   if (dump_file)
1452     {
1453       fprintf (dump_file, "\n;; Expanding Accumulator ");
1454       print_rtl (dump_file, dest);
1455       fprintf (dump_file, "\n");
1456     }
1457
1458   if (debug_uses)
1459     /* Instead of resetting the debug insns, we could replace each
1460        debug use in the loop with the sum or product of all expanded
1461        accumulators.  Since we'll only know of all expansions at the
1462        end, we'd have to keep track of which vars_to_expand a debug
1463        insn in the loop references, take note of each copy of the
1464        debug insn during unrolling, and when it's all done, compute
1465        the sum or product of each variable and adjust the original
1466        debug insn and each copy thereof.  What a pain!  */
1467     reset_debug_uses_in_loop (loop, dest, debug_uses);
1468
1469   /* Record the accumulator to expand.  */
1470   ves = XNEW (struct var_to_expand);
1471   ves->insn = insn;
1472   ves->reg = copy_rtx (dest);
1473   ves->var_expansions.create (1);
1474   ves->next = NULL;
1475   ves->op = GET_CODE (src);
1476   ves->expansion_count = 0;
1477   ves->reuse_expansion = 0;
1478   return ves;
1479 }
1480
1481 /* Determine whether there is an induction variable in INSN that
1482    we would like to split during unrolling.
1483
1484    I.e. replace
1485
1486    i = i + 1;
1487    ...
1488    i = i + 1;
1489    ...
1490    i = i + 1;
1491    ...
1492
1493    type chains by
1494
1495    i0 = i + 1
1496    ...
1497    i = i0 + 1
1498    ...
1499    i = i0 + 2
1500    ...
1501
1502    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1503    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1504    pointer to it.  */
1505
1506 static struct iv_to_split *
1507 analyze_iv_to_split_insn (rtx_insn *insn)
1508 {
1509   rtx set, dest;
1510   struct rtx_iv iv;
1511   struct iv_to_split *ivts;
1512   scalar_int_mode mode;
1513   bool ok;
1514
1515   /* For now we just split the basic induction variables.  Later this may be
1516      extended for example by selecting also addresses of memory references.  */
1517   set = single_set (insn);
1518   if (!set)
1519     return NULL;
1520
1521   dest = SET_DEST (set);
1522   if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
1523     return NULL;
1524
1525   if (!biv_p (insn, mode, dest))
1526     return NULL;
1527
1528   ok = iv_analyze_result (insn, dest, &iv);
1529
1530   /* This used to be an assert under the assumption that if biv_p returns
1531      true that iv_analyze_result must also return true.  However, that
1532      assumption is not strictly correct as evidenced by pr25569.
1533
1534      Returning NULL when iv_analyze_result returns false is safe and
1535      avoids the problems in pr25569 until the iv_analyze_* routines
1536      can be fixed, which is apparently hard and time consuming
1537      according to their author.  */
1538   if (! ok)
1539     return NULL;
1540
1541   if (iv.step == const0_rtx
1542       || iv.mode != iv.extend_mode)
1543     return NULL;
1544
1545   /* Record the insn to split.  */
1546   ivts = XNEW (struct iv_to_split);
1547   ivts->insn = insn;
1548   ivts->orig_var = dest;
1549   ivts->base_var = NULL_RTX;
1550   ivts->step = iv.step;
1551   ivts->next = NULL;
1552
1553   return ivts;
1554 }
1555
1556 /* Determines which of insns in LOOP can be optimized.
1557    Return a OPT_INFO struct with the relevant hash tables filled
1558    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1559    is undefined for the return value.  */
1560
1561 static struct opt_info *
1562 analyze_insns_in_loop (struct loop *loop)
1563 {
1564   basic_block *body, bb;
1565   unsigned i;
1566   struct opt_info *opt_info = XCNEW (struct opt_info);
1567   rtx_insn *insn;
1568   struct iv_to_split *ivts = NULL;
1569   struct var_to_expand *ves = NULL;
1570   iv_to_split **slot1;
1571   var_to_expand **slot2;
1572   vec<edge> edges = get_loop_exit_edges (loop);
1573   edge exit;
1574   bool can_apply = false;
1575
1576   iv_analysis_loop_init (loop);
1577
1578   body = get_loop_body (loop);
1579
1580   if (flag_split_ivs_in_unroller)
1581     {
1582       opt_info->insns_to_split
1583         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1584       opt_info->iv_to_split_head = NULL;
1585       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1586     }
1587
1588   /* Record the loop exit bb and loop preheader before the unrolling.  */
1589   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1590
1591   if (edges.length () == 1)
1592     {
1593       exit = edges[0];
1594       if (!(exit->flags & EDGE_COMPLEX))
1595         {
1596           opt_info->loop_exit = split_edge (exit);
1597           can_apply = true;
1598         }
1599     }
1600
1601   if (flag_variable_expansion_in_unroller
1602       && can_apply)
1603     {
1604       opt_info->insns_with_var_to_expand
1605         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1606       opt_info->var_to_expand_head = NULL;
1607       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1608     }
1609
1610   for (i = 0; i < loop->num_nodes; i++)
1611     {
1612       bb = body[i];
1613       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1614         continue;
1615
1616       FOR_BB_INSNS (bb, insn)
1617       {
1618         if (!INSN_P (insn))
1619           continue;
1620
1621         if (opt_info->insns_to_split)
1622           ivts = analyze_iv_to_split_insn (insn);
1623
1624         if (ivts)
1625           {
1626             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1627             gcc_assert (*slot1 == NULL);
1628             *slot1 = ivts;
1629             *opt_info->iv_to_split_tail = ivts;
1630             opt_info->iv_to_split_tail = &ivts->next;
1631             continue;
1632           }
1633
1634         if (opt_info->insns_with_var_to_expand)
1635           ves = analyze_insn_to_expand_var (loop, insn);
1636
1637         if (ves)
1638           {
1639             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1640             gcc_assert (*slot2 == NULL);
1641             *slot2 = ves;
1642             *opt_info->var_to_expand_tail = ves;
1643             opt_info->var_to_expand_tail = &ves->next;
1644           }
1645       }
1646     }
1647
1648   edges.release ();
1649   free (body);
1650   return opt_info;
1651 }
1652
1653 /* Called just before loop duplication.  Records start of duplicated area
1654    to OPT_INFO.  */
1655
1656 static void
1657 opt_info_start_duplication (struct opt_info *opt_info)
1658 {
1659   if (opt_info)
1660     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1661 }
1662
/* Return the number of iterations between the initialization of the base
   variable and the copy numbered N_COPY.  N_COPIES is the total number of
   newly created copies.  UNROLLING is true if we are unrolling (not
   peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization is done in the original loop body
     (number 0), so the copy number is the distance.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the copy in which the initialization occurs has number
     1, and the original loop (number 0) comes last.  */
  return n_copy != 0 ? n_copy - 1 : n_copies;
}
1687
1688 /* Allocate basic variable for the induction variable chain.  */
1689
1690 static void
1691 allocate_basic_variable (struct iv_to_split *ivts)
1692 {
1693   rtx expr = SET_SRC (single_set (ivts->insn));
1694
1695   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1696 }
1697
1698 /* Insert initialization of basic variable of IVTS before INSN, taking
1699    the initial value from INSN.  */
1700
1701 static void
1702 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1703 {
1704   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1705   rtx_insn *seq;
1706
1707   start_sequence ();
1708   expr = force_operand (expr, ivts->base_var);
1709   if (expr != ivts->base_var)
1710     emit_move_insn (ivts->base_var, expr);
1711   seq = get_insns ();
1712   end_sequence ();
1713
1714   emit_insn_before (seq, insn);
1715 }
1716
1717 /* Replace the use of induction variable described in IVTS in INSN
1718    by base variable + DELTA * step.  */
1719
1720 static void
1721 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1722 {
1723   rtx expr, *loc, incr, var;
1724   rtx_insn *seq;
1725   machine_mode mode = GET_MODE (ivts->base_var);
1726   rtx src, dest, set;
1727
1728   /* Construct base + DELTA * step.  */
1729   if (!delta)
1730     expr = ivts->base_var;
1731   else
1732     {
1733       incr = simplify_gen_binary (MULT, mode,
1734                                   copy_rtx (ivts->step),
1735                                   gen_int_mode (delta, mode));
1736       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1737                                   ivts->base_var, incr);
1738     }
1739
1740   /* Figure out where to do the replacement.  */
1741   loc = &SET_SRC (single_set (insn));
1742
1743   /* If we can make the replacement right away, we're done.  */
1744   if (validate_change (insn, loc, expr, 0))
1745     return;
1746
1747   /* Otherwise, force EXPR into a register and try again.  */
1748   start_sequence ();
1749   var = gen_reg_rtx (mode);
1750   expr = force_operand (expr, var);
1751   if (expr != var)
1752     emit_move_insn (var, expr);
1753   seq = get_insns ();
1754   end_sequence ();
1755   emit_insn_before (seq, insn);
1756
1757   if (validate_change (insn, loc, var, 0))
1758     return;
1759
1760   /* The last chance.  Try recreating the assignment in insn
1761      completely from scratch.  */
1762   set = single_set (insn);
1763   gcc_assert (set);
1764
1765   start_sequence ();
1766   *loc = var;
1767   src = copy_rtx (SET_SRC (set));
1768   dest = copy_rtx (SET_DEST (set));
1769   src = force_operand (src, dest);
1770   if (src != dest)
1771     emit_move_insn (dest, src);
1772   seq = get_insns ();
1773   end_sequence ();
1774
1775   emit_insn_before (seq, insn);
1776   delete_insn (insn);
1777 }
1778
1779
1780 /* Return one expansion of the accumulator recorded in struct VE.  */
1781
1782 static rtx
1783 get_expansion (struct var_to_expand *ve)
1784 {
1785   rtx reg;
1786
1787   if (ve->reuse_expansion == 0)
1788     reg = ve->reg;
1789   else
1790     reg = ve->var_expansions[ve->reuse_expansion - 1];
1791
1792   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1793     ve->reuse_expansion = 0;
1794   else
1795     ve->reuse_expansion++;
1796
1797   return reg;
1798 }
1799
1800
1801 /* Given INSN replace the uses of the accumulator recorded in VE
1802    with a new register.  */
1803
1804 static void
1805 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1806 {
1807   rtx new_reg, set;
1808   bool really_new_expansion = false;
1809
1810   set = single_set (insn);
1811   gcc_assert (set);
1812
1813   /* Generate a new register only if the expansion limit has not been
1814      reached.  Else reuse an already existing expansion.  */
1815   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1816     {
1817       really_new_expansion = true;
1818       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1819     }
1820   else
1821     new_reg = get_expansion (ve);
1822
1823   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1824   if (apply_change_group ())
1825     if (really_new_expansion)
1826       {
1827         ve->var_expansions.safe_push (new_reg);
1828         ve->expansion_count++;
1829       }
1830 }
1831
1832 /* Initialize the variable expansions in loop preheader.  PLACE is the
1833    loop-preheader basic block where the initialization of the
1834    expansions should take place.  The expansions are initialized with
1835    (-0) when the operation is plus or minus to honor sign zero.  This
1836    way we can prevent cases where the sign of the final result is
1837    effected by the sign of the expansion.  Here is an example to
1838    demonstrate this:
1839
1840    for (i = 0 ; i < n; i++)
1841      sum += something;
1842
1843    ==>
1844
1845    sum += something
1846    ....
1847    i = i+1;
1848    sum1 += something
1849    ....
1850    i = i+1
1851    sum2 += something;
1852    ....
1853
1854    When SUM is initialized with -zero and SOMETHING is also -zero; the
1855    final result of sum should be -zero thus the expansions sum1 and sum2
1856    should be initialized with -zero as well (otherwise we will get +zero
1857    as the final result).  */
1858
1859 static void
1860 insert_var_expansion_initialization (struct var_to_expand *ve,
1861                                      basic_block place)
1862 {
1863   rtx_insn *seq;
1864   rtx var, zero_init;
1865   unsigned i;
1866   machine_mode mode = GET_MODE (ve->reg);
1867   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1868
1869   if (ve->var_expansions.length () == 0)
1870     return;
1871
1872   start_sequence ();
1873   switch (ve->op)
1874     {
1875     case FMA:
1876       /* Note that we only accumulate FMA via the ADD operand.  */
1877     case PLUS:
1878     case MINUS:
1879       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1880         {
1881           if (honor_signed_zero_p)
1882             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1883           else
1884             zero_init = CONST0_RTX (mode);
1885           emit_move_insn (var, zero_init);
1886         }
1887       break;
1888
1889     case MULT:
1890       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1891         {
1892           zero_init = CONST1_RTX (GET_MODE (var));
1893           emit_move_insn (var, zero_init);
1894         }
1895       break;
1896
1897     default:
1898       gcc_unreachable ();
1899     }
1900
1901   seq = get_insns ();
1902   end_sequence ();
1903
1904   emit_insn_after (seq, BB_END (place));
1905 }
1906
1907 /* Combine the variable expansions at the loop exit.  PLACE is the
1908    loop exit basic block where the summation of the expansions should
1909    take place.  */
1910
1911 static void
1912 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1913 {
1914   rtx sum = ve->reg;
1915   rtx expr, var;
1916   rtx_insn *seq, *insn;
1917   unsigned i;
1918
1919   if (ve->var_expansions.length () == 0)
1920     return;
1921
1922   /* ve->reg might be SUBREG or some other non-shareable RTL, and we use
1923      it both here and as the destination of the assignment.  */
1924   sum = copy_rtx (sum);
1925   start_sequence ();
1926   switch (ve->op)
1927     {
1928     case FMA:
1929       /* Note that we only accumulate FMA via the ADD operand.  */
1930     case PLUS:
1931     case MINUS:
1932       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1933         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1934       break;
1935
1936     case MULT:
1937       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1938         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1939       break;
1940
1941     default:
1942       gcc_unreachable ();
1943     }
1944
1945   expr = force_operand (sum, ve->reg);
1946   if (expr != ve->reg)
1947     emit_move_insn (ve->reg, expr);
1948   seq = get_insns ();
1949   end_sequence ();
1950
1951   insn = BB_HEAD (place);
1952   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1953     insn = NEXT_INSN (insn);
1954
1955   emit_insn_after (seq, insn);
1956 }
1957
1958 /* Strip away REG_EQUAL notes for IVs we're splitting.
1959
1960    Updating REG_EQUAL notes for IVs we split is tricky: We
1961    cannot tell until after unrolling, DF-rescanning, and liveness
1962    updating, whether an EQ_USE is reached by the split IV while
1963    the IV reg is still live.  See PR55006.
1964
1965    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1966    because RTL loop-iv requires us to defer rescanning insns and
1967    any notes attached to them.  So resort to old techniques...  */
1968
1969 static void
1970 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1971 {
1972   struct iv_to_split *ivts;
1973   rtx note = find_reg_equal_equiv_note (insn);
1974   if (! note)
1975     return;
1976   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1977     if (reg_mentioned_p (ivts->orig_var, note))
1978       {
1979         remove_note (insn, note);
1980         return;
1981       }
1982 }
1983
1984 /* Apply loop optimizations in loop copies using the
1985    data which gathered during the unrolling.  Structure
1986    OPT_INFO record that data.
1987
1988    UNROLLING is true if we unrolled (not peeled) the loop.
1989    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1990    the loop (as it should happen in complete unrolling, but not in ordinary
1991    peeling of the loop).  */
1992
1993 static void
1994 apply_opt_in_copies (struct opt_info *opt_info,
1995                      unsigned n_copies, bool unrolling,
1996                      bool rewrite_original_loop)
1997 {
1998   unsigned i, delta;
1999   basic_block bb, orig_bb;
2000   rtx_insn *insn, *orig_insn, *next;
2001   struct iv_to_split ivts_templ, *ivts;
2002   struct var_to_expand ve_templ, *ves;
2003
2004   /* Sanity check -- we need to put initialization in the original loop
2005      body.  */
2006   gcc_assert (!unrolling || rewrite_original_loop);
2007
2008   /* Allocate the basic variables (i0).  */
2009   if (opt_info->insns_to_split)
2010     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2011       allocate_basic_variable (ivts);
2012
2013   for (i = opt_info->first_new_block;
2014        i < (unsigned) last_basic_block_for_fn (cfun);
2015        i++)
2016     {
2017       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2018       orig_bb = get_bb_original (bb);
2019
2020       /* bb->aux holds position in copy sequence initialized by
2021          duplicate_loop_to_header_edge.  */
2022       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2023                                         unrolling);
2024       bb->aux = 0;
2025       orig_insn = BB_HEAD (orig_bb);
2026       FOR_BB_INSNS_SAFE (bb, insn, next)
2027         {
2028           if (!INSN_P (insn)
2029               || (DEBUG_INSN_P (insn)
2030                   && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2031             continue;
2032
2033           while (!INSN_P (orig_insn)
2034                  || (DEBUG_INSN_P (orig_insn)
2035                      && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2036                          == LABEL_DECL)))
2037             orig_insn = NEXT_INSN (orig_insn);
2038
2039           ivts_templ.insn = orig_insn;
2040           ve_templ.insn = orig_insn;
2041
2042           /* Apply splitting iv optimization.  */
2043           if (opt_info->insns_to_split)
2044             {
2045               maybe_strip_eq_note_for_split_iv (opt_info, insn);
2046
2047               ivts = opt_info->insns_to_split->find (&ivts_templ);
2048
2049               if (ivts)
2050                 {
2051                   gcc_assert (GET_CODE (PATTERN (insn))
2052                               == GET_CODE (PATTERN (orig_insn)));
2053
2054                   if (!delta)
2055                     insert_base_initialization (ivts, insn);
2056                   split_iv (ivts, insn, delta);
2057                 }
2058             }
2059           /* Apply variable expansion optimization.  */
2060           if (unrolling && opt_info->insns_with_var_to_expand)
2061             {
2062               ves = (struct var_to_expand *)
2063                 opt_info->insns_with_var_to_expand->find (&ve_templ);
2064               if (ves)
2065                 {
2066                   gcc_assert (GET_CODE (PATTERN (insn))
2067                               == GET_CODE (PATTERN (orig_insn)));
2068                   expand_var_during_unrolling (ves, insn);
2069                 }
2070             }
2071           orig_insn = NEXT_INSN (orig_insn);
2072         }
2073     }
2074
2075   if (!rewrite_original_loop)
2076     return;
2077
2078   /* Initialize the variable expansions in the loop preheader
2079      and take care of combining them at the loop exit.  */
2080   if (opt_info->insns_with_var_to_expand)
2081     {
2082       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2083         insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2084       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2085         combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2086     }
2087
2088   /* Rewrite also the original loop body.  Find them as originals of the blocks
2089      in the last copied iteration, i.e. those that have
2090      get_bb_copy (get_bb_original (bb)) == bb.  */
2091   for (i = opt_info->first_new_block;
2092        i < (unsigned) last_basic_block_for_fn (cfun);
2093        i++)
2094     {
2095       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2096       orig_bb = get_bb_original (bb);
2097       if (get_bb_copy (orig_bb) != bb)
2098         continue;
2099
2100       delta = determine_split_iv_delta (0, n_copies, unrolling);
2101       for (orig_insn = BB_HEAD (orig_bb);
2102            orig_insn != NEXT_INSN (BB_END (bb));
2103            orig_insn = next)
2104         {
2105           next = NEXT_INSN (orig_insn);
2106
2107           if (!INSN_P (orig_insn))
2108             continue;
2109
2110           ivts_templ.insn = orig_insn;
2111           if (opt_info->insns_to_split)
2112             {
2113               maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2114
2115               ivts = (struct iv_to_split *)
2116                 opt_info->insns_to_split->find (&ivts_templ);
2117               if (ivts)
2118                 {
2119                   if (!delta)
2120                     insert_base_initialization (ivts, orig_insn);
2121                   split_iv (ivts, orig_insn, delta);
2122                   continue;
2123                 }
2124             }
2125
2126         }
2127     }
2128 }
2129
2130 /* Release OPT_INFO.  */
2131
2132 static void
2133 free_opt_info (struct opt_info *opt_info)
2134 {
2135   delete opt_info->insns_to_split;
2136   opt_info->insns_to_split = NULL;
2137   if (opt_info->insns_with_var_to_expand)
2138     {
2139       struct var_to_expand *ves;
2140
2141       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2142         ves->var_expansions.release ();
2143       delete opt_info->insns_with_var_to_expand;
2144       opt_info->insns_with_var_to_expand = NULL;
2145     }
2146   free (opt_info);
2147 }