gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "target.h"
  25 #include "rtl.h"
  26 #include "tree.h"
  27 #include "cfghooks.h"
  28 #include "memmodel.h"
  29 #include "optabs.h"
  30 #include "emit-rtl.h"
  31 #include "recog.h"
  32 #include "profile.h"
  33 #include "cfgrtl.h"
  34 #include "cfgloop.h"
  35 #include "params.h"
  36 #include "dojump.h"
  37 #include "expr.h"
  38 #include "dumpfile.h"
  39
  40 /* This pass performs loop unrolling.  We only perform this
  41    optimization on innermost loops (with single exception) because
  42    the impact on performance is greatest here, and we want to avoid
  43    unnecessary code size growth.  The gain is caused by greater sequentiality
  44    of code, better code to optimize for further passes and in some cases
  45    by fewer testings of exit conditions.  The main problem is code growth,
  46    that impacts performance negatively due to effect of caches.
  47
  48    What we do:
  49
  50    -- unrolling of loops that roll constant times; this is almost always
  51       win, as we get rid of exit condition tests.
  52    -- unrolling of loops that roll number of times that we can compute
  53       in runtime; we also get rid of exit condition tests here, but there
  54       is the extra expense for calculating the number of iterations
  55    -- simple unrolling of remaining loops; this is performed only if we
  56       are asked to, as the gain is questionable in this case and often
  57       it may even slow down the code
  58    For more detailed descriptions of each of those, see comments at
  59    appropriate function below.
  60
  61    There is a lot of parameters (defined and described in params.def) that
  62    control how much we unroll.
  63
  64    ??? A great problem is that we don't have a good way how to determine
  65    how many times we should unroll the loop; the experiments I have made
  66    showed that this choice may affect performance in order of several %.
  67    */
  68
  69 /* Information about induction variables to split.  */
  70
  71 struct iv_to_split
  72 {
  73   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  74   rtx orig_var;         /* The variable (register) for the IV before split.  */
  75   rtx base_var;         /* The variable on that the values in the further
  76                            iterations are based.  */
  77   rtx step;             /* Step of the induction variable.  */
  78   struct iv_to_split *next; /* Next entry in walking order.  */
  79 };
  80
  81 /* Information about accumulators to expand.  */
  82
  83 struct var_to_expand
  84 {
  85   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
  86   rtx reg;                         /* The accumulator which is expanded.  */
  87   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  88   struct var_to_expand *next;      /* Next entry in walking order.  */
  89   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
  90                                       or multiplication.  */
  91   int expansion_count;             /* Count the number of expansions generated so far.  */
  92   int reuse_expansion;             /* The expansion we intend to reuse to expand
  93                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
  94                                       the original accumulator.  Else use
  95                                       var_expansions[REUSE_EXPANSION - 1].  */
  96 };
  97
  98 /* Hashtable helper for iv_to_split.  */
  99
 100 struct iv_split_hasher : free_ptr_hash <iv_to_split>
 101 {
 102   static inline hashval_t hash (const iv_to_split *);
 103   static inline bool equal (const iv_to_split *, const iv_to_split *);
 104 };
 105
 106
 107 /* A hash function for information about insns to split.  */
 108
 109 inline hashval_t
 110 iv_split_hasher::hash (const iv_to_split *ivts)
 111 {
 112   return (hashval_t) INSN_UID (ivts->insn);
 113 }
 114
 115 /* An equality functions for information about insns to split.  */
 116
 117 inline bool
 118 iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
 119 {
 120   return i1->insn == i2->insn;
 121 }
 122
 123 /* Hashtable helper for iv_to_split.  */
 124
 125 struct var_expand_hasher : free_ptr_hash <var_to_expand>
 126 {
 127   static inline hashval_t hash (const var_to_expand *);
 128   static inline bool equal (const var_to_expand *, const var_to_expand *);
 129 };
 130
 131 /* Return a hash for VES.  */
 132
 133 inline hashval_t
 134 var_expand_hasher::hash (const var_to_expand *ves)
 135 {
 136   return (hashval_t) INSN_UID (ves->insn);
 137 }
 138
 139 /* Return true if I1 and I2 refer to the same instruction.  */
 140
 141 inline bool
 142 var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
 143 {
 144   return i1->insn == i2->insn;
 145 }
 146
 147 /* Information about optimization applied in
 148    the unrolled loop.  */
 149
 150 struct opt_info
 151 {
 152   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 153                                                   split.  */
 154   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 155   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 156   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 157                                         insns with accumulators to expand.  */
 158   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 159   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 160   unsigned first_new_block;        /* The first basic block that was
 161                                       duplicated.  */
 162   basic_block loop_exit;           /* The loop exit basic block.  */
 163   basic_block loop_preheader;      /* The loop preheader basic block.  */
 164 };
 165
 166 static void decide_unroll_stupid (struct loop *, int);
 167 static void decide_unroll_constant_iterations (struct loop *, int);
 168 static void decide_unroll_runtime_iterations (struct loop *, int);
 169 static void unroll_loop_stupid (struct loop *);
 170 static void decide_unrolling (int);
 171 static void unroll_loop_constant_iterations (struct loop *);
 172 static void unroll_loop_runtime_iterations (struct loop *);
 173 static struct opt_info *analyze_insns_in_loop (struct loop *);
 174 static void opt_info_start_duplication (struct opt_info *);
 175 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 176 static void free_opt_info (struct opt_info *);
 177 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
 178 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
 179 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 180 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 181 static void insert_var_expansion_initialization (struct var_to_expand *,
 182                                                  basic_block);
 183 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 184                                              basic_block);
 185 static rtx get_expansion (struct var_to_expand *);
 186
 187 /* Emit a message summarizing the unroll that will be
 188    performed for LOOP, along with the loop's location LOCUS, if
 189    appropriate given the dump or -fopt-info settings.  */
 190
 191 static void
 192 report_unroll (struct loop *loop, location_t locus)
 193 {
 194   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
 195
 196   if (loop->lpt_decision.decision == LPT_NONE)
 197     return;
 198
 199   if (!dump_enabled_p ())
 200     return;
 201
 202   dump_printf_loc (report_flags, locus,
 203                    "loop unrolled %d times",
 204                    loop->lpt_decision.times);
 205   if (profile_info)
 206     dump_printf (report_flags,
 207                  " (header execution count %d)",
 208                  (int)loop->header->count);
 209
 210   dump_printf (report_flags, "\n");
 211 }
 212
 213 /* Decide whether unroll loops and how much.  */
 214 static void
 215 decide_unrolling (int flags)
 216 {
 217   struct loop *loop;
 218
 219   /* Scan the loops, inner ones first.  */
 220   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 221     {
 222       loop->lpt_decision.decision = LPT_NONE;
 223       location_t locus = get_loop_location (loop);
 224
 225       if (dump_enabled_p ())
 226         dump_printf_loc (TDF_RTL, locus,
 227                          ";; *** Considering loop %d at BB %d for "
 228                          "unrolling ***\n",
 229                          loop->num, loop->header->index);
 230
 231       /* Do not peel cold areas.  */
 232       if (optimize_loop_for_size_p (loop))
 233         {
 234           if (dump_file)
 235             fprintf (dump_file, ";; Not considering loop, cold area\n");
 236           continue;
 237         }
 238
 239       /* Can the loop be manipulated?  */
 240       if (!can_duplicate_loop_p (loop))
 241         {
 242           if (dump_file)
 243             fprintf (dump_file,
 244                      ";; Not considering loop, cannot duplicate\n");
 245           continue;
 246         }
 247
 248       /* Skip non-innermost loops.  */
 249       if (loop->inner)
 250         {
 251           if (dump_file)
 252             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 253           continue;
 254         }
 255
 256       loop->ninsns = num_loop_insns (loop);
 257       loop->av_ninsns = average_num_loop_insns (loop);
 258
 259       /* Try transformations one by one in decreasing order of
 260          priority.  */
 261
 262       decide_unroll_constant_iterations (loop, flags);
 263       if (loop->lpt_decision.decision == LPT_NONE)
 264         decide_unroll_runtime_iterations (loop, flags);
 265       if (loop->lpt_decision.decision == LPT_NONE)
 266         decide_unroll_stupid (loop, flags);
 267
 268       report_unroll (loop, locus);
 269     }
 270 }
 271
 272 /* Unroll LOOPS.  */
 273 void
 274 unroll_loops (int flags)
 275 {
 276   struct loop *loop;
 277   bool changed = false;
 278
 279   /* Now decide rest of unrolling.  */
 280   decide_unrolling (flags);
 281
 282   /* Scan the loops, inner ones first.  */
 283   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 284     {
 285       /* And perform the appropriate transformations.  */
 286       switch (loop->lpt_decision.decision)
 287         {
 288         case LPT_UNROLL_CONSTANT:
 289           unroll_loop_constant_iterations (loop);
 290           changed = true;
 291           break;
 292         case LPT_UNROLL_RUNTIME:
 293           unroll_loop_runtime_iterations (loop);
 294           changed = true;
 295           break;
 296         case LPT_UNROLL_STUPID:
 297           unroll_loop_stupid (loop);
 298           changed = true;
 299           break;
 300         case LPT_NONE:
 301           break;
 302         default:
 303           gcc_unreachable ();
 304         }
 305     }
 306
 307     if (changed)
 308       {
 309         calculate_dominance_info (CDI_DOMINATORS);
 310         fix_loop_structure (NULL);
 311       }
 312
 313   iv_analysis_done ();
 314 }
 315
 316 /* Check whether exit of the LOOP is at the end of loop body.  */
 317
 318 static bool
 319 loop_exit_at_end_p (struct loop *loop)
 320 {
 321   struct niter_desc *desc = get_simple_loop_desc (loop);
 322   rtx_insn *insn;
 323
 324   /* We should never have conditional in latch block.  */
 325   gcc_assert (desc->in_edge->dest != loop->header);
 326
 327   if (desc->in_edge->dest != loop->latch)
 328     return false;
 329
 330   /* Check that the latch is empty.  */
 331   FOR_BB_INSNS (loop->latch, insn)
 332     {
 333       if (INSN_P (insn) && active_insn_p (insn))
 334         return false;
 335     }
 336
 337   return true;
 338 }
 339
 340 /* Decide whether to unroll LOOP iterating constant number of times
 341    and how much.  */
 342
 343 static void
 344 decide_unroll_constant_iterations (struct loop *loop, int flags)
 345 {
 346   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 347   struct niter_desc *desc;
 348   widest_int iterations;
 349
 350   if (!(flags & UAP_UNROLL))
 351     {
 352       /* We were not asked to, just return back silently.  */
 353       return;
 354     }
 355
 356   if (dump_file)
 357     fprintf (dump_file,
 358              "\n;; Considering unrolling loop with constant "
 359              "number of iterations\n");
 360
 361   /* nunroll = total number of copies of the original loop body in
 362      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 363   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 364   nunroll_by_av
 365     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 366   if (nunroll > nunroll_by_av)
 367     nunroll = nunroll_by_av;
 368   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 369     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 370
 371   if (targetm.loop_unroll_adjust)
 372     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 373
 374   /* Skip big loops.  */
 375   if (nunroll <= 1)
 376     {
 377       if (dump_file)
 378         fprintf (dump_file, ";; Not considering loop, is too big\n");
 379       return;
 380     }
 381
 382   /* Check for simple loops.  */
 383   desc = get_simple_loop_desc (loop);
 384
 385   /* Check number of iterations.  */
 386   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 387     {
 388       if (dump_file)
 389         fprintf (dump_file,
 390                  ";; Unable to prove that the loop iterates constant times\n");
 391       return;
 392     }
 393
 394   /* Check whether the loop rolls enough to consider.
 395      Consult also loop bounds and profile; in the case the loop has more
 396      than one exit it may well loop less than determined maximal number
 397      of iterations.  */
 398   if (desc->niter < 2 * nunroll
 399       || ((get_estimated_loop_iterations (loop, &iterations)
 400            || get_likely_max_loop_iterations (loop, &iterations))
 401           && wi::ltu_p (iterations, 2 * nunroll)))
 402     {
 403       if (dump_file)
 404         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 405       return;
 406     }
 407
 408   /* Success; now compute number of iterations to unroll.  We alter
 409      nunroll so that as few as possible copies of loop body are
 410      necessary, while still not decreasing the number of unrollings
 411      too much (at most by 1).  */
 412   best_copies = 2 * nunroll + 10;
 413
 414   i = 2 * nunroll + 2;
 415   if (i - 1 >= desc->niter)
 416     i = desc->niter - 2;
 417
 418   for (; i >= nunroll - 1; i--)
 419     {
 420       unsigned exit_mod = desc->niter % (i + 1);
 421
 422       if (!loop_exit_at_end_p (loop))
 423         n_copies = exit_mod + i + 1;
 424       else if (exit_mod != (unsigned) i
 425                || desc->noloop_assumptions != NULL_RTX)
 426         n_copies = exit_mod + i + 2;
 427       else
 428         n_copies = i + 1;
 429
 430       if (n_copies < best_copies)
 431         {
 432           best_copies = n_copies;
 433           best_unroll = i;
 434         }
 435     }
 436
 437   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 438   loop->lpt_decision.times = best_unroll;
 439 }
 440
 441 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 442    The transformation does this:
 443
 444    for (i = 0; i < 102; i++)
 445      body;
 446
 447    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 448
 449    i = 0;
 450    body; i++;
 451    body; i++;
 452    while (i < 102)
 453      {
 454        body; i++;
 455        body; i++;
 456        body; i++;
 457        body; i++;
 458      }
 459   */
 460 static void
 461 unroll_loop_constant_iterations (struct loop *loop)
 462 {
 463   unsigned HOST_WIDE_INT niter;
 464   unsigned exit_mod;
 465   unsigned i;
 466   edge e;
 467   unsigned max_unroll = loop->lpt_decision.times;
 468   struct niter_desc *desc = get_simple_loop_desc (loop);
 469   bool exit_at_end = loop_exit_at_end_p (loop);
 470   struct opt_info *opt_info = NULL;
 471   bool ok;
 472
 473   niter = desc->niter;
 474
 475   /* Should not get here (such loop should be peeled instead).  */
 476   gcc_assert (niter > max_unroll + 1);
 477
 478   exit_mod = niter % (max_unroll + 1);
 479
 480   auto_sbitmap wont_exit (max_unroll + 1);
 481   bitmap_ones (wont_exit);
 482
 483   auto_vec<edge> remove_edges;
 484   if (flag_split_ivs_in_unroller
 485       || flag_variable_expansion_in_unroller)
 486     opt_info = analyze_insns_in_loop (loop);
 487
 488   if (!exit_at_end)
 489     {
 490       /* The exit is not at the end of the loop; leave exit test
 491          in the first copy, so that the loops that start with test
 492          of exit condition have continuous body after unrolling.  */
 493
 494       if (dump_file)
 495         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 496
 497       /* Peel exit_mod iterations.  */
 498       bitmap_clear_bit (wont_exit, 0);
 499       if (desc->noloop_assumptions)
 500         bitmap_clear_bit (wont_exit, 1);
 501
 502       if (exit_mod)
 503         {
 504           opt_info_start_duplication (opt_info);
 505           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 506                                               exit_mod,
 507                                               wont_exit, desc->out_edge,
 508                                               &remove_edges,
 509                                               DLTHE_FLAG_UPDATE_FREQ
 510                                               | (opt_info && exit_mod > 1
 511                                                  ? DLTHE_RECORD_COPY_NUMBER
 512                                                    : 0));
 513           gcc_assert (ok);
 514
 515           if (opt_info && exit_mod > 1)
 516             apply_opt_in_copies (opt_info, exit_mod, false, false);
 517
 518           desc->noloop_assumptions = NULL_RTX;
 519           desc->niter -= exit_mod;
 520           loop->nb_iterations_upper_bound -= exit_mod;
 521           if (loop->any_estimate
 522               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 523             loop->nb_iterations_estimate -= exit_mod;
 524           else
 525             loop->any_estimate = false;
 526           if (loop->any_likely_upper_bound
 527               && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
 528             loop->nb_iterations_likely_upper_bound -= exit_mod;
 529           else
 530             loop->any_likely_upper_bound = false;
 531         }
 532
 533       bitmap_set_bit (wont_exit, 1);
 534     }
 535   else
 536     {
 537       /* Leave exit test in last copy, for the same reason as above if
 538          the loop tests the condition at the end of loop body.  */
 539
 540       if (dump_file)
 541         fprintf (dump_file, ";; Condition at end of loop.\n");
 542
 543       /* We know that niter >= max_unroll + 2; so we do not need to care of
 544          case when we would exit before reaching the loop.  So just peel
 545          exit_mod + 1 iterations.  */
 546       if (exit_mod != max_unroll
 547           || desc->noloop_assumptions)
 548         {
 549           bitmap_clear_bit (wont_exit, 0);
 550           if (desc->noloop_assumptions)
 551             bitmap_clear_bit (wont_exit, 1);
 552
 553           opt_info_start_duplication (opt_info);
 554           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 555                                               exit_mod + 1,
 556                                               wont_exit, desc->out_edge,
 557                                               &remove_edges,
 558                                               DLTHE_FLAG_UPDATE_FREQ
 559                                               | (opt_info && exit_mod > 0
 560                                                  ? DLTHE_RECORD_COPY_NUMBER
 561                                                    : 0));
 562           gcc_assert (ok);
 563
 564           if (opt_info && exit_mod > 0)
 565             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 566
 567           desc->niter -= exit_mod + 1;
 568           loop->nb_iterations_upper_bound -= exit_mod + 1;
 569           if (loop->any_estimate
 570               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 571             loop->nb_iterations_estimate -= exit_mod + 1;
 572           else
 573             loop->any_estimate = false;
 574           if (loop->any_likely_upper_bound
 575               && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
 576             loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
 577           else
 578             loop->any_likely_upper_bound = false;
 579           desc->noloop_assumptions = NULL_RTX;
 580
 581           bitmap_set_bit (wont_exit, 0);
 582           bitmap_set_bit (wont_exit, 1);
 583         }
 584
 585       bitmap_clear_bit (wont_exit, max_unroll);
 586     }
 587
 588   /* Now unroll the loop.  */
 589
 590   opt_info_start_duplication (opt_info);
 591   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 592                                       max_unroll,
 593                                       wont_exit, desc->out_edge,
 594                                       &remove_edges,
 595                                       DLTHE_FLAG_UPDATE_FREQ
 596                                       | (opt_info
 597                                          ? DLTHE_RECORD_COPY_NUMBER
 598                                            : 0));
 599   gcc_assert (ok);
 600
 601   if (opt_info)
 602     {
 603       apply_opt_in_copies (opt_info, max_unroll, true, true);
 604       free_opt_info (opt_info);
 605     }
 606
 607   if (exit_at_end)
 608     {
 609       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 610       /* Find a new in and out edge; they are in the last copy we have made.  */
 611
 612       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 613         {
 614           desc->out_edge = EDGE_SUCC (exit_block, 0);
 615           desc->in_edge = EDGE_SUCC (exit_block, 1);
 616         }
 617       else
 618         {
 619           desc->out_edge = EDGE_SUCC (exit_block, 1);
 620           desc->in_edge = EDGE_SUCC (exit_block, 0);
 621         }
 622     }
 623
 624   desc->niter /= max_unroll + 1;
 625   loop->nb_iterations_upper_bound
 626     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 627   if (loop->any_estimate)
 628     loop->nb_iterations_estimate
 629       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 630   if (loop->any_likely_upper_bound)
 631     loop->nb_iterations_likely_upper_bound
 632       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
 633   desc->niter_expr = GEN_INT (desc->niter);
 634
 635   /* Remove the edges.  */
 636   FOR_EACH_VEC_ELT (remove_edges, i, e)
 637     remove_path (e);
 638
 639   if (dump_file)
 640     fprintf (dump_file,
 641              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 642              max_unroll, num_loop_insns (loop));
 643 }
 644
 645 /* Decide whether to unroll LOOP iterating runtime computable number of times
 646    and how much.  */
 647 static void
 648 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 649 {
 650   unsigned nunroll, nunroll_by_av, i;
 651   struct niter_desc *desc;
 652   widest_int iterations;
 653
 654   if (!(flags & UAP_UNROLL))
 655     {
 656       /* We were not asked to, just return back silently.  */
 657       return;
 658     }
 659
 660   if (dump_file)
 661     fprintf (dump_file,
 662              "\n;; Considering unrolling loop with runtime "
 663              "computable number of iterations\n");
 664
 665   /* nunroll = total number of copies of the original loop body in
 666      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 667   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 668   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 669   if (nunroll > nunroll_by_av)
 670     nunroll = nunroll_by_av;
 671   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 672     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 673
 674   if (targetm.loop_unroll_adjust)
 675     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 676
 677   /* Skip big loops.  */
 678   if (nunroll <= 1)
 679     {
 680       if (dump_file)
 681         fprintf (dump_file, ";; Not considering loop, is too big\n");
 682       return;
 683     }
 684
 685   /* Check for simple loops.  */
 686   desc = get_simple_loop_desc (loop);
 687
 688   /* Check simpleness.  */
 689   if (!desc->simple_p || desc->assumptions)
 690     {
 691       if (dump_file)
 692         fprintf (dump_file,
 693                  ";; Unable to prove that the number of iterations "
 694                  "can be counted in runtime\n");
 695       return;
 696     }
 697
 698   if (desc->const_iter)
 699     {
 700       if (dump_file)
 701         fprintf (dump_file, ";; Loop iterates constant times\n");
 702       return;
 703     }
 704
 705   /* Check whether the loop rolls.  */
 706   if ((get_estimated_loop_iterations (loop, &iterations)
 707        || get_likely_max_loop_iterations (loop, &iterations))
 708       && wi::ltu_p (iterations, 2 * nunroll))
 709     {
 710       if (dump_file)
 711         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 712       return;
 713     }
 714
 715   /* Success; now force nunroll to be power of 2, as we are unable to
 716      cope with overflows in computation of number of iterations.  */
 717   for (i = 1; 2 * i <= nunroll; i *= 2)
 718     continue;
 719
 720   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 721   loop->lpt_decision.times = i - 1;
 722 }
 723
 724 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 725    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 726    and NULL is returned instead.  */
 727
 728 basic_block
 729 split_edge_and_insert (edge e, rtx_insn *insns)
 730 {
 731   basic_block bb;
 732
 733   if (!insns)
 734     return NULL;
 735   bb = split_edge (e);
 736   emit_insn_after (insns, BB_END (bb));
 737
 738   /* ??? We used to assume that INSNS can contain control flow insns, and
 739      that we had to try to find sub basic blocks in BB to maintain a valid
 740      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 741      and call break_superblocks when going out of cfglayout mode.  But it
 742      turns out that this never happens; and that if it does ever happen,
 743      the verify_flow_info at the end of the RTL loop passes would fail.
 744
 745      There are two reasons why we expected we could have control flow insns
 746      in INSNS.  The first is when a comparison has to be done in parts, and
 747      the second is when the number of iterations is computed for loops with
 748      the number of iterations known at runtime.  In both cases, test cases
 749      to get control flow in INSNS appear to be impossible to construct:
 750
 751       * If do_compare_rtx_and_jump needs several branches to do comparison
 752         in a mode that needs comparison by parts, we cannot analyze the
 753         number of iterations of the loop, and we never get to unrolling it.
 754
 755       * The code in expand_divmod that was suspected to cause creation of
 756         branching code seems to be only accessed for signed division.  The
 757         divisions used by # of iterations analysis are always unsigned.
 758         Problems might arise on architectures that emits branching code
 759         for some operations that may appear in the unroller (especially
 760         for division), but we have no such architectures.
 761
 762      Considering all this, it was decided that we should for now assume
 763      that INSNS can in theory contain control flow insns, but in practice
 764      it never does.  So we don't handle the theoretical case, and should
 765      a real failure ever show up, we have a pretty good clue for how to
 766      fix it.  */
 767
 768   return bb;
 769 }
 770
 771 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 772    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 773    in order to create a jump.  */
 774
 775 static rtx_insn *
 776 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
 777                       rtx_code_label *label, int prob, rtx_insn *cinsn)
 778 {
 779   rtx_insn *seq;
 780   rtx_jump_insn *jump;
 781   rtx cond;
 782   machine_mode mode;
 783
 784   mode = GET_MODE (op0);
 785   if (mode == VOIDmode)
 786     mode = GET_MODE (op1);
 787
 788   start_sequence ();
 789   if (GET_MODE_CLASS (mode) == MODE_CC)
 790     {
 791       /* A hack -- there seems to be no easy generic way how to make a
 792          conditional jump from a ccmode comparison.  */
 793       gcc_assert (cinsn);
 794       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 795       gcc_assert (GET_CODE (cond) == comp);
 796       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 797       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 798       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 799       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 800       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 801       LABEL_NUSES (JUMP_LABEL (jump))++;
 802       redirect_jump (jump, label, 0);
 803     }
 804   else
 805     {
 806       gcc_assert (!cinsn);
 807
 808       op0 = force_operand (op0, NULL_RTX);
 809       op1 = force_operand (op1, NULL_RTX);
 810       do_compare_rtx_and_jump (op0, op1, comp, 0,
 811                                mode, NULL_RTX, NULL, label, -1);
 812       jump = as_a <rtx_jump_insn *> (get_last_insn ());
 813       jump->set_jump_target (label);
 814       LABEL_NUSES (label)++;
 815     }
 816   add_int_reg_note (jump, REG_BR_PROB, prob);
 817
 818   seq = get_insns ();
 819   end_sequence ();
 820
 821   return seq;
 822 }
 823
 824 /* Unroll LOOP for which we are able to count number of iterations in runtime
 825    LOOP->LPT_DECISION.TIMES times.  The transformation does this (with some
 826    extra care for case n < 0):
 827
 828    for (i = 0; i < n; i++)
 829      body;
 830
 831    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 832
 833    i = 0;
 834    mod = n % 4;
 835
 836    switch (mod)
 837      {
 838        case 3:
 839          body; i++;
 840        case 2:
 841          body; i++;
 842        case 1:
 843          body; i++;
 844        case 0: ;
 845      }
 846
 847    while (i < n)
 848      {
 849        body; i++;
 850        body; i++;
 851        body; i++;
 852        body; i++;
 853      }
 854    */
 855 static void
 856 unroll_loop_runtime_iterations (struct loop *loop)
 857 {
 858   rtx old_niter, niter, tmp;
 859   rtx_insn *init_code, *branch_code;
 860   unsigned i, j, p;
 861   basic_block preheader, *body, swtch, ezc_swtch = NULL;
 862   int may_exit_copy, iter_freq, new_freq;
 863   gcov_type iter_count, new_count;
 864   unsigned n_peel;
 865   edge e;
 866   bool extra_zero_check, last_may_exit;
 867   unsigned max_unroll = loop->lpt_decision.times;
 868   struct niter_desc *desc = get_simple_loop_desc (loop);
 869   bool exit_at_end = loop_exit_at_end_p (loop);
 870   struct opt_info *opt_info = NULL;
 871   bool ok;
 872
 873   if (flag_split_ivs_in_unroller
 874       || flag_variable_expansion_in_unroller)
 875     opt_info = analyze_insns_in_loop (loop);
 876
 877   /* Remember blocks whose dominators will have to be updated.  */
 878   auto_vec<basic_block> dom_bbs;
 879
 880   body = get_loop_body (loop);
 881   for (i = 0; i < loop->num_nodes; i++)
 882     {
 883       vec<basic_block> ldom;
 884       basic_block bb;
 885
 886       ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
 887       FOR_EACH_VEC_ELT (ldom, j, bb)
 888         if (!flow_bb_inside_loop_p (loop, bb))
 889           dom_bbs.safe_push (bb);
 890
 891       ldom.release ();
 892     }
 893   free (body);
 894
 895   if (!exit_at_end)
 896     {
 897       /* Leave exit in first copy (for explanation why see comment in
 898          unroll_loop_constant_iterations).  */
 899       may_exit_copy = 0;
 900       n_peel = max_unroll - 1;
 901       extra_zero_check = true;
 902       last_may_exit = false;
 903     }
 904   else
 905     {
 906       /* Leave exit in last copy (for explanation why see comment in
 907          unroll_loop_constant_iterations).  */
 908       may_exit_copy = max_unroll;
 909       n_peel = max_unroll;
 910       extra_zero_check = false;
 911       last_may_exit = true;
 912     }
 913
 914   /* Get expression for number of iterations.  */
 915   start_sequence ();
 916   old_niter = niter = gen_reg_rtx (desc->mode);
 917   tmp = force_operand (copy_rtx (desc->niter_expr), niter);
 918   if (tmp != niter)
 919     emit_move_insn (niter, tmp);
 920
 921   /* For loops that exit at end, add one to niter to account for first pass
 922      through loop body before reaching exit test. */
 923   if (exit_at_end)
 924     {
 925       niter = expand_simple_binop (desc->mode, PLUS,
 926                                    niter, const1_rtx,
 927                                    NULL_RTX, 0, OPTAB_LIB_WIDEN);
 928       old_niter = niter;
 929     }
 930
 931   /* Count modulo by ANDing it with max_unroll; we use the fact that
 932      the number of unrollings is a power of two, and thus this is correct
 933      even if there is overflow in the computation.  */
 934   niter = expand_simple_binop (desc->mode, AND,
 935                                niter, gen_int_mode (max_unroll, desc->mode),
 936                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
 937
 938   init_code = get_insns ();
 939   end_sequence ();
 940   unshare_all_rtl_in_chain (init_code);
 941
 942   /* Precondition the loop.  */
 943   split_edge_and_insert (loop_preheader_edge (loop), init_code);
 944
 945   auto_vec<edge> remove_edges;
 946
 947   auto_sbitmap wont_exit (max_unroll + 2);
 948
 949   if (extra_zero_check)
 950     {
 951       /* Peel the first copy of loop body.  Leave the exit test if the number
 952          of iterations is not reliable.  Also record the place of the extra zero
 953          check.  */
 954       bitmap_clear (wont_exit);
 955       if (!desc->noloop_assumptions)
 956         bitmap_set_bit (wont_exit, 1);
 957       ezc_swtch = loop_preheader_edge (loop)->src;
 958       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 959                                           1, wont_exit, desc->out_edge,
 960                                           &remove_edges,
 961                                           DLTHE_FLAG_UPDATE_FREQ);
 962       gcc_assert (ok);
 963     }
 964
 965   /* Record the place where switch will be built for preconditioning.  */
 966   swtch = split_edge (loop_preheader_edge (loop));
 967
 968   /* Compute frequency/count increments for each switch block and initialize
 969      innermost switch block.  Switch blocks and peeled loop copies are built
 970      from innermost outward.  */
 971   iter_freq = new_freq = swtch->frequency / (max_unroll + 1);
 972   iter_count = new_count = swtch->count / (max_unroll + 1);
 973   swtch->frequency = new_freq;
 974   swtch->count = new_count;
 975   single_succ_edge (swtch)->count = new_count;
 976
 977   for (i = 0; i < n_peel; i++)
 978     {
 979       /* Peel the copy.  */
 980       bitmap_clear (wont_exit);
 981       if (i != n_peel - 1 || !last_may_exit)
 982         bitmap_set_bit (wont_exit, 1);
 983       ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 984                                           1, wont_exit, desc->out_edge,
 985                                           &remove_edges,
 986                                           DLTHE_FLAG_UPDATE_FREQ);
 987       gcc_assert (ok);
 988
 989       /* Create item for switch.  */
 990       j = n_peel - i - (extra_zero_check ? 0 : 1);
 991       p = REG_BR_PROB_BASE / (i + 2);
 992
 993       preheader = split_edge (loop_preheader_edge (loop));
 994       /* Add in frequency/count of edge from switch block.  */
 995       preheader->frequency += iter_freq;
 996       preheader->count += iter_count;
 997       single_succ_edge (preheader)->count = preheader->count;
 998       branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
 999                                           block_label (preheader), p,
1000                                           NULL);
1001
1002       /* We rely on the fact that the compare and jump cannot be optimized out,
1003          and hence the cfg we create is correct.  */
1004       gcc_assert (branch_code != NULL_RTX);
1005
1006       swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
1007       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1008       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1009       single_succ_edge (swtch)->count = new_count;
1010       new_freq += iter_freq;
1011       new_count += iter_count;
1012       swtch->frequency = new_freq;
1013       swtch->count = new_count;
1014       e = make_edge (swtch, preheader,
1015                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1016       e->count = iter_count;
1017       e->probability = p;
1018     }
1019
1020   if (extra_zero_check)
1021     {
1022       /* Add branch for zero iterations.  */
1023       p = REG_BR_PROB_BASE / (max_unroll + 1);
1024       swtch = ezc_swtch;
1025       preheader = split_edge (loop_preheader_edge (loop));
1026       /* Recompute frequency/count adjustments since initial peel copy may
1027          have exited and reduced those values that were computed above.  */
1028       iter_freq = swtch->frequency / (max_unroll + 1);
1029       iter_count = swtch->count / (max_unroll + 1);
1030       /* Add in frequency/count of edge from switch block.  */
1031       preheader->frequency += iter_freq;
1032       preheader->count += iter_count;
1033       single_succ_edge (preheader)->count = preheader->count;
1034       branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
1035                                           block_label (preheader), p,
1036                                           NULL);
1037       gcc_assert (branch_code != NULL_RTX);
1038
1039       swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
1040       set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
1041       single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
1042       single_succ_edge (swtch)->count -= iter_count;
1043       e = make_edge (swtch, preheader,
1044                      single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
1045       e->count = iter_count;
1046       e->probability = p;
1047     }
1048
1049   /* Recount dominators for outer blocks.  */
1050   iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
1051
1052   /* And unroll loop.  */
1053
1054   bitmap_ones (wont_exit);
1055   bitmap_clear_bit (wont_exit, may_exit_copy);
1056   opt_info_start_duplication (opt_info);
1057
1058   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1059                                       max_unroll,
1060                                       wont_exit, desc->out_edge,
1061                                       &remove_edges,
1062                                       DLTHE_FLAG_UPDATE_FREQ
1063                                       | (opt_info
1064                                          ? DLTHE_RECORD_COPY_NUMBER
1065                                            : 0));
1066   gcc_assert (ok);
1067
1068   if (opt_info)
1069     {
1070       apply_opt_in_copies (opt_info, max_unroll, true, true);
1071       free_opt_info (opt_info);
1072     }
1073
1074   if (exit_at_end)
1075     {
1076       basic_block exit_block = get_bb_copy (desc->in_edge->src);
1077       /* Find a new in and out edge; they are in the last copy we have
1078          made.  */
1079
1080       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
1081         {
1082           desc->out_edge = EDGE_SUCC (exit_block, 0);
1083           desc->in_edge = EDGE_SUCC (exit_block, 1);
1084         }
1085       else
1086         {
1087           desc->out_edge = EDGE_SUCC (exit_block, 1);
1088           desc->in_edge = EDGE_SUCC (exit_block, 0);
1089         }
1090     }
1091
1092   /* Remove the edges.  */
1093   FOR_EACH_VEC_ELT (remove_edges, i, e)
1094     remove_path (e);
1095
1096   /* We must be careful when updating the number of iterations due to
1097      preconditioning and the fact that the value must be valid at entry
1098      of the loop.  After passing through the above code, we see that
1099      the correct new number of iterations is this:  */
1100   gcc_assert (!desc->const_iter);
1101   desc->niter_expr =
1102     simplify_gen_binary (UDIV, desc->mode, old_niter,
1103                          gen_int_mode (max_unroll + 1, desc->mode));
1104   loop->nb_iterations_upper_bound
1105     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
1106   if (loop->any_estimate)
1107     loop->nb_iterations_estimate
1108       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
1109   if (loop->any_likely_upper_bound)
1110     loop->nb_iterations_likely_upper_bound
1111       = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
1112   if (exit_at_end)
1113     {
1114       desc->niter_expr =
1115         simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
1116       desc->noloop_assumptions = NULL_RTX;
1117       --loop->nb_iterations_upper_bound;
1118       if (loop->any_estimate
1119           && loop->nb_iterations_estimate != 0)
1120         --loop->nb_iterations_estimate;
1121       else
1122         loop->any_estimate = false;
1123       if (loop->any_likely_upper_bound
1124           && loop->nb_iterations_likely_upper_bound != 0)
1125         --loop->nb_iterations_likely_upper_bound;
1126       else
1127         loop->any_likely_upper_bound = false;
1128     }
1129
1130   if (dump_file)
1131     fprintf (dump_file,
1132              ";; Unrolled loop %d times, counting # of iterations "
1133              "in runtime, %i insns\n",
1134              max_unroll, num_loop_insns (loop));
1135 }
1136
1137 /* Decide whether to unroll LOOP stupidly and how much.  */
1138 static void
1139 decide_unroll_stupid (struct loop *loop, int flags)
1140 {
1141   unsigned nunroll, nunroll_by_av, i;
1142   struct niter_desc *desc;
1143   widest_int iterations;
1144
1145   if (!(flags & UAP_UNROLL_ALL))
1146     {
1147       /* We were not asked to, just return back silently.  */
1148       return;
1149     }
1150
1151   if (dump_file)
1152     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1153
1154   /* nunroll = total number of copies of the original loop body in
1155      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1156   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1157   nunroll_by_av
1158     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1159   if (nunroll > nunroll_by_av)
1160     nunroll = nunroll_by_av;
1161   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1162     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1163
1164   if (targetm.loop_unroll_adjust)
1165     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1166
1167   /* Skip big loops.  */
1168   if (nunroll <= 1)
1169     {
1170       if (dump_file)
1171         fprintf (dump_file, ";; Not considering loop, is too big\n");
1172       return;
1173     }
1174
1175   /* Check for simple loops.  */
1176   desc = get_simple_loop_desc (loop);
1177
1178   /* Check simpleness.  */
1179   if (desc->simple_p && !desc->assumptions)
1180     {
1181       if (dump_file)
1182         fprintf (dump_file, ";; The loop is simple\n");
1183       return;
1184     }
1185
1186   /* Do not unroll loops with branches inside -- it increases number
1187      of mispredicts.
1188      TODO: this heuristic needs tunning; call inside the loop body
1189      is also relatively good reason to not unroll.  */
1190   if (num_loop_branches (loop) > 1)
1191     {
1192       if (dump_file)
1193         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1194       return;
1195     }
1196
1197   /* Check whether the loop rolls.  */
1198   if ((get_estimated_loop_iterations (loop, &iterations)
1199        || get_likely_max_loop_iterations (loop, &iterations))
1200       && wi::ltu_p (iterations, 2 * nunroll))
1201     {
1202       if (dump_file)
1203         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1204       return;
1205     }
1206
1207   /* Success.  Now force nunroll to be power of 2, as it seems that this
1208      improves results (partially because of better alignments, partially
1209      because of some dark magic).  */
1210   for (i = 1; 2 * i <= nunroll; i *= 2)
1211     continue;
1212
1213   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1214   loop->lpt_decision.times = i - 1;
1215 }
1216
1217 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1218
1219    while (cond)
1220      body;
1221
1222    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1223
1224    while (cond)
1225      {
1226        body;
1227        if (!cond) break;
1228        body;
1229        if (!cond) break;
1230        body;
1231        if (!cond) break;
1232        body;
1233      }
1234    */
1235 static void
1236 unroll_loop_stupid (struct loop *loop)
1237 {
1238   unsigned nunroll = loop->lpt_decision.times;
1239   struct niter_desc *desc = get_simple_loop_desc (loop);
1240   struct opt_info *opt_info = NULL;
1241   bool ok;
1242
1243   if (flag_split_ivs_in_unroller
1244       || flag_variable_expansion_in_unroller)
1245     opt_info = analyze_insns_in_loop (loop);
1246
1247   auto_sbitmap wont_exit (nunroll + 1);
1248   bitmap_clear (wont_exit);
1249   opt_info_start_duplication (opt_info);
1250
1251   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1252                                       nunroll, wont_exit,
1253                                       NULL, NULL,
1254                                       DLTHE_FLAG_UPDATE_FREQ
1255                                       | (opt_info
1256                                          ? DLTHE_RECORD_COPY_NUMBER
1257                                            : 0));
1258   gcc_assert (ok);
1259
1260   if (opt_info)
1261     {
1262       apply_opt_in_copies (opt_info, nunroll, true, true);
1263       free_opt_info (opt_info);
1264     }
1265
1266   if (desc->simple_p)
1267     {
1268       /* We indeed may get here provided that there are nontrivial assumptions
1269          for a loop to be really simple.  We could update the counts, but the
1270          problem is that we are unable to decide which exit will be taken
1271          (not really true in case the number of iterations is constant,
1272          but no one will do anything with this information, so we do not
1273          worry about it).  */
1274       desc->simple_p = false;
1275     }
1276
1277   if (dump_file)
1278     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1279              nunroll, num_loop_insns (loop));
1280 }
1281
1282 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1283    Set *DEBUG_USES to the number of debug insns that reference the
1284    variable.  */
1285
1286 static bool
1287 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1288                                   int *debug_uses)
1289 {
1290   basic_block *body, bb;
1291   unsigned i;
1292   int count_ref = 0;
1293   rtx_insn *insn;
1294
1295   body = get_loop_body (loop);
1296   for (i = 0; i < loop->num_nodes; i++)
1297     {
1298       bb = body[i];
1299
1300       FOR_BB_INSNS (bb, insn)
1301         if (!rtx_referenced_p (reg, insn))
1302           continue;
1303         else if (DEBUG_INSN_P (insn))
1304           ++*debug_uses;
1305         else if (++count_ref > 1)
1306           break;
1307     }
1308   free (body);
1309   return (count_ref  == 1);
1310 }
1311
1312 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1313
1314 static void
1315 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1316 {
1317   basic_block *body, bb;
1318   unsigned i;
1319   rtx_insn *insn;
1320
1321   body = get_loop_body (loop);
1322   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1323     {
1324       bb = body[i];
1325
1326       FOR_BB_INSNS (bb, insn)
1327         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1328           continue;
1329         else
1330           {
1331             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1332                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1333             if (!--debug_uses)
1334               break;
1335           }
1336     }
1337   free (body);
1338 }
1339
1340 /* Determine whether INSN contains an accumulator
1341    which can be expanded into separate copies,
1342    one for each copy of the LOOP body.
1343
1344    for (i = 0 ; i < n; i++)
1345      sum += a[i];
1346
1347    ==>
1348
1349    sum += a[i]
1350    ....
1351    i = i+1;
1352    sum1 += a[i]
1353    ....
1354    i = i+1
1355    sum2 += a[i];
1356    ....
1357
1358    Return NULL if INSN contains no opportunity for expansion of accumulator.
1359    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1360    information and return a pointer to it.
1361 */
1362
1363 static struct var_to_expand *
1364 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1365 {
1366   rtx set, dest, src;
1367   struct var_to_expand *ves;
1368   unsigned accum_pos;
1369   enum rtx_code code;
1370   int debug_uses = 0;
1371
1372   set = single_set (insn);
1373   if (!set)
1374     return NULL;
1375
1376   dest = SET_DEST (set);
1377   src = SET_SRC (set);
1378   code = GET_CODE (src);
1379
1380   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1381     return NULL;
1382
1383   if (FLOAT_MODE_P (GET_MODE (dest)))
1384     {
1385       if (!flag_associative_math)
1386         return NULL;
1387       /* In the case of FMA, we're also changing the rounding.  */
1388       if (code == FMA && !flag_unsafe_math_optimizations)
1389         return NULL;
1390     }
1391
1392   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1393      in MD.  But if there is no optab to generate the insn, we can not
1394      perform the variable expansion.  This can happen if an MD provides
1395      an insn but not a named pattern to generate it, for example to avoid
1396      producing code that needs additional mode switches like for x87/mmx.
1397
1398      So we check have_insn_for which looks for an optab for the operation
1399      in SRC.  If it doesn't exist, we can't perform the expansion even
1400      though INSN is valid.  */
1401   if (!have_insn_for (code, GET_MODE (src)))
1402     return NULL;
1403
1404   if (!REG_P (dest)
1405       && !(GET_CODE (dest) == SUBREG
1406            && REG_P (SUBREG_REG (dest))))
1407     return NULL;
1408
1409   /* Find the accumulator use within the operation.  */
1410   if (code == FMA)
1411     {
1412       /* We only support accumulation via FMA in the ADD position.  */
1413       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1414         return NULL;
1415       accum_pos = 2;
1416     }
1417   else if (rtx_equal_p (dest, XEXP (src, 0)))
1418     accum_pos = 0;
1419   else if (rtx_equal_p (dest, XEXP (src, 1)))
1420     {
1421       /* The method of expansion that we are using; which includes the
1422          initialization of the expansions with zero and the summation of
1423          the expansions at the end of the computation will yield wrong
1424          results for (x = something - x) thus avoid using it in that case.  */
1425       if (code == MINUS)
1426         return NULL;
1427       accum_pos = 1;
1428     }
1429   else
1430     return NULL;
1431
1432   /* It must not otherwise be used.  */
1433   if (code == FMA)
1434     {
1435       if (rtx_referenced_p (dest, XEXP (src, 0))
1436           || rtx_referenced_p (dest, XEXP (src, 1)))
1437         return NULL;
1438     }
1439   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1440     return NULL;
1441
1442   /* It must be used in exactly one insn.  */
1443   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1444     return NULL;
1445
1446   if (dump_file)
1447     {
1448       fprintf (dump_file, "\n;; Expanding Accumulator ");
1449       print_rtl (dump_file, dest);
1450       fprintf (dump_file, "\n");
1451     }
1452
1453   if (debug_uses)
1454     /* Instead of resetting the debug insns, we could replace each
1455        debug use in the loop with the sum or product of all expanded
1456        accummulators.  Since we'll only know of all expansions at the
1457        end, we'd have to keep track of which vars_to_expand a debug
1458        insn in the loop references, take note of each copy of the
1459        debug insn during unrolling, and when it's all done, compute
1460        the sum or product of each variable and adjust the original
1461        debug insn and each copy thereof.  What a pain!  */
1462     reset_debug_uses_in_loop (loop, dest, debug_uses);
1463
1464   /* Record the accumulator to expand.  */
1465   ves = XNEW (struct var_to_expand);
1466   ves->insn = insn;
1467   ves->reg = copy_rtx (dest);
1468   ves->var_expansions.create (1);
1469   ves->next = NULL;
1470   ves->op = GET_CODE (src);
1471   ves->expansion_count = 0;
1472   ves->reuse_expansion = 0;
1473   return ves;
1474 }
1475
1476 /* Determine whether there is an induction variable in INSN that
1477    we would like to split during unrolling.
1478
1479    I.e. replace
1480
1481    i = i + 1;
1482    ...
1483    i = i + 1;
1484    ...
1485    i = i + 1;
1486    ...
1487
1488    type chains by
1489
1490    i0 = i + 1
1491    ...
1492    i = i0 + 1
1493    ...
1494    i = i0 + 2
1495    ...
1496
1497    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1498    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1499    pointer to it.  */
1500
1501 static struct iv_to_split *
1502 analyze_iv_to_split_insn (rtx_insn *insn)
1503 {
1504   rtx set, dest;
1505   struct rtx_iv iv;
1506   struct iv_to_split *ivts;
1507   bool ok;
1508
1509   /* For now we just split the basic induction variables.  Later this may be
1510      extended for example by selecting also addresses of memory references.  */
1511   set = single_set (insn);
1512   if (!set)
1513     return NULL;
1514
1515   dest = SET_DEST (set);
1516   if (!REG_P (dest))
1517     return NULL;
1518
1519   if (!biv_p (insn, dest))
1520     return NULL;
1521
1522   ok = iv_analyze_result (insn, dest, &iv);
1523
1524   /* This used to be an assert under the assumption that if biv_p returns
1525      true that iv_analyze_result must also return true.  However, that
1526      assumption is not strictly correct as evidenced by pr25569.
1527
1528      Returning NULL when iv_analyze_result returns false is safe and
1529      avoids the problems in pr25569 until the iv_analyze_* routines
1530      can be fixed, which is apparently hard and time consuming
1531      according to their author.  */
1532   if (! ok)
1533     return NULL;
1534
1535   if (iv.step == const0_rtx
1536       || iv.mode != iv.extend_mode)
1537     return NULL;
1538
1539   /* Record the insn to split.  */
1540   ivts = XNEW (struct iv_to_split);
1541   ivts->insn = insn;
1542   ivts->orig_var = dest;
1543   ivts->base_var = NULL_RTX;
1544   ivts->step = iv.step;
1545   ivts->next = NULL;
1546
1547   return ivts;
1548 }
1549
1550 /* Determines which of insns in LOOP can be optimized.
1551    Return a OPT_INFO struct with the relevant hash tables filled
1552    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1553    is undefined for the return value.  */
1554
1555 static struct opt_info *
1556 analyze_insns_in_loop (struct loop *loop)
1557 {
1558   basic_block *body, bb;
1559   unsigned i;
1560   struct opt_info *opt_info = XCNEW (struct opt_info);
1561   rtx_insn *insn;
1562   struct iv_to_split *ivts = NULL;
1563   struct var_to_expand *ves = NULL;
1564   iv_to_split **slot1;
1565   var_to_expand **slot2;
1566   vec<edge> edges = get_loop_exit_edges (loop);
1567   edge exit;
1568   bool can_apply = false;
1569
1570   iv_analysis_loop_init (loop);
1571
1572   body = get_loop_body (loop);
1573
1574   if (flag_split_ivs_in_unroller)
1575     {
1576       opt_info->insns_to_split
1577         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1578       opt_info->iv_to_split_head = NULL;
1579       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1580     }
1581
1582   /* Record the loop exit bb and loop preheader before the unrolling.  */
1583   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1584
1585   if (edges.length () == 1)
1586     {
1587       exit = edges[0];
1588       if (!(exit->flags & EDGE_COMPLEX))
1589         {
1590           opt_info->loop_exit = split_edge (exit);
1591           can_apply = true;
1592         }
1593     }
1594
1595   if (flag_variable_expansion_in_unroller
1596       && can_apply)
1597     {
1598       opt_info->insns_with_var_to_expand
1599         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1600       opt_info->var_to_expand_head = NULL;
1601       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1602     }
1603
1604   for (i = 0; i < loop->num_nodes; i++)
1605     {
1606       bb = body[i];
1607       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1608         continue;
1609
1610       FOR_BB_INSNS (bb, insn)
1611       {
1612         if (!INSN_P (insn))
1613           continue;
1614
1615         if (opt_info->insns_to_split)
1616           ivts = analyze_iv_to_split_insn (insn);
1617
1618         if (ivts)
1619           {
1620             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1621             gcc_assert (*slot1 == NULL);
1622             *slot1 = ivts;
1623             *opt_info->iv_to_split_tail = ivts;
1624             opt_info->iv_to_split_tail = &ivts->next;
1625             continue;
1626           }
1627
1628         if (opt_info->insns_with_var_to_expand)
1629           ves = analyze_insn_to_expand_var (loop, insn);
1630
1631         if (ves)
1632           {
1633             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1634             gcc_assert (*slot2 == NULL);
1635             *slot2 = ves;
1636             *opt_info->var_to_expand_tail = ves;
1637             opt_info->var_to_expand_tail = &ves->next;
1638           }
1639       }
1640     }
1641
1642   edges.release ();
1643   free (body);
1644   return opt_info;
1645 }
1646
1647 /* Called just before loop duplication.  Records start of duplicated area
1648    to OPT_INFO.  */
1649
1650 static void
1651 opt_info_start_duplication (struct opt_info *opt_info)
1652 {
1653   if (opt_info)
1654     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1655 }
1656
1657 /* Determine the number of iterations between initialization of the base
1658    variable and the current copy (N_COPY).  N_COPIES is the total number
1659    of newly created copies.  UNROLLING is true if we are unrolling
1660    (not peeling) the loop.  */
1661
1662 static unsigned
1663 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
1664 {
1665   if (unrolling)
1666     {
1667       /* If we are unrolling, initialization is done in the original loop
1668          body (number 0).  */
1669       return n_copy;
1670     }
1671   else
1672     {
1673       /* If we are peeling, the copy in that the initialization occurs has
1674          number 1.  The original loop (number 0) is the last.  */
1675       if (n_copy)
1676         return n_copy - 1;
1677       else
1678         return n_copies;
1679     }
1680 }
1681
1682 /* Allocate basic variable for the induction variable chain.  */
1683
1684 static void
1685 allocate_basic_variable (struct iv_to_split *ivts)
1686 {
1687   rtx expr = SET_SRC (single_set (ivts->insn));
1688
1689   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1690 }
1691
1692 /* Insert initialization of basic variable of IVTS before INSN, taking
1693    the initial value from INSN.  */
1694
1695 static void
1696 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1697 {
1698   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1699   rtx_insn *seq;
1700
1701   start_sequence ();
1702   expr = force_operand (expr, ivts->base_var);
1703   if (expr != ivts->base_var)
1704     emit_move_insn (ivts->base_var, expr);
1705   seq = get_insns ();
1706   end_sequence ();
1707
1708   emit_insn_before (seq, insn);
1709 }
1710
1711 /* Replace the use of induction variable described in IVTS in INSN
1712    by base variable + DELTA * step.  */
1713
1714 static void
1715 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1716 {
1717   rtx expr, *loc, incr, var;
1718   rtx_insn *seq;
1719   machine_mode mode = GET_MODE (ivts->base_var);
1720   rtx src, dest, set;
1721
1722   /* Construct base + DELTA * step.  */
1723   if (!delta)
1724     expr = ivts->base_var;
1725   else
1726     {
1727       incr = simplify_gen_binary (MULT, mode,
1728                                   ivts->step, gen_int_mode (delta, mode));
1729       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1730                                   ivts->base_var, incr);
1731     }
1732
1733   /* Figure out where to do the replacement.  */
1734   loc = &SET_SRC (single_set (insn));
1735
1736   /* If we can make the replacement right away, we're done.  */
1737   if (validate_change (insn, loc, expr, 0))
1738     return;
1739
1740   /* Otherwise, force EXPR into a register and try again.  */
1741   start_sequence ();
1742   var = gen_reg_rtx (mode);
1743   expr = force_operand (expr, var);
1744   if (expr != var)
1745     emit_move_insn (var, expr);
1746   seq = get_insns ();
1747   end_sequence ();
1748   emit_insn_before (seq, insn);
1749
1750   if (validate_change (insn, loc, var, 0))
1751     return;
1752
1753   /* The last chance.  Try recreating the assignment in insn
1754      completely from scratch.  */
1755   set = single_set (insn);
1756   gcc_assert (set);
1757
1758   start_sequence ();
1759   *loc = var;
1760   src = copy_rtx (SET_SRC (set));
1761   dest = copy_rtx (SET_DEST (set));
1762   src = force_operand (src, dest);
1763   if (src != dest)
1764     emit_move_insn (dest, src);
1765   seq = get_insns ();
1766   end_sequence ();
1767
1768   emit_insn_before (seq, insn);
1769   delete_insn (insn);
1770 }
1771
1772
1773 /* Return one expansion of the accumulator recorded in struct VE.  */
1774
1775 static rtx
1776 get_expansion (struct var_to_expand *ve)
1777 {
1778   rtx reg;
1779
1780   if (ve->reuse_expansion == 0)
1781     reg = ve->reg;
1782   else
1783     reg = ve->var_expansions[ve->reuse_expansion - 1];
1784
1785   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1786     ve->reuse_expansion = 0;
1787   else
1788     ve->reuse_expansion++;
1789
1790   return reg;
1791 }
1792
1793
1794 /* Given INSN replace the uses of the accumulator recorded in VE
1795    with a new register.  */
1796
1797 static void
1798 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1799 {
1800   rtx new_reg, set;
1801   bool really_new_expansion = false;
1802
1803   set = single_set (insn);
1804   gcc_assert (set);
1805
1806   /* Generate a new register only if the expansion limit has not been
1807      reached.  Else reuse an already existing expansion.  */
1808   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1809     {
1810       really_new_expansion = true;
1811       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1812     }
1813   else
1814     new_reg = get_expansion (ve);
1815
1816   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1817   if (apply_change_group ())
1818     if (really_new_expansion)
1819       {
1820         ve->var_expansions.safe_push (new_reg);
1821         ve->expansion_count++;
1822       }
1823 }
1824
1825 /* Initialize the variable expansions in loop preheader.  PLACE is the
1826    loop-preheader basic block where the initialization of the
1827    expansions should take place.  The expansions are initialized with
1828    (-0) when the operation is plus or minus to honor sign zero.  This
1829    way we can prevent cases where the sign of the final result is
1830    effected by the sign of the expansion.  Here is an example to
1831    demonstrate this:
1832
1833    for (i = 0 ; i < n; i++)
1834      sum += something;
1835
1836    ==>
1837
1838    sum += something
1839    ....
1840    i = i+1;
1841    sum1 += something
1842    ....
1843    i = i+1
1844    sum2 += something;
1845    ....
1846
1847    When SUM is initialized with -zero and SOMETHING is also -zero; the
1848    final result of sum should be -zero thus the expansions sum1 and sum2
1849    should be initialized with -zero as well (otherwise we will get +zero
1850    as the final result).  */
1851
1852 static void
1853 insert_var_expansion_initialization (struct var_to_expand *ve,
1854                                      basic_block place)
1855 {
1856   rtx_insn *seq;
1857   rtx var, zero_init;
1858   unsigned i;
1859   machine_mode mode = GET_MODE (ve->reg);
1860   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1861
1862   if (ve->var_expansions.length () == 0)
1863     return;
1864
1865   start_sequence ();
1866   switch (ve->op)
1867     {
1868     case FMA:
1869       /* Note that we only accumulate FMA via the ADD operand.  */
1870     case PLUS:
1871     case MINUS:
1872       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1873         {
1874           if (honor_signed_zero_p)
1875             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1876           else
1877             zero_init = CONST0_RTX (mode);
1878           emit_move_insn (var, zero_init);
1879         }
1880       break;
1881
1882     case MULT:
1883       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1884         {
1885           zero_init = CONST1_RTX (GET_MODE (var));
1886           emit_move_insn (var, zero_init);
1887         }
1888       break;
1889
1890     default:
1891       gcc_unreachable ();
1892     }
1893
1894   seq = get_insns ();
1895   end_sequence ();
1896
1897   emit_insn_after (seq, BB_END (place));
1898 }
1899
1900 /* Combine the variable expansions at the loop exit.  PLACE is the
1901    loop exit basic block where the summation of the expansions should
1902    take place.  */
1903
1904 static void
1905 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1906 {
1907   rtx sum = ve->reg;
1908   rtx expr, var;
1909   rtx_insn *seq, *insn;
1910   unsigned i;
1911
1912   if (ve->var_expansions.length () == 0)
1913     return;
1914
1915   start_sequence ();
1916   switch (ve->op)
1917     {
1918     case FMA:
1919       /* Note that we only accumulate FMA via the ADD operand.  */
1920     case PLUS:
1921     case MINUS:
1922       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1923         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1924       break;
1925
1926     case MULT:
1927       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1928         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1929       break;
1930
1931     default:
1932       gcc_unreachable ();
1933     }
1934
1935   expr = force_operand (sum, ve->reg);
1936   if (expr != ve->reg)
1937     emit_move_insn (ve->reg, expr);
1938   seq = get_insns ();
1939   end_sequence ();
1940
1941   insn = BB_HEAD (place);
1942   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1943     insn = NEXT_INSN (insn);
1944
1945   emit_insn_after (seq, insn);
1946 }
1947
1948 /* Strip away REG_EQUAL notes for IVs we're splitting.
1949
1950    Updating REG_EQUAL notes for IVs we split is tricky: We
1951    cannot tell until after unrolling, DF-rescanning, and liveness
1952    updating, whether an EQ_USE is reached by the split IV while
1953    the IV reg is still live.  See PR55006.
1954
1955    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1956    because RTL loop-iv requires us to defer rescanning insns and
1957    any notes attached to them.  So resort to old techniques...  */
1958
1959 static void
1960 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1961 {
1962   struct iv_to_split *ivts;
1963   rtx note = find_reg_equal_equiv_note (insn);
1964   if (! note)
1965     return;
1966   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1967     if (reg_mentioned_p (ivts->orig_var, note))
1968       {
1969         remove_note (insn, note);
1970         return;
1971       }
1972 }
1973
1974 /* Apply loop optimizations in loop copies using the
1975    data which gathered during the unrolling.  Structure
1976    OPT_INFO record that data.
1977
1978    UNROLLING is true if we unrolled (not peeled) the loop.
1979    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1980    the loop (as it should happen in complete unrolling, but not in ordinary
1981    peeling of the loop).  */
1982
1983 static void
1984 apply_opt_in_copies (struct opt_info *opt_info,
1985                      unsigned n_copies, bool unrolling,
1986                      bool rewrite_original_loop)
1987 {
1988   unsigned i, delta;
1989   basic_block bb, orig_bb;
1990   rtx_insn *insn, *orig_insn, *next;
1991   struct iv_to_split ivts_templ, *ivts;
1992   struct var_to_expand ve_templ, *ves;
1993
1994   /* Sanity check -- we need to put initialization in the original loop
1995      body.  */
1996   gcc_assert (!unrolling || rewrite_original_loop);
1997
1998   /* Allocate the basic variables (i0).  */
1999   if (opt_info->insns_to_split)
2000     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
2001       allocate_basic_variable (ivts);
2002
2003   for (i = opt_info->first_new_block;
2004        i < (unsigned) last_basic_block_for_fn (cfun);
2005        i++)
2006     {
2007       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2008       orig_bb = get_bb_original (bb);
2009
2010       /* bb->aux holds position in copy sequence initialized by
2011          duplicate_loop_to_header_edge.  */
2012       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
2013                                         unrolling);
2014       bb->aux = 0;
2015       orig_insn = BB_HEAD (orig_bb);
2016       FOR_BB_INSNS_SAFE (bb, insn, next)
2017         {
2018           if (!INSN_P (insn)
2019               || (DEBUG_INSN_P (insn)
2020                   && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
2021             continue;
2022
2023           while (!INSN_P (orig_insn)
2024                  || (DEBUG_INSN_P (orig_insn)
2025                      && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
2026                          == LABEL_DECL)))
2027             orig_insn = NEXT_INSN (orig_insn);
2028
2029           ivts_templ.insn = orig_insn;
2030           ve_templ.insn = orig_insn;
2031
2032           /* Apply splitting iv optimization.  */
2033           if (opt_info->insns_to_split)
2034             {
2035               maybe_strip_eq_note_for_split_iv (opt_info, insn);
2036
2037               ivts = opt_info->insns_to_split->find (&ivts_templ);
2038
2039               if (ivts)
2040                 {
2041                   gcc_assert (GET_CODE (PATTERN (insn))
2042                               == GET_CODE (PATTERN (orig_insn)));
2043
2044                   if (!delta)
2045                     insert_base_initialization (ivts, insn);
2046                   split_iv (ivts, insn, delta);
2047                 }
2048             }
2049           /* Apply variable expansion optimization.  */
2050           if (unrolling && opt_info->insns_with_var_to_expand)
2051             {
2052               ves = (struct var_to_expand *)
2053                 opt_info->insns_with_var_to_expand->find (&ve_templ);
2054               if (ves)
2055                 {
2056                   gcc_assert (GET_CODE (PATTERN (insn))
2057                               == GET_CODE (PATTERN (orig_insn)));
2058                   expand_var_during_unrolling (ves, insn);
2059                 }
2060             }
2061           orig_insn = NEXT_INSN (orig_insn);
2062         }
2063     }
2064
2065   if (!rewrite_original_loop)
2066     return;
2067
2068   /* Initialize the variable expansions in the loop preheader
2069      and take care of combining them at the loop exit.  */
2070   if (opt_info->insns_with_var_to_expand)
2071     {
2072       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2073         insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2074       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2075         combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2076     }
2077
2078   /* Rewrite also the original loop body.  Find them as originals of the blocks
2079      in the last copied iteration, i.e. those that have
2080      get_bb_copy (get_bb_original (bb)) == bb.  */
2081   for (i = opt_info->first_new_block;
2082        i < (unsigned) last_basic_block_for_fn (cfun);
2083        i++)
2084     {
2085       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2086       orig_bb = get_bb_original (bb);
2087       if (get_bb_copy (orig_bb) != bb)
2088         continue;
2089
2090       delta = determine_split_iv_delta (0, n_copies, unrolling);
2091       for (orig_insn = BB_HEAD (orig_bb);
2092            orig_insn != NEXT_INSN (BB_END (bb));
2093            orig_insn = next)
2094         {
2095           next = NEXT_INSN (orig_insn);
2096
2097           if (!INSN_P (orig_insn))
2098             continue;
2099
2100           ivts_templ.insn = orig_insn;
2101           if (opt_info->insns_to_split)
2102             {
2103               maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2104
2105               ivts = (struct iv_to_split *)
2106                 opt_info->insns_to_split->find (&ivts_templ);
2107               if (ivts)
2108                 {
2109                   if (!delta)
2110                     insert_base_initialization (ivts, orig_insn);
2111                   split_iv (ivts, orig_insn, delta);
2112                   continue;
2113                 }
2114             }
2115
2116         }
2117     }
2118 }
2119
2120 /* Release OPT_INFO.  */
2121
2122 static void
2123 free_opt_info (struct opt_info *opt_info)
2124 {
2125   delete opt_info->insns_to_split;
2126   opt_info->insns_to_split = NULL;
2127   if (opt_info->insns_with_var_to_expand)
2128     {
2129       struct var_to_expand *ves;
2130
2131       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2132         ves->var_expansions.release ();
2133       delete opt_info->insns_with_var_to_expand;
2134       opt_info->insns_with_var_to_expand = NULL;
2135     }
2136   free (opt_info);
2137 }