gcc/loop-unroll.c

   1 /* Loop unrolling.
   2    Copyright (C) 2002-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tm.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "hard-reg-set.h"
  27 #include "obstack.h"
  28 #include "profile.h"
  29 #include "predict.h"
  30 #include "vec.h"
  31 #include "hashtab.h"
  32 #include "hash-set.h"
  33 #include "machmode.h"
  34 #include "input.h"
  35 #include "function.h"
  36 #include "dominance.h"
  37 #include "cfg.h"
  38 #include "cfgrtl.h"
  39 #include "basic-block.h"
  40 #include "cfgloop.h"
  41 #include "params.h"
  42 #include "insn-codes.h"
  43 #include "optabs.h"
  44 #include "expr.h"
  45 #include "hash-table.h"
  46 #include "recog.h"
  47 #include "target.h"
  48 #include "dumpfile.h"
  49
  50 /* This pass performs loop unrolling.  We only perform this
  51    optimization on innermost loops (with single exception) because
  52    the impact on performance is greatest here, and we want to avoid
   53    unnecessary code size growth.  The gain comes from greater sequentiality
   54    of the code, better optimization opportunities for later passes and, in
   55    some cases, fewer tests of exit conditions.  The main problem is code
   56    growth, which impacts performance negatively due to cache effects.
  57
  58    What we do:
  59
  60    -- unrolling of loops that roll constant times; this is almost always
  61       win, as we get rid of exit condition tests.
  62    -- unrolling of loops that roll number of times that we can compute
  63       in runtime; we also get rid of exit condition tests here, but there
  64       is the extra expense for calculating the number of iterations
  65    -- simple unrolling of remaining loops; this is performed only if we
  66       are asked to, as the gain is questionable in this case and often
  67       it may even slow down the code
  68    For more detailed descriptions of each of those, see comments at
  69    appropriate function below.
  70
   71    There are a lot of parameters (defined and described in params.def) that
   72    control how much we unroll.
  73
   74    ??? A major problem is that we don't have a good way to determine
   75    how many times we should unroll the loop; the experiments I have made
   76    showed that this choice may affect performance on the order of several %.
   77    */
  78
  79 /* Information about induction variables to split.  */
  80
  81 struct iv_to_split
  82 {
  83   rtx_insn *insn;       /* The insn in that the induction variable occurs.  */
  84   rtx orig_var;         /* The variable (register) for the IV before split.  */
  85   rtx base_var;         /* The variable on that the values in the further
  86                            iterations are based.  */
  87   rtx step;             /* Step of the induction variable.  */
  88   struct iv_to_split *next; /* Next entry in walking order.  */
  89 };
  90
  91 /* Information about accumulators to expand.  */
  92
  93 struct var_to_expand
  94 {
  95   rtx_insn *insn;                  /* The insn in that the variable expansion occurs.  */
  96   rtx reg;                         /* The accumulator which is expanded.  */
  97   vec<rtx> var_expansions;   /* The copies of the accumulator which is expanded.  */
  98   struct var_to_expand *next;      /* Next entry in walking order.  */
  99   enum rtx_code op;                /* The type of the accumulation - addition, subtraction
 100                                       or multiplication.  */
 101   int expansion_count;             /* Count the number of expansions generated so far.  */
 102   int reuse_expansion;             /* The expansion we intend to reuse to expand
 103                                       the accumulator.  If REUSE_EXPANSION is 0 reuse
 104                                       the original accumulator.  Else use
 105                                       var_expansions[REUSE_EXPANSION - 1].  */
 106 };
 107
 108 /* Hashtable helper for iv_to_split.  */
 109
 110 struct iv_split_hasher : typed_free_remove <iv_to_split>
 111 {
 112   typedef iv_to_split value_type;
 113   typedef iv_to_split compare_type;
 114   static inline hashval_t hash (const value_type *);
 115   static inline bool equal (const value_type *, const compare_type *);
 116 };
 117
 118
 119 /* A hash function for information about insns to split.  */
 120
 121 inline hashval_t
 122 iv_split_hasher::hash (const value_type *ivts)
 123 {
 124   return (hashval_t) INSN_UID (ivts->insn);
 125 }
 126
 127 /* An equality functions for information about insns to split.  */
 128
 129 inline bool
 130 iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
 131 {
 132   return i1->insn == i2->insn;
 133 }
 134
 135 /* Hashtable helper for iv_to_split.  */
 136
 137 struct var_expand_hasher : typed_free_remove <var_to_expand>
 138 {
 139   typedef var_to_expand value_type;
 140   typedef var_to_expand compare_type;
 141   static inline hashval_t hash (const value_type *);
 142   static inline bool equal (const value_type *, const compare_type *);
 143 };
 144
 145 /* Return a hash for VES.  */
 146
 147 inline hashval_t
 148 var_expand_hasher::hash (const value_type *ves)
 149 {
 150   return (hashval_t) INSN_UID (ves->insn);
 151 }
 152
 153 /* Return true if I1 and I2 refer to the same instruction.  */
 154
 155 inline bool
 156 var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
 157 {
 158   return i1->insn == i2->insn;
 159 }
 160
 161 /* Information about optimization applied in
 162    the unrolled loop.  */
 163
 164 struct opt_info
 165 {
 166   hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
 167                                                   split.  */
 168   struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
 169   struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
 170   hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
 171                                         insns with accumulators to expand.  */
 172   struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
 173   struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
 174   unsigned first_new_block;        /* The first basic block that was
 175                                       duplicated.  */
 176   basic_block loop_exit;           /* The loop exit basic block.  */
 177   basic_block loop_preheader;      /* The loop preheader basic block.  */
 178 };
 179
 180 static void decide_unroll_stupid (struct loop *, int);
 181 static void decide_unroll_constant_iterations (struct loop *, int);
 182 static void decide_unroll_runtime_iterations (struct loop *, int);
 183 static void unroll_loop_stupid (struct loop *);
 184 static void decide_unrolling (int);
 185 static void unroll_loop_constant_iterations (struct loop *);
 186 static void unroll_loop_runtime_iterations (struct loop *);
 187 static struct opt_info *analyze_insns_in_loop (struct loop *);
 188 static void opt_info_start_duplication (struct opt_info *);
 189 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
 190 static void free_opt_info (struct opt_info *);
 191 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
 192 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
 193 static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
 194 static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
 195 static void insert_var_expansion_initialization (struct var_to_expand *,
 196                                                  basic_block);
 197 static void combine_var_copies_in_loop_exit (struct var_to_expand *,
 198                                              basic_block);
 199 static rtx get_expansion (struct var_to_expand *);
 200
 201 /* Emit a message summarizing the unroll that will be
 202    performed for LOOP, along with the loop's location LOCUS, if
 203    appropriate given the dump or -fopt-info settings.  */
 204
 205 static void
 206 report_unroll (struct loop *loop, location_t locus)
 207 {
 208   int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;
 209
 210   if (loop->lpt_decision.decision == LPT_NONE)
 211     return;
 212
 213   if (!dump_enabled_p ())
 214     return;
 215
 216   dump_printf_loc (report_flags, locus,
 217                    "loop unrolled %d times",
 218                    loop->lpt_decision.times);
 219   if (profile_info)
 220     dump_printf (report_flags,
 221                  " (header execution count %d)",
 222                  (int)loop->header->count);
 223
 224   dump_printf (report_flags, "\n");
 225 }
 226
 227 /* Decide whether unroll loops and how much.  */
 228 static void
 229 decide_unrolling (int flags)
 230 {
 231   struct loop *loop;
 232
 233   /* Scan the loops, inner ones first.  */
 234   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 235     {
 236       loop->lpt_decision.decision = LPT_NONE;
 237       location_t locus = get_loop_location (loop);
 238
 239       if (dump_enabled_p ())
 240         dump_printf_loc (TDF_RTL, locus,
 241                          ";; *** Considering loop %d at BB %d for "
 242                          "unrolling ***\n",
 243                          loop->num, loop->header->index);
 244
 245       /* Do not peel cold areas.  */
 246       if (optimize_loop_for_size_p (loop))
 247         {
 248           if (dump_file)
 249             fprintf (dump_file, ";; Not considering loop, cold area\n");
 250           continue;
 251         }
 252
 253       /* Can the loop be manipulated?  */
 254       if (!can_duplicate_loop_p (loop))
 255         {
 256           if (dump_file)
 257             fprintf (dump_file,
 258                      ";; Not considering loop, cannot duplicate\n");
 259           continue;
 260         }
 261
 262       /* Skip non-innermost loops.  */
 263       if (loop->inner)
 264         {
 265           if (dump_file)
 266             fprintf (dump_file, ";; Not considering loop, is not innermost\n");
 267           continue;
 268         }
 269
 270       loop->ninsns = num_loop_insns (loop);
 271       loop->av_ninsns = average_num_loop_insns (loop);
 272
 273       /* Try transformations one by one in decreasing order of
 274          priority.  */
 275
 276       decide_unroll_constant_iterations (loop, flags);
 277       if (loop->lpt_decision.decision == LPT_NONE)
 278         decide_unroll_runtime_iterations (loop, flags);
 279       if (loop->lpt_decision.decision == LPT_NONE)
 280         decide_unroll_stupid (loop, flags);
 281
 282       report_unroll (loop, locus);
 283     }
 284 }
 285
 286 /* Unroll LOOPS.  */
 287 void
 288 unroll_loops (int flags)
 289 {
 290   struct loop *loop;
 291   bool changed = false;
 292
 293   /* Now decide rest of unrolling.  */
 294   decide_unrolling (flags);
 295
 296   /* Scan the loops, inner ones first.  */
 297   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
 298     {
 299       /* And perform the appropriate transformations.  */
 300       switch (loop->lpt_decision.decision)
 301         {
 302         case LPT_UNROLL_CONSTANT:
 303           unroll_loop_constant_iterations (loop);
 304           changed = true;
 305           break;
 306         case LPT_UNROLL_RUNTIME:
 307           unroll_loop_runtime_iterations (loop);
 308           changed = true;
 309           break;
 310         case LPT_UNROLL_STUPID:
 311           unroll_loop_stupid (loop);
 312           changed = true;
 313           break;
 314         case LPT_NONE:
 315           break;
 316         default:
 317           gcc_unreachable ();
 318         }
 319     }
 320
 321     if (changed)
 322       {
 323         calculate_dominance_info (CDI_DOMINATORS);
 324         fix_loop_structure (NULL);
 325       }
 326
 327   iv_analysis_done ();
 328 }
 329
 330 /* Check whether exit of the LOOP is at the end of loop body.  */
 331
 332 static bool
 333 loop_exit_at_end_p (struct loop *loop)
 334 {
 335   struct niter_desc *desc = get_simple_loop_desc (loop);
 336   rtx_insn *insn;
 337
 338   /* We should never have conditional in latch block.  */
 339   gcc_assert (desc->in_edge->dest != loop->header);
 340
 341   if (desc->in_edge->dest != loop->latch)
 342     return false;
 343
 344   /* Check that the latch is empty.  */
 345   FOR_BB_INSNS (loop->latch, insn)
 346     {
 347       if (INSN_P (insn) && active_insn_p (insn))
 348         return false;
 349     }
 350
 351   return true;
 352 }
 353
 354 /* Decide whether to unroll LOOP iterating constant number of times
 355    and how much.  */
 356
 357 static void
 358 decide_unroll_constant_iterations (struct loop *loop, int flags)
 359 {
 360   unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
 361   struct niter_desc *desc;
 362   widest_int iterations;
 363
 364   if (!(flags & UAP_UNROLL))
 365     {
 366       /* We were not asked to, just return back silently.  */
 367       return;
 368     }
 369
 370   if (dump_file)
 371     fprintf (dump_file,
 372              "\n;; Considering unrolling loop with constant "
 373              "number of iterations\n");
 374
 375   /* nunroll = total number of copies of the original loop body in
 376      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 377   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 378   nunroll_by_av
 379     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 380   if (nunroll > nunroll_by_av)
 381     nunroll = nunroll_by_av;
 382   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 383     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 384
 385   if (targetm.loop_unroll_adjust)
 386     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 387
 388   /* Skip big loops.  */
 389   if (nunroll <= 1)
 390     {
 391       if (dump_file)
 392         fprintf (dump_file, ";; Not considering loop, is too big\n");
 393       return;
 394     }
 395
 396   /* Check for simple loops.  */
 397   desc = get_simple_loop_desc (loop);
 398
 399   /* Check number of iterations.  */
 400   if (!desc->simple_p || !desc->const_iter || desc->assumptions)
 401     {
 402       if (dump_file)
 403         fprintf (dump_file,
 404                  ";; Unable to prove that the loop iterates constant times\n");
 405       return;
 406     }
 407
 408   /* Check whether the loop rolls enough to consider.
 409      Consult also loop bounds and profile; in the case the loop has more
 410      than one exit it may well loop less than determined maximal number
 411      of iterations.  */
 412   if (desc->niter < 2 * nunroll
 413       || ((get_estimated_loop_iterations (loop, &iterations)
 414            || get_max_loop_iterations (loop, &iterations))
 415           && wi::ltu_p (iterations, 2 * nunroll)))
 416     {
 417       if (dump_file)
 418         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 419       return;
 420     }
 421
 422   /* Success; now compute number of iterations to unroll.  We alter
 423      nunroll so that as few as possible copies of loop body are
 424      necessary, while still not decreasing the number of unrollings
 425      too much (at most by 1).  */
 426   best_copies = 2 * nunroll + 10;
 427
 428   i = 2 * nunroll + 2;
 429   if (i - 1 >= desc->niter)
 430     i = desc->niter - 2;
 431
 432   for (; i >= nunroll - 1; i--)
 433     {
 434       unsigned exit_mod = desc->niter % (i + 1);
 435
 436       if (!loop_exit_at_end_p (loop))
 437         n_copies = exit_mod + i + 1;
 438       else if (exit_mod != (unsigned) i
 439                || desc->noloop_assumptions != NULL_RTX)
 440         n_copies = exit_mod + i + 2;
 441       else
 442         n_copies = i + 1;
 443
 444       if (n_copies < best_copies)
 445         {
 446           best_copies = n_copies;
 447           best_unroll = i;
 448         }
 449     }
 450
 451   loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
 452   loop->lpt_decision.times = best_unroll;
 453 }
 454
 455 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times.
 456    The transformation does this:
 457
 458    for (i = 0; i < 102; i++)
 459      body;
 460
 461    ==>  (LOOP->LPT_DECISION.TIMES == 3)
 462
 463    i = 0;
 464    body; i++;
 465    body; i++;
 466    while (i < 102)
 467      {
 468        body; i++;
 469        body; i++;
 470        body; i++;
 471        body; i++;
 472      }
 473   */
 474 static void
 475 unroll_loop_constant_iterations (struct loop *loop)
 476 {
 477   unsigned HOST_WIDE_INT niter;
 478   unsigned exit_mod;
 479   sbitmap wont_exit;
 480   unsigned i;
 481   edge e;
 482   unsigned max_unroll = loop->lpt_decision.times;
 483   struct niter_desc *desc = get_simple_loop_desc (loop);
 484   bool exit_at_end = loop_exit_at_end_p (loop);
 485   struct opt_info *opt_info = NULL;
 486   bool ok;
 487
 488   niter = desc->niter;
 489
 490   /* Should not get here (such loop should be peeled instead).  */
 491   gcc_assert (niter > max_unroll + 1);
 492
 493   exit_mod = niter % (max_unroll + 1);
 494
 495   wont_exit = sbitmap_alloc (max_unroll + 1);
 496   bitmap_ones (wont_exit);
 497
 498   auto_vec<edge> remove_edges;
 499   if (flag_split_ivs_in_unroller
 500       || flag_variable_expansion_in_unroller)
 501     opt_info = analyze_insns_in_loop (loop);
 502
 503   if (!exit_at_end)
 504     {
 505       /* The exit is not at the end of the loop; leave exit test
 506          in the first copy, so that the loops that start with test
 507          of exit condition have continuous body after unrolling.  */
 508
 509       if (dump_file)
 510         fprintf (dump_file, ";; Condition at beginning of loop.\n");
 511
 512       /* Peel exit_mod iterations.  */
 513       bitmap_clear_bit (wont_exit, 0);
 514       if (desc->noloop_assumptions)
 515         bitmap_clear_bit (wont_exit, 1);
 516
 517       if (exit_mod)
 518         {
 519           opt_info_start_duplication (opt_info);
 520           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 521                                               exit_mod,
 522                                               wont_exit, desc->out_edge,
 523                                               &remove_edges,
 524                                               DLTHE_FLAG_UPDATE_FREQ
 525                                               | (opt_info && exit_mod > 1
 526                                                  ? DLTHE_RECORD_COPY_NUMBER
 527                                                    : 0));
 528           gcc_assert (ok);
 529
 530           if (opt_info && exit_mod > 1)
 531             apply_opt_in_copies (opt_info, exit_mod, false, false);
 532
 533           desc->noloop_assumptions = NULL_RTX;
 534           desc->niter -= exit_mod;
 535           loop->nb_iterations_upper_bound -= exit_mod;
 536           if (loop->any_estimate
 537               && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
 538             loop->nb_iterations_estimate -= exit_mod;
 539           else
 540             loop->any_estimate = false;
 541         }
 542
 543       bitmap_set_bit (wont_exit, 1);
 544     }
 545   else
 546     {
 547       /* Leave exit test in last copy, for the same reason as above if
 548          the loop tests the condition at the end of loop body.  */
 549
 550       if (dump_file)
 551         fprintf (dump_file, ";; Condition at end of loop.\n");
 552
 553       /* We know that niter >= max_unroll + 2; so we do not need to care of
 554          case when we would exit before reaching the loop.  So just peel
 555          exit_mod + 1 iterations.  */
 556       if (exit_mod != max_unroll
 557           || desc->noloop_assumptions)
 558         {
 559           bitmap_clear_bit (wont_exit, 0);
 560           if (desc->noloop_assumptions)
 561             bitmap_clear_bit (wont_exit, 1);
 562
 563           opt_info_start_duplication (opt_info);
 564           ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
 565                                               exit_mod + 1,
 566                                               wont_exit, desc->out_edge,
 567                                               &remove_edges,
 568                                               DLTHE_FLAG_UPDATE_FREQ
 569                                               | (opt_info && exit_mod > 0
 570                                                  ? DLTHE_RECORD_COPY_NUMBER
 571                                                    : 0));
 572           gcc_assert (ok);
 573
 574           if (opt_info && exit_mod > 0)
 575             apply_opt_in_copies (opt_info, exit_mod + 1, false, false);
 576
 577           desc->niter -= exit_mod + 1;
 578           loop->nb_iterations_upper_bound -= exit_mod + 1;
 579           if (loop->any_estimate
 580               && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
 581             loop->nb_iterations_estimate -= exit_mod + 1;
 582           else
 583             loop->any_estimate = false;
 584           desc->noloop_assumptions = NULL_RTX;
 585
 586           bitmap_set_bit (wont_exit, 0);
 587           bitmap_set_bit (wont_exit, 1);
 588         }
 589
 590       bitmap_clear_bit (wont_exit, max_unroll);
 591     }
 592
 593   /* Now unroll the loop.  */
 594
 595   opt_info_start_duplication (opt_info);
 596   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
 597                                       max_unroll,
 598                                       wont_exit, desc->out_edge,
 599                                       &remove_edges,
 600                                       DLTHE_FLAG_UPDATE_FREQ
 601                                       | (opt_info
 602                                          ? DLTHE_RECORD_COPY_NUMBER
 603                                            : 0));
 604   gcc_assert (ok);
 605
 606   if (opt_info)
 607     {
 608       apply_opt_in_copies (opt_info, max_unroll, true, true);
 609       free_opt_info (opt_info);
 610     }
 611
 612   free (wont_exit);
 613
 614   if (exit_at_end)
 615     {
 616       basic_block exit_block = get_bb_copy (desc->in_edge->src);
 617       /* Find a new in and out edge; they are in the last copy we have made.  */
 618
 619       if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
 620         {
 621           desc->out_edge = EDGE_SUCC (exit_block, 0);
 622           desc->in_edge = EDGE_SUCC (exit_block, 1);
 623         }
 624       else
 625         {
 626           desc->out_edge = EDGE_SUCC (exit_block, 1);
 627           desc->in_edge = EDGE_SUCC (exit_block, 0);
 628         }
 629     }
 630
 631   desc->niter /= max_unroll + 1;
 632   loop->nb_iterations_upper_bound
 633     = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
 634   if (loop->any_estimate)
 635     loop->nb_iterations_estimate
 636       = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
 637   desc->niter_expr = GEN_INT (desc->niter);
 638
 639   /* Remove the edges.  */
 640   FOR_EACH_VEC_ELT (remove_edges, i, e)
 641     remove_path (e);
 642
 643   if (dump_file)
 644     fprintf (dump_file,
 645              ";; Unrolled loop %d times, constant # of iterations %i insns\n",
 646              max_unroll, num_loop_insns (loop));
 647 }
 648
 649 /* Decide whether to unroll LOOP iterating runtime computable number of times
 650    and how much.  */
 651 static void
 652 decide_unroll_runtime_iterations (struct loop *loop, int flags)
 653 {
 654   unsigned nunroll, nunroll_by_av, i;
 655   struct niter_desc *desc;
 656   widest_int iterations;
 657
 658   if (!(flags & UAP_UNROLL))
 659     {
 660       /* We were not asked to, just return back silently.  */
 661       return;
 662     }
 663
 664   if (dump_file)
 665     fprintf (dump_file,
 666              "\n;; Considering unrolling loop with runtime "
 667              "computable number of iterations\n");
 668
 669   /* nunroll = total number of copies of the original loop body in
 670      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
 671   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
 672   nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
 673   if (nunroll > nunroll_by_av)
 674     nunroll = nunroll_by_av;
 675   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
 676     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
 677
 678   if (targetm.loop_unroll_adjust)
 679     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
 680
 681   /* Skip big loops.  */
 682   if (nunroll <= 1)
 683     {
 684       if (dump_file)
 685         fprintf (dump_file, ";; Not considering loop, is too big\n");
 686       return;
 687     }
 688
 689   /* Check for simple loops.  */
 690   desc = get_simple_loop_desc (loop);
 691
 692   /* Check simpleness.  */
 693   if (!desc->simple_p || desc->assumptions)
 694     {
 695       if (dump_file)
 696         fprintf (dump_file,
 697                  ";; Unable to prove that the number of iterations "
 698                  "can be counted in runtime\n");
 699       return;
 700     }
 701
 702   if (desc->const_iter)
 703     {
 704       if (dump_file)
 705         fprintf (dump_file, ";; Loop iterates constant times\n");
 706       return;
 707     }
 708
 709   /* Check whether the loop rolls.  */
 710   if ((get_estimated_loop_iterations (loop, &iterations)
 711        || get_max_loop_iterations (loop, &iterations))
 712       && wi::ltu_p (iterations, 2 * nunroll))
 713     {
 714       if (dump_file)
 715         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
 716       return;
 717     }
 718
 719   /* Success; now force nunroll to be power of 2, as we are unable to
 720      cope with overflows in computation of number of iterations.  */
 721   for (i = 1; 2 * i <= nunroll; i *= 2)
 722     continue;
 723
 724   loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
 725   loop->lpt_decision.times = i - 1;
 726 }
 727
 728 /* Splits edge E and inserts the sequence of instructions INSNS on it, and
 729    returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
 730    and NULL is returned instead.  */
 731
 732 basic_block
 733 split_edge_and_insert (edge e, rtx_insn *insns)
 734 {
 735   basic_block bb;
 736
 737   if (!insns)
 738     return NULL;
 739   bb = split_edge (e);
 740   emit_insn_after (insns, BB_END (bb));
 741
 742   /* ??? We used to assume that INSNS can contain control flow insns, and
 743      that we had to try to find sub basic blocks in BB to maintain a valid
 744      CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
 745      and call break_superblocks when going out of cfglayout mode.  But it
 746      turns out that this never happens; and that if it does ever happen,
 747      the verify_flow_info at the end of the RTL loop passes would fail.
 748
 749      There are two reasons why we expected we could have control flow insns
 750      in INSNS.  The first is when a comparison has to be done in parts, and
 751      the second is when the number of iterations is computed for loops with
 752      the number of iterations known at runtime.  In both cases, test cases
 753      to get control flow in INSNS appear to be impossible to construct:
 754
 755       * If do_compare_rtx_and_jump needs several branches to do comparison
 756         in a mode that needs comparison by parts, we cannot analyze the
 757         number of iterations of the loop, and we never get to unrolling it.
 758
 759       * The code in expand_divmod that was suspected to cause creation of
 760         branching code seems to be only accessed for signed division.  The
 761         divisions used by # of iterations analysis are always unsigned.
 762         Problems might arise on architectures that emits branching code
 763         for some operations that may appear in the unroller (especially
 764         for division), but we have no such architectures.
 765
 766      Considering all this, it was decided that we should for now assume
 767      that INSNS can in theory contain control flow insns, but in practice
 768      it never does.  So we don't handle the theoretical case, and should
 769      a real failure ever show up, we have a pretty good clue for how to
 770      fix it.  */
 771
 772   return bb;
 773 }
 774
 775 /* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
 776    true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
 777    in order to create a jump.  */
 778
 779 static rtx_insn *
 780 compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob,
 781                       rtx_insn *cinsn)
 782 {
 783   rtx_insn *seq, *jump;
 784   rtx cond;
 785   machine_mode mode;
 786
 787   mode = GET_MODE (op0);
 788   if (mode == VOIDmode)
 789     mode = GET_MODE (op1);
 790
 791   start_sequence ();
 792   if (GET_MODE_CLASS (mode) == MODE_CC)
 793     {
 794       /* A hack -- there seems to be no easy generic way how to make a
 795          conditional jump from a ccmode comparison.  */
 796       gcc_assert (cinsn);
 797       cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
 798       gcc_assert (GET_CODE (cond) == comp);
 799       gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
 800       gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
 801       emit_jump_insn (copy_insn (PATTERN (cinsn)));
 802       jump = get_last_insn ();
 803       gcc_assert (JUMP_P (jump));
 804       JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
 805       LABEL_NUSES (JUMP_LABEL (jump))++;
 806       redirect_jump (jump, label, 0);
 807     }
 808   else
 809     {
 810       gcc_assert (!cinsn);
 811
 812       op0 = force_operand (op0, NULL_RTX);
 813       op1 = force_operand (op1, NULL_RTX);
 814       do_compare_rtx_and_jump (op0, op1, comp, 0,
 815                                mode, NULL_RTX, NULL_RTX, label, -1);
 816       jump = get_last_insn ();
 817       gcc_assert (JUMP_P (jump));
 818       JUMP_LABEL (jump) = label;
 819       LABEL_NUSES (label)++;
 820     }
 821   add_int_reg_note (jump, REG_BR_PROB, prob);
 822
 823   seq = get_insns ();
 824   end_sequence ();
 825
 826   return seq;
 827 }
 828
/* Unroll LOOP for which we are able to count number of iterations in runtime
   LOOP->LPT_DECISION.TIMES times.  The loop is preconditioned: the number
   of iterations modulo (TIMES + 1) is computed in front of the loop, and a
   chain of compare-and-jump blocks (the "switch" below) enters a sequence
   of peeled copies of the body at the matching position.  The
   transformation does this (with some extra care for case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }

   On return the iteration description of LOOP (its niter expression, its
   in/out edges when the exit is at the end, and the recorded iteration
   bounds and estimates) has been updated to describe the unrolled loop.  */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  /* Collect the insns that the IV splitting / variable expansion
     optimizations may later rewrite; this is done only when at least one
     of those optimizations is enabled.  */
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  /* These are the blocks outside of LOOP that are immediately dominated
     by some block of LOOP.  */
  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
        if (!flow_bb_inside_loop_p (loop, bb))
          dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  /* Choose which copy keeps the exit test, how many copies are peeled for
     the switch, and whether a separate test for zero iterations is
     needed.  */
  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
         unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
         unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  OLD_NITER keeps the register
     holding the full count; it is used at the end to describe the new
     number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
                               niter, gen_int_mode (max_unroll, desc->mode),
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  /* Edges collected by the duplications below; they are removed near the
     end of this function.  */
  auto_vec<edge> remove_edges;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of loop body (almost always we must leave exit test
     here; the only exception is when we have extra zero check and the number
     of iterations is reliable).  Also record the place of (possible) extra
     zero check.  */
  bitmap_clear (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    bitmap_set_bit (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                      1, wont_exit, desc->out_edge,
                                      &remove_edges,
                                      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  Only the last peeled copy may keep its exit test,
         and only when LAST_MAY_EXIT is set.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
        bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
                                          1, wont_exit, desc->out_edge,
                                          &remove_edges,
                                          DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  J is the value of the remainder that
         jumps to the copy just peeled, P the probability given to that
         jump.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
                                          block_label (preheader), p,
                                          NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
         and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      /* NOTE(review): the zero-iteration branch further down updates
         single_succ_edge (swtch) at the corresponding point, whereas this
         one updates single_pred_edge (swtch) -- confirm that the
         asymmetry is intended.  */
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      /* NOTE(review): this scales PREHEADER->count by REG_BR_PROB_BASE / P
         rather than by P / REG_BR_PROB_BASE; confirm that this is the
         intended count for an edge taken with probability P.  */
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  It is placed after the block
         recorded in EZC_SWTCH, i.e. in front of the first peeled copy.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
                                          block_label (preheader), p,
                                          NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
                     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      /* NOTE(review): same count scaling as in the peeling loop above;
         confirm.  */
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  Every copy except MAY_EXIT_COPY is marked as not
     exiting.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
                                      max_unroll,
                                      wont_exit, desc->out_edge,
                                      &remove_edges,
                                      DLTHE_FLAG_UPDATE_FREQ
                                      | (opt_info
                                         ? DLTHE_RECORD_COPY_NUMBER
                                           : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find a new in and out edge; they are in the last copy we have
         made.  The successor leading to the old exit destination becomes
         the out edge, the other one the in edge.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
        {
          desc->out_edge = EDGE_SUCC (exit_block, 0);
          desc->in_edge = EDGE_SUCC (exit_block, 1);
        }
      else
        {
          desc->out_edge = EDGE_SUCC (exit_block, 1);
          desc->in_edge = EDGE_SUCC (exit_block, 0);
        }
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
                         gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (exit_at_end)
    {
      /* With the exit at the end, the count and the recorded bounds are
         additionally decreased by one, and the estimate is dropped when it
         is already zero.  */
      desc->niter_expr =
        simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
          && loop->nb_iterations_estimate != 0)
        --loop->nb_iterations_estimate;
      else
        loop->any_estimate = false;
    }

  if (dump_file)
    fprintf (dump_file,
             ";; Unrolled loop %d times, counting # of iterations "
             "in runtime, %i insns\n",
             max_unroll, num_loop_insns (loop));
}
1097
1098 /* Decide whether to unroll LOOP stupidly and how much.  */
1099 static void
1100 decide_unroll_stupid (struct loop *loop, int flags)
1101 {
1102   unsigned nunroll, nunroll_by_av, i;
1103   struct niter_desc *desc;
1104   widest_int iterations;
1105
1106   if (!(flags & UAP_UNROLL_ALL))
1107     {
1108       /* We were not asked to, just return back silently.  */
1109       return;
1110     }
1111
1112   if (dump_file)
1113     fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
1114
1115   /* nunroll = total number of copies of the original loop body in
1116      unrolled loop (i.e. if it is 2, we have to duplicate loop body once.  */
1117   nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
1118   nunroll_by_av
1119     = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
1120   if (nunroll > nunroll_by_av)
1121     nunroll = nunroll_by_av;
1122   if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
1123     nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1124
1125   if (targetm.loop_unroll_adjust)
1126     nunroll = targetm.loop_unroll_adjust (nunroll, loop);
1127
1128   /* Skip big loops.  */
1129   if (nunroll <= 1)
1130     {
1131       if (dump_file)
1132         fprintf (dump_file, ";; Not considering loop, is too big\n");
1133       return;
1134     }
1135
1136   /* Check for simple loops.  */
1137   desc = get_simple_loop_desc (loop);
1138
1139   /* Check simpleness.  */
1140   if (desc->simple_p && !desc->assumptions)
1141     {
1142       if (dump_file)
1143         fprintf (dump_file, ";; The loop is simple\n");
1144       return;
1145     }
1146
1147   /* Do not unroll loops with branches inside -- it increases number
1148      of mispredicts.
1149      TODO: this heuristic needs tunning; call inside the loop body
1150      is also relatively good reason to not unroll.  */
1151   if (num_loop_branches (loop) > 1)
1152     {
1153       if (dump_file)
1154         fprintf (dump_file, ";; Not unrolling, contains branches\n");
1155       return;
1156     }
1157
1158   /* Check whether the loop rolls.  */
1159   if ((get_estimated_loop_iterations (loop, &iterations)
1160        || get_max_loop_iterations (loop, &iterations))
1161       && wi::ltu_p (iterations, 2 * nunroll))
1162     {
1163       if (dump_file)
1164         fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
1165       return;
1166     }
1167
1168   /* Success.  Now force nunroll to be power of 2, as it seems that this
1169      improves results (partially because of better alignments, partially
1170      because of some dark magic).  */
1171   for (i = 1; 2 * i <= nunroll; i *= 2)
1172     continue;
1173
1174   loop->lpt_decision.decision = LPT_UNROLL_STUPID;
1175   loop->lpt_decision.times = i - 1;
1176 }
1177
1178 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:
1179
1180    while (cond)
1181      body;
1182
1183    ==>  (LOOP->LPT_DECISION.TIMES == 3)
1184
1185    while (cond)
1186      {
1187        body;
1188        if (!cond) break;
1189        body;
1190        if (!cond) break;
1191        body;
1192        if (!cond) break;
1193        body;
1194      }
1195    */
1196 static void
1197 unroll_loop_stupid (struct loop *loop)
1198 {
1199   sbitmap wont_exit;
1200   unsigned nunroll = loop->lpt_decision.times;
1201   struct niter_desc *desc = get_simple_loop_desc (loop);
1202   struct opt_info *opt_info = NULL;
1203   bool ok;
1204
1205   if (flag_split_ivs_in_unroller
1206       || flag_variable_expansion_in_unroller)
1207     opt_info = analyze_insns_in_loop (loop);
1208
1209
1210   wont_exit = sbitmap_alloc (nunroll + 1);
1211   bitmap_clear (wont_exit);
1212   opt_info_start_duplication (opt_info);
1213
1214   ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
1215                                       nunroll, wont_exit,
1216                                       NULL, NULL,
1217                                       DLTHE_FLAG_UPDATE_FREQ
1218                                       | (opt_info
1219                                          ? DLTHE_RECORD_COPY_NUMBER
1220                                            : 0));
1221   gcc_assert (ok);
1222
1223   if (opt_info)
1224     {
1225       apply_opt_in_copies (opt_info, nunroll, true, true);
1226       free_opt_info (opt_info);
1227     }
1228
1229   free (wont_exit);
1230
1231   if (desc->simple_p)
1232     {
1233       /* We indeed may get here provided that there are nontrivial assumptions
1234          for a loop to be really simple.  We could update the counts, but the
1235          problem is that we are unable to decide which exit will be taken
1236          (not really true in case the number of iterations is constant,
1237          but no one will do anything with this information, so we do not
1238          worry about it).  */
1239       desc->simple_p = false;
1240     }
1241
1242   if (dump_file)
1243     fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
1244              nunroll, num_loop_insns (loop));
1245 }
1246
1247 /* Returns true if REG is referenced in one nondebug insn in LOOP.
1248    Set *DEBUG_USES to the number of debug insns that reference the
1249    variable.  */
1250
1251 static bool
1252 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
1253                                   int *debug_uses)
1254 {
1255   basic_block *body, bb;
1256   unsigned i;
1257   int count_ref = 0;
1258   rtx_insn *insn;
1259
1260   body = get_loop_body (loop);
1261   for (i = 0; i < loop->num_nodes; i++)
1262     {
1263       bb = body[i];
1264
1265       FOR_BB_INSNS (bb, insn)
1266         if (!rtx_referenced_p (reg, insn))
1267           continue;
1268         else if (DEBUG_INSN_P (insn))
1269           ++*debug_uses;
1270         else if (++count_ref > 1)
1271           break;
1272     }
1273   free (body);
1274   return (count_ref  == 1);
1275 }
1276
1277 /* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */
1278
1279 static void
1280 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
1281 {
1282   basic_block *body, bb;
1283   unsigned i;
1284   rtx_insn *insn;
1285
1286   body = get_loop_body (loop);
1287   for (i = 0; debug_uses && i < loop->num_nodes; i++)
1288     {
1289       bb = body[i];
1290
1291       FOR_BB_INSNS (bb, insn)
1292         if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
1293           continue;
1294         else
1295           {
1296             validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
1297                              gen_rtx_UNKNOWN_VAR_LOC (), 0);
1298             if (!--debug_uses)
1299               break;
1300           }
1301     }
1302   free (body);
1303 }
1304
1305 /* Determine whether INSN contains an accumulator
1306    which can be expanded into separate copies,
1307    one for each copy of the LOOP body.
1308
1309    for (i = 0 ; i < n; i++)
1310      sum += a[i];
1311
1312    ==>
1313
1314    sum += a[i]
1315    ....
1316    i = i+1;
1317    sum1 += a[i]
1318    ....
1319    i = i+1
1320    sum2 += a[i];
1321    ....
1322
1323    Return NULL if INSN contains no opportunity for expansion of accumulator.
1324    Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant
1325    information and return a pointer to it.
1326 */
1327
1328 static struct var_to_expand *
1329 analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
1330 {
1331   rtx set, dest, src;
1332   struct var_to_expand *ves;
1333   unsigned accum_pos;
1334   enum rtx_code code;
1335   int debug_uses = 0;
1336
1337   set = single_set (insn);
1338   if (!set)
1339     return NULL;
1340
1341   dest = SET_DEST (set);
1342   src = SET_SRC (set);
1343   code = GET_CODE (src);
1344
1345   if (code != PLUS && code != MINUS && code != MULT && code != FMA)
1346     return NULL;
1347
1348   if (FLOAT_MODE_P (GET_MODE (dest)))
1349     {
1350       if (!flag_associative_math)
1351         return NULL;
1352       /* In the case of FMA, we're also changing the rounding.  */
1353       if (code == FMA && !flag_unsafe_math_optimizations)
1354         return NULL;
1355     }
1356
1357   /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
1358      in MD.  But if there is no optab to generate the insn, we can not
1359      perform the variable expansion.  This can happen if an MD provides
1360      an insn but not a named pattern to generate it, for example to avoid
1361      producing code that needs additional mode switches like for x87/mmx.
1362
1363      So we check have_insn_for which looks for an optab for the operation
1364      in SRC.  If it doesn't exist, we can't perform the expansion even
1365      though INSN is valid.  */
1366   if (!have_insn_for (code, GET_MODE (src)))
1367     return NULL;
1368
1369   if (!REG_P (dest)
1370       && !(GET_CODE (dest) == SUBREG
1371            && REG_P (SUBREG_REG (dest))))
1372     return NULL;
1373
1374   /* Find the accumulator use within the operation.  */
1375   if (code == FMA)
1376     {
1377       /* We only support accumulation via FMA in the ADD position.  */
1378       if (!rtx_equal_p  (dest, XEXP (src, 2)))
1379         return NULL;
1380       accum_pos = 2;
1381     }
1382   else if (rtx_equal_p (dest, XEXP (src, 0)))
1383     accum_pos = 0;
1384   else if (rtx_equal_p (dest, XEXP (src, 1)))
1385     {
1386       /* The method of expansion that we are using; which includes the
1387          initialization of the expansions with zero and the summation of
1388          the expansions at the end of the computation will yield wrong
1389          results for (x = something - x) thus avoid using it in that case.  */
1390       if (code == MINUS)
1391         return NULL;
1392       accum_pos = 1;
1393     }
1394   else
1395     return NULL;
1396
1397   /* It must not otherwise be used.  */
1398   if (code == FMA)
1399     {
1400       if (rtx_referenced_p (dest, XEXP (src, 0))
1401           || rtx_referenced_p (dest, XEXP (src, 1)))
1402         return NULL;
1403     }
1404   else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
1405     return NULL;
1406
1407   /* It must be used in exactly one insn.  */
1408   if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
1409     return NULL;
1410
1411   if (dump_file)
1412     {
1413       fprintf (dump_file, "\n;; Expanding Accumulator ");
1414       print_rtl (dump_file, dest);
1415       fprintf (dump_file, "\n");
1416     }
1417
1418   if (debug_uses)
1419     /* Instead of resetting the debug insns, we could replace each
1420        debug use in the loop with the sum or product of all expanded
1421        accummulators.  Since we'll only know of all expansions at the
1422        end, we'd have to keep track of which vars_to_expand a debug
1423        insn in the loop references, take note of each copy of the
1424        debug insn during unrolling, and when it's all done, compute
1425        the sum or product of each variable and adjust the original
1426        debug insn and each copy thereof.  What a pain!  */
1427     reset_debug_uses_in_loop (loop, dest, debug_uses);
1428
1429   /* Record the accumulator to expand.  */
1430   ves = XNEW (struct var_to_expand);
1431   ves->insn = insn;
1432   ves->reg = copy_rtx (dest);
1433   ves->var_expansions.create (1);
1434   ves->next = NULL;
1435   ves->op = GET_CODE (src);
1436   ves->expansion_count = 0;
1437   ves->reuse_expansion = 0;
1438   return ves;
1439 }
1440
1441 /* Determine whether there is an induction variable in INSN that
1442    we would like to split during unrolling.
1443
1444    I.e. replace
1445
1446    i = i + 1;
1447    ...
1448    i = i + 1;
1449    ...
1450    i = i + 1;
1451    ...
1452
1453    type chains by
1454
1455    i0 = i + 1
1456    ...
1457    i = i0 + 1
1458    ...
1459    i = i0 + 2
1460    ...
1461
1462    Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
1463    an IV_TO_SPLIT structure, fill it with the relevant information and return a
1464    pointer to it.  */
1465
1466 static struct iv_to_split *
1467 analyze_iv_to_split_insn (rtx_insn *insn)
1468 {
1469   rtx set, dest;
1470   struct rtx_iv iv;
1471   struct iv_to_split *ivts;
1472   bool ok;
1473
1474   /* For now we just split the basic induction variables.  Later this may be
1475      extended for example by selecting also addresses of memory references.  */
1476   set = single_set (insn);
1477   if (!set)
1478     return NULL;
1479
1480   dest = SET_DEST (set);
1481   if (!REG_P (dest))
1482     return NULL;
1483
1484   if (!biv_p (insn, dest))
1485     return NULL;
1486
1487   ok = iv_analyze_result (insn, dest, &iv);
1488
1489   /* This used to be an assert under the assumption that if biv_p returns
1490      true that iv_analyze_result must also return true.  However, that
1491      assumption is not strictly correct as evidenced by pr25569.
1492
1493      Returning NULL when iv_analyze_result returns false is safe and
1494      avoids the problems in pr25569 until the iv_analyze_* routines
1495      can be fixed, which is apparently hard and time consuming
1496      according to their author.  */
1497   if (! ok)
1498     return NULL;
1499
1500   if (iv.step == const0_rtx
1501       || iv.mode != iv.extend_mode)
1502     return NULL;
1503
1504   /* Record the insn to split.  */
1505   ivts = XNEW (struct iv_to_split);
1506   ivts->insn = insn;
1507   ivts->orig_var = dest;
1508   ivts->base_var = NULL_RTX;
1509   ivts->step = iv.step;
1510   ivts->next = NULL;
1511
1512   return ivts;
1513 }
1514
1515 /* Determines which of insns in LOOP can be optimized.
1516    Return a OPT_INFO struct with the relevant hash tables filled
1517    with all insns to be optimized.  The FIRST_NEW_BLOCK field
1518    is undefined for the return value.  */
1519
1520 static struct opt_info *
1521 analyze_insns_in_loop (struct loop *loop)
1522 {
1523   basic_block *body, bb;
1524   unsigned i;
1525   struct opt_info *opt_info = XCNEW (struct opt_info);
1526   rtx_insn *insn;
1527   struct iv_to_split *ivts = NULL;
1528   struct var_to_expand *ves = NULL;
1529   iv_to_split **slot1;
1530   var_to_expand **slot2;
1531   vec<edge> edges = get_loop_exit_edges (loop);
1532   edge exit;
1533   bool can_apply = false;
1534
1535   iv_analysis_loop_init (loop);
1536
1537   body = get_loop_body (loop);
1538
1539   if (flag_split_ivs_in_unroller)
1540     {
1541       opt_info->insns_to_split
1542         = new hash_table<iv_split_hasher> (5 * loop->num_nodes);
1543       opt_info->iv_to_split_head = NULL;
1544       opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
1545     }
1546
1547   /* Record the loop exit bb and loop preheader before the unrolling.  */
1548   opt_info->loop_preheader = loop_preheader_edge (loop)->src;
1549
1550   if (edges.length () == 1)
1551     {
1552       exit = edges[0];
1553       if (!(exit->flags & EDGE_COMPLEX))
1554         {
1555           opt_info->loop_exit = split_edge (exit);
1556           can_apply = true;
1557         }
1558     }
1559
1560   if (flag_variable_expansion_in_unroller
1561       && can_apply)
1562     {
1563       opt_info->insns_with_var_to_expand
1564         = new hash_table<var_expand_hasher> (5 * loop->num_nodes);
1565       opt_info->var_to_expand_head = NULL;
1566       opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
1567     }
1568
1569   for (i = 0; i < loop->num_nodes; i++)
1570     {
1571       bb = body[i];
1572       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1573         continue;
1574
1575       FOR_BB_INSNS (bb, insn)
1576       {
1577         if (!INSN_P (insn))
1578           continue;
1579
1580         if (opt_info->insns_to_split)
1581           ivts = analyze_iv_to_split_insn (insn);
1582
1583         if (ivts)
1584           {
1585             slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
1586             gcc_assert (*slot1 == NULL);
1587             *slot1 = ivts;
1588             *opt_info->iv_to_split_tail = ivts;
1589             opt_info->iv_to_split_tail = &ivts->next;
1590             continue;
1591           }
1592
1593         if (opt_info->insns_with_var_to_expand)
1594           ves = analyze_insn_to_expand_var (loop, insn);
1595
1596         if (ves)
1597           {
1598             slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
1599             gcc_assert (*slot2 == NULL);
1600             *slot2 = ves;
1601             *opt_info->var_to_expand_tail = ves;
1602             opt_info->var_to_expand_tail = &ves->next;
1603           }
1604       }
1605     }
1606
1607   edges.release ();
1608   free (body);
1609   return opt_info;
1610 }
1611
1612 /* Called just before loop duplication.  Records start of duplicated area
1613    to OPT_INFO.  */
1614
1615 static void
1616 opt_info_start_duplication (struct opt_info *opt_info)
1617 {
1618   if (opt_info)
1619     opt_info->first_new_block = last_basic_block_for_fn (cfun);
1620 }
1621
/* Return the number of iterations between the initialization of the base
   variable and the copy number N_COPY.  N_COPIES is the total number of
   newly created copies.  UNROLLING is true if we are unrolling (not
   peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* If we are unrolling, initialization is done in the original loop
     body (number 0).  */
  if (unrolling)
    return n_copy;

  /* If we are peeling, the copy in which the initialization occurs has
     number 1.  The original loop (number 0) is the last.  */
  return n_copy != 0 ? n_copy - 1 : n_copies;
}
1646
1647 /* Allocate basic variable for the induction variable chain.  */
1648
1649 static void
1650 allocate_basic_variable (struct iv_to_split *ivts)
1651 {
1652   rtx expr = SET_SRC (single_set (ivts->insn));
1653
1654   ivts->base_var = gen_reg_rtx (GET_MODE (expr));
1655 }
1656
1657 /* Insert initialization of basic variable of IVTS before INSN, taking
1658    the initial value from INSN.  */
1659
1660 static void
1661 insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
1662 {
1663   rtx expr = copy_rtx (SET_SRC (single_set (insn)));
1664   rtx_insn *seq;
1665
1666   start_sequence ();
1667   expr = force_operand (expr, ivts->base_var);
1668   if (expr != ivts->base_var)
1669     emit_move_insn (ivts->base_var, expr);
1670   seq = get_insns ();
1671   end_sequence ();
1672
1673   emit_insn_before (seq, insn);
1674 }
1675
1676 /* Replace the use of induction variable described in IVTS in INSN
1677    by base variable + DELTA * step.  */
1678
1679 static void
1680 split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
1681 {
1682   rtx expr, *loc, incr, var;
1683   rtx_insn *seq;
1684   machine_mode mode = GET_MODE (ivts->base_var);
1685   rtx src, dest, set;
1686
1687   /* Construct base + DELTA * step.  */
1688   if (!delta)
1689     expr = ivts->base_var;
1690   else
1691     {
1692       incr = simplify_gen_binary (MULT, mode,
1693                                   ivts->step, gen_int_mode (delta, mode));
1694       expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
1695                                   ivts->base_var, incr);
1696     }
1697
1698   /* Figure out where to do the replacement.  */
1699   loc = &SET_SRC (single_set (insn));
1700
1701   /* If we can make the replacement right away, we're done.  */
1702   if (validate_change (insn, loc, expr, 0))
1703     return;
1704
1705   /* Otherwise, force EXPR into a register and try again.  */
1706   start_sequence ();
1707   var = gen_reg_rtx (mode);
1708   expr = force_operand (expr, var);
1709   if (expr != var)
1710     emit_move_insn (var, expr);
1711   seq = get_insns ();
1712   end_sequence ();
1713   emit_insn_before (seq, insn);
1714
1715   if (validate_change (insn, loc, var, 0))
1716     return;
1717
1718   /* The last chance.  Try recreating the assignment in insn
1719      completely from scratch.  */
1720   set = single_set (insn);
1721   gcc_assert (set);
1722
1723   start_sequence ();
1724   *loc = var;
1725   src = copy_rtx (SET_SRC (set));
1726   dest = copy_rtx (SET_DEST (set));
1727   src = force_operand (src, dest);
1728   if (src != dest)
1729     emit_move_insn (dest, src);
1730   seq = get_insns ();
1731   end_sequence ();
1732
1733   emit_insn_before (seq, insn);
1734   delete_insn (insn);
1735 }
1736
1737
1738 /* Return one expansion of the accumulator recorded in struct VE.  */
1739
1740 static rtx
1741 get_expansion (struct var_to_expand *ve)
1742 {
1743   rtx reg;
1744
1745   if (ve->reuse_expansion == 0)
1746     reg = ve->reg;
1747   else
1748     reg = ve->var_expansions[ve->reuse_expansion - 1];
1749
1750   if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
1751     ve->reuse_expansion = 0;
1752   else
1753     ve->reuse_expansion++;
1754
1755   return reg;
1756 }
1757
1758
1759 /* Given INSN replace the uses of the accumulator recorded in VE
1760    with a new register.  */
1761
1762 static void
1763 expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
1764 {
1765   rtx new_reg, set;
1766   bool really_new_expansion = false;
1767
1768   set = single_set (insn);
1769   gcc_assert (set);
1770
1771   /* Generate a new register only if the expansion limit has not been
1772      reached.  Else reuse an already existing expansion.  */
1773   if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
1774     {
1775       really_new_expansion = true;
1776       new_reg = gen_reg_rtx (GET_MODE (ve->reg));
1777     }
1778   else
1779     new_reg = get_expansion (ve);
1780
1781   validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
1782   if (apply_change_group ())
1783     if (really_new_expansion)
1784       {
1785         ve->var_expansions.safe_push (new_reg);
1786         ve->expansion_count++;
1787       }
1788 }
1789
1790 /* Initialize the variable expansions in loop preheader.  PLACE is the
1791    loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zeros.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:
1797
1798    for (i = 0 ; i < n; i++)
1799      sum += something;
1800
1801    ==>
1802
1803    sum += something
1804    ....
1805    i = i+1;
1806    sum1 += something
1807    ....
1808    i = i+1
1809    sum2 += something;
1810    ....
1811
1812    When SUM is initialized with -zero and SOMETHING is also -zero; the
1813    final result of sum should be -zero thus the expansions sum1 and sum2
1814    should be initialized with -zero as well (otherwise we will get +zero
1815    as the final result).  */
1816
1817 static void
1818 insert_var_expansion_initialization (struct var_to_expand *ve,
1819                                      basic_block place)
1820 {
1821   rtx_insn *seq;
1822   rtx var, zero_init;
1823   unsigned i;
1824   machine_mode mode = GET_MODE (ve->reg);
1825   bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);
1826
1827   if (ve->var_expansions.length () == 0)
1828     return;
1829
1830   start_sequence ();
1831   switch (ve->op)
1832     {
1833     case FMA:
1834       /* Note that we only accumulate FMA via the ADD operand.  */
1835     case PLUS:
1836     case MINUS:
1837       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1838         {
1839           if (honor_signed_zero_p)
1840             zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
1841           else
1842             zero_init = CONST0_RTX (mode);
1843           emit_move_insn (var, zero_init);
1844         }
1845       break;
1846
1847     case MULT:
1848       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1849         {
1850           zero_init = CONST1_RTX (GET_MODE (var));
1851           emit_move_insn (var, zero_init);
1852         }
1853       break;
1854
1855     default:
1856       gcc_unreachable ();
1857     }
1858
1859   seq = get_insns ();
1860   end_sequence ();
1861
1862   emit_insn_after (seq, BB_END (place));
1863 }
1864
1865 /* Combine the variable expansions at the loop exit.  PLACE is the
1866    loop exit basic block where the summation of the expansions should
1867    take place.  */
1868
1869 static void
1870 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
1871 {
1872   rtx sum = ve->reg;
1873   rtx expr, var;
1874   rtx_insn *seq, *insn;
1875   unsigned i;
1876
1877   if (ve->var_expansions.length () == 0)
1878     return;
1879
1880   start_sequence ();
1881   switch (ve->op)
1882     {
1883     case FMA:
1884       /* Note that we only accumulate FMA via the ADD operand.  */
1885     case PLUS:
1886     case MINUS:
1887       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1888         sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
1889       break;
1890
1891     case MULT:
1892       FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
1893         sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
1894       break;
1895
1896     default:
1897       gcc_unreachable ();
1898     }
1899
1900   expr = force_operand (sum, ve->reg);
1901   if (expr != ve->reg)
1902     emit_move_insn (ve->reg, expr);
1903   seq = get_insns ();
1904   end_sequence ();
1905
1906   insn = BB_HEAD (place);
1907   while (!NOTE_INSN_BASIC_BLOCK_P (insn))
1908     insn = NEXT_INSN (insn);
1909
1910   emit_insn_after (seq, insn);
1911 }
1912
1913 /* Strip away REG_EQUAL notes for IVs we're splitting.
1914
1915    Updating REG_EQUAL notes for IVs we split is tricky: We
1916    cannot tell until after unrolling, DF-rescanning, and liveness
1917    updating, whether an EQ_USE is reached by the split IV while
1918    the IV reg is still live.  See PR55006.
1919
1920    ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
1921    because RTL loop-iv requires us to defer rescanning insns and
1922    any notes attached to them.  So resort to old techniques...  */
1923
1924 static void
1925 maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
1926 {
1927   struct iv_to_split *ivts;
1928   rtx note = find_reg_equal_equiv_note (insn);
1929   if (! note)
1930     return;
1931   for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1932     if (reg_mentioned_p (ivts->orig_var, note))
1933       {
1934         remove_note (insn, note);
1935         return;
1936       }
1937 }
1938
1939 /* Apply loop optimizations in loop copies using the
1940    data which gathered during the unrolling.  Structure
1941    OPT_INFO record that data.
1942
1943    UNROLLING is true if we unrolled (not peeled) the loop.
1944    REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of
1945    the loop (as it should happen in complete unrolling, but not in ordinary
1946    peeling of the loop).  */
1947
1948 static void
1949 apply_opt_in_copies (struct opt_info *opt_info,
1950                      unsigned n_copies, bool unrolling,
1951                      bool rewrite_original_loop)
1952 {
1953   unsigned i, delta;
1954   basic_block bb, orig_bb;
1955   rtx_insn *insn, *orig_insn, *next;
1956   struct iv_to_split ivts_templ, *ivts;
1957   struct var_to_expand ve_templ, *ves;
1958
1959   /* Sanity check -- we need to put initialization in the original loop
1960      body.  */
1961   gcc_assert (!unrolling || rewrite_original_loop);
1962
1963   /* Allocate the basic variables (i0).  */
1964   if (opt_info->insns_to_split)
1965     for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
1966       allocate_basic_variable (ivts);
1967
1968   for (i = opt_info->first_new_block;
1969        i < (unsigned) last_basic_block_for_fn (cfun);
1970        i++)
1971     {
1972       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1973       orig_bb = get_bb_original (bb);
1974
1975       /* bb->aux holds position in copy sequence initialized by
1976          duplicate_loop_to_header_edge.  */
1977       delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
1978                                         unrolling);
1979       bb->aux = 0;
1980       orig_insn = BB_HEAD (orig_bb);
1981       FOR_BB_INSNS_SAFE (bb, insn, next)
1982         {
1983           if (!INSN_P (insn)
1984               || (DEBUG_INSN_P (insn)
1985                   && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
1986             continue;
1987
1988           while (!INSN_P (orig_insn)
1989                  || (DEBUG_INSN_P (orig_insn)
1990                      && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
1991                          == LABEL_DECL)))
1992             orig_insn = NEXT_INSN (orig_insn);
1993
1994           ivts_templ.insn = orig_insn;
1995           ve_templ.insn = orig_insn;
1996
1997           /* Apply splitting iv optimization.  */
1998           if (opt_info->insns_to_split)
1999             {
2000               maybe_strip_eq_note_for_split_iv (opt_info, insn);
2001
2002               ivts = opt_info->insns_to_split->find (&ivts_templ);
2003
2004               if (ivts)
2005                 {
2006                   gcc_assert (GET_CODE (PATTERN (insn))
2007                               == GET_CODE (PATTERN (orig_insn)));
2008
2009                   if (!delta)
2010                     insert_base_initialization (ivts, insn);
2011                   split_iv (ivts, insn, delta);
2012                 }
2013             }
2014           /* Apply variable expansion optimization.  */
2015           if (unrolling && opt_info->insns_with_var_to_expand)
2016             {
2017               ves = (struct var_to_expand *)
2018                 opt_info->insns_with_var_to_expand->find (&ve_templ);
2019               if (ves)
2020                 {
2021                   gcc_assert (GET_CODE (PATTERN (insn))
2022                               == GET_CODE (PATTERN (orig_insn)));
2023                   expand_var_during_unrolling (ves, insn);
2024                 }
2025             }
2026           orig_insn = NEXT_INSN (orig_insn);
2027         }
2028     }
2029
2030   if (!rewrite_original_loop)
2031     return;
2032
2033   /* Initialize the variable expansions in the loop preheader
2034      and take care of combining them at the loop exit.  */
2035   if (opt_info->insns_with_var_to_expand)
2036     {
2037       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2038         insert_var_expansion_initialization (ves, opt_info->loop_preheader);
2039       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2040         combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
2041     }
2042
2043   /* Rewrite also the original loop body.  Find them as originals of the blocks
2044      in the last copied iteration, i.e. those that have
2045      get_bb_copy (get_bb_original (bb)) == bb.  */
2046   for (i = opt_info->first_new_block;
2047        i < (unsigned) last_basic_block_for_fn (cfun);
2048        i++)
2049     {
2050       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2051       orig_bb = get_bb_original (bb);
2052       if (get_bb_copy (orig_bb) != bb)
2053         continue;
2054
2055       delta = determine_split_iv_delta (0, n_copies, unrolling);
2056       for (orig_insn = BB_HEAD (orig_bb);
2057            orig_insn != NEXT_INSN (BB_END (bb));
2058            orig_insn = next)
2059         {
2060           next = NEXT_INSN (orig_insn);
2061
2062           if (!INSN_P (orig_insn))
2063             continue;
2064
2065           ivts_templ.insn = orig_insn;
2066           if (opt_info->insns_to_split)
2067             {
2068               maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);
2069
2070               ivts = (struct iv_to_split *)
2071                 opt_info->insns_to_split->find (&ivts_templ);
2072               if (ivts)
2073                 {
2074                   if (!delta)
2075                     insert_base_initialization (ivts, orig_insn);
2076                   split_iv (ivts, orig_insn, delta);
2077                   continue;
2078                 }
2079             }
2080
2081         }
2082     }
2083 }
2084
2085 /* Release OPT_INFO.  */
2086
2087 static void
2088 free_opt_info (struct opt_info *opt_info)
2089 {
2090   delete opt_info->insns_to_split;
2091   opt_info->insns_to_split = NULL;
2092   if (opt_info->insns_with_var_to_expand)
2093     {
2094       struct var_to_expand *ves;
2095
2096       for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
2097         ves->var_expansions.release ();
2098       delete opt_info->insns_with_var_to_expand;
2099       opt_info->insns_with_var_to_expand = NULL;
2100     }
2101   free (opt_info);
2102 }