/* Loop unrolling.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "profile.h"
#include "predict.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "params.h"
#include "insn-codes.h"
#include "optabs.h"
#include "hashtab.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "hash-table.h"
#include "recog.h"
#include "target.h"
#include "dumpfile.h"
/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain comes from greater sequentiality
   of code, better code for further passes to optimize, and in some cases
   from fewer tests of exit conditions.  The main problem is code growth,
   which impacts performance negatively due to its effect on caches.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests.
   -- unrolling of loops whose number of iterations we can compute at
      runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of the remaining loops; this is performed only if
      we are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see the comments at
   the appropriate function below.

   There are a lot of parameters (defined and described in params.def)
   that control how much we unroll.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of
   several %.  */
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;		/* The insn in which the induction variable occurs.  */
  rtx orig_var;			/* The variable (register) for the IV before split.  */
  rtx base_var;			/* The variable on which the values in the further
				   iterations are based.  */
  rtx step;			/* Step of the induction variable.  */
  struct iv_to_split *next;	/* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;		/* The insn in which the variable expansion occurs.  */
  rtx reg;			/* The accumulator which is expanded.  */
  vec<rtx> var_expansions;	/* The copies of the accumulator which is expanded.  */
  struct var_to_expand *next;	/* Next entry in walking order.  */
  enum rtx_code op;		/* The type of the accumulation - addition, subtraction
				   or multiplication.  */
  int expansion_count;		/* Count the number of expansions generated so far.  */
  int reuse_expansion;		/* The expansion we intend to reuse to expand
				   the accumulator.  If REUSE_EXPANSION is 0 reuse
				   the original accumulator.  Else use
				   var_expansions[REUSE_EXPANSION - 1].  */
};
/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : typed_free_remove <iv_to_split>
{
  typedef iv_to_split *value_type;
  typedef iv_to_split *compare_type;
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};

/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const iv_to_split *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : typed_free_remove <var_to_expand>
{
  typedef var_to_expand *value_type;
  typedef var_to_expand *compare_type;
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const var_to_expand *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
{
  return i1->insn == i2->insn;
}
/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						  split.  */
  struct iv_to_split *iv_to_split_head; /* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;	/* The first basic block that was
				   duplicated.  */
  basic_block loop_exit;	/* The loop exit basic block.  */
  basic_block loop_preheader;	/* The loop preheader basic block.  */
};
static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);
/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, location_t locus)
{
  int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_printf_loc (report_flags, locus,
		   "loop unrolled %d times",
		   loop->lpt_decision.times);
  if (profile_info)
    dump_printf (report_flags,
		 " (header execution count %d)",
		 (int)loop->header->count);

  dump_printf (report_flags, "\n");
}
/* Decide whether to unroll loops and how much.  */
static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (TDF_RTL, locus,
			 ";; *** Considering loop %d at BB %d for "
			 "unrolling ***\n",
			 loop->num, loop->header->index);

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of
	 priority.  */

      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}
/* Unroll LOOPS.  */
void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* Now decide the rest of the unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}
/* Check whether the exit of the LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    if (INSN_P (insn) && active_insn_p (insn))
      return false;

  return true;
}
/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with constant "
	     "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in case the loop has more
     than one exit it may well loop less than the determined maximal number
     of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of the loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i - 1 >= desc->niter)
    i = desc->niter - 2;
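
  /* For instance, with desc->niter == 100 and nunroll == 8 for a loop
     whose exit test is at the beginning of the body, the search below
     tries i = 18, 17, ..., 7; the cost exit_mod + i + 1 is minimized
     at i == 9 (100 % 10 == 0 gives n_copies == 10), so we unroll 9
     times and need no peeled copies.  */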

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}
/* Unroll LOOP with a constant number of iterations LOOP->LPT_DECISION.TIMES
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  sbitmap wont_exit;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);

  wont_exit = sbitmap_alloc (max_unroll + 1);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that loops that start with a test
	 of the exit condition have a continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as above if
	 the loop tests the condition at the end of the loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care about
	 the case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find new in and out edges; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
/* Decide whether to unroll LOOP iterating a runtime computable number of
   times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file,
	     "\n;; Considering unrolling loop with runtime "
	     "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted at runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as we are unable to
     cope with overflows in the computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;
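
  /* E.g. for nunroll == 6 the loop above stops with i == 4 (after
     visiting 1 and 2, since 2 * 4 > 6), so lpt_decision.times becomes 3
     and the unrolled body will contain 4 copies.  */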

  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}
/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

      * If do_compare_rtx_and_jump needs several branches to do a comparison
	in a mode that needs comparison by parts, we cannot analyze the
	number of iterations of the loop, and we never get to unrolling it.

      * The code in expand_divmod that was suspected to cause creation of
	branching code seems to be only accessed for signed division.  The
	divisions used by the number-of-iterations analysis are always
	unsigned.  Problems might arise on architectures that emit branching
	code for some operations that may appear in the unroller (especially
	for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}
/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, int prob, rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label, -1);
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  add_int_reg_note (jump, REG_BR_PROB, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}
/* Unroll LOOP for which we are able to count the number of iterations at
   runtime LOOP->LPT_DECISION.TIMES times.  The transformation does this
   (with some extra care for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
	 body; i++;
       case 2:
	 body; i++;
       case 1:
	 body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j, p;
  basic_block preheader, *body, swtch, ezc_swtch;
  sbitmap wont_exit;
  int may_exit_copy;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave exit in first copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave exit in last copy (for explanation why see comment in
	 unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get expression for number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* Count modulo by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
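
  /* E.g. with max_unroll == 3 the unrolled body holds 4 copies, and
     niter & 3 == niter % 4 (13 & 3 == 1, say); because 4 is a power of
     two, this identity holds even if the computation of NITER above
     wrapped around.  */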

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  wont_exit = sbitmap_alloc (max_unroll + 2);

  /* Peel the first copy of the loop body (almost always we must leave the
     exit test here; the only exception is when we have an extra zero check
     and the number of iterations is reliable.  Also record the place of the
     (possible) extra zero check.  */
  bitmap_clear (wont_exit);
  if (extra_zero_check
      && !desc->noloop_assumptions)
    bitmap_set_bit (wont_exit, 1);
  ezc_swtch = loop_preheader_edge (loop)->src;
  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
				      1, wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ);
  gcc_assert (ok);

  /* Record the place where switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create item for switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = REG_BR_PROB_BASE / (i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add branch for zero iterations.  */
      p = REG_BR_PROB_BASE / (max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);
      /* Find new in and out edges; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "at runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}
/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  if (!(flags & UAP_UNROLL_ALL))
    {
      /* We were not asked to, just return back silently.  */
      return;
    }

  if (dump_file)
    fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; The loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}
/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==> (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  sbitmap wont_exit;
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  wont_exit = sbitmap_alloc (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  free (wont_exit);

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial assumptions
	 for a loop to be really simple.  We could update the counts, but the
	 problem is that we are unable to decide which exit will be taken
	 (not really true in case the number of iterations is constant,
	 but no one will do anything with this information, so we do not
	 worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}
/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}
/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}
/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of the
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it
   with the relevant information and return a pointer to it.  */

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for, which looks for an optab for the operation
     in SRC.  If it doesn't exist, we cannot perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using (initializing the
	 expansions with zero and summing them at the end of the
	 computation) will yield wrong results for (x = something - x);
	 thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}
/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;

   i = i + 1;

   i = i + 1;

   type chains by

   i0 = i + 1

   i = i0 + 1

   i = i0 + 2

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest))
    return NULL;

  if (!biv_p (insn, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}
/* Determine which of the insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}
/* Called just before loop duplication.  Records start of duplicated area
   to OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}
/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */
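
/* E.g. when peeling with N_COPIES == 3, the peeled copies run in the
   order 1, 2, 3 followed by the original body (number 0), so their
   deltas are 0, 1, 2, and the original body, which runs last, gets
   delta 3.  */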

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}
/* Allocate basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}
/* Insert initialization of basic variable of IVTS before INSN, taking
   the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}
/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */
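
/* E.g. for an induction variable with step 4, the use in the copy with
   DELTA == 2 is rewritten to base_var + 8.  */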

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  ivts->step, gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}
/* Return one expansion of the accumulator recorded in struct VE.  */
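
/* Once the expansion limit has been reached, the expansions are handed
   out round-robin: successive calls return VE->reg, var_expansions[0],
   var_expansions[1], ..., and then wrap back to VE->reg.  */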

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}
/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}
/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero; thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}
/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}
/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: we
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
	remove_note (insn, note);
	return;
      }
}
/* Apply loop optimizations in loop copies using the
   data which was gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body
   of the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds position in copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_INSN_P (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  while (!INSN_P (orig_insn)
		 || (DEBUG_INSN_P (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find the blocks as originals of
     the blocks in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }
	}
    }
}
/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}