/* Loop unrolling.
   Copyright (C) 2002-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "memmodel.h"
#include "optabs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "profile.h"
#include "cfgrtl.h"
#include "cfgloop.h"
#include "params.h"
#include "dojump.h"
#include "expr.h"
#include "dumpfile.h"

/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest here, and we want to avoid
   unnecessary code size growth.  The gain is caused by greater sequentiality
   of code, better code to optimize for further passes, and in some cases
   by fewer tests of exit conditions.  The main problem is code growth,
   which impacts performance negatively due to its effect on caches.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is almost
      always a win, as we get rid of exit condition tests.
   -- unrolling of loops that roll a number of times that we can compute
      at runtime; we also get rid of exit condition tests here, but there
      is the extra expense of calculating the number of iterations
   -- simple unrolling of remaining loops; this is performed only if we
      are asked to, as the gain is questionable in this case and often
      it may even slow down the code
   For more detailed descriptions of each of those, see comments at
   the appropriate function below.

   There are a lot of parameters (defined and described in params.def) that
   control how much we unroll.

   ??? A great problem is that we don't have a good way to determine
   how many times we should unroll the loop; the experiments I have made
   showed that this choice may affect performance on the order of
   several percent.  */

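/* The pass is requested through the UAP_UNROLL / UAP_UNROLL_ALL bits in
   the FLAGS arguments below (wired to -funroll-loops and
   -funroll-all-loops in loop-init.c) or per loop through loop->unroll,
   which a user-supplied unrolling pragma fills in; the parameters
   mentioned above (max-unrolled-insns, max-average-unrolled-insns,
   max-unroll-times) bound the result.  */
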
/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	/* The insn in which the induction variable occurs.  */
  rtx orig_var;		/* The variable (register) for the IV before split.  */
  rtx base_var;		/* The variable on which the values in the further
			   iterations are based.  */
  rtx step;		/* Step of the induction variable.  */
  struct iv_to_split *next;	/* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	/* The insn in which the variable expansion occurs.  */
  rtx reg;		/* The accumulator which is expanded.  */
  vec<rtx> var_expansions;	/* The copies of the accumulator which
				   is expanded.  */
  struct var_to_expand *next;	/* Next entry in walking order.  */
  enum rtx_code op;	/* The type of the accumulation - addition, subtraction
			   or multiplication.  */
  int expansion_count;	/* Count the number of expansions generated so far.  */
  int reuse_expansion;	/* The expansion we intend to reuse to expand
			   the accumulator.  If REUSE_EXPANSION is 0 reuse
			   the original accumulator.  Else use
			   var_expansions[REUSE_EXPANSION - 1].  */
};

/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : free_ptr_hash <iv_to_split>
{
  static inline hashval_t hash (const iv_to_split *);
  static inline bool equal (const iv_to_split *, const iv_to_split *);
};

/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const iv_to_split *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const iv_to_split *i1, const iv_to_split *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : free_ptr_hash <var_to_expand>
{
  static inline hashval_t hash (const var_to_expand *);
  static inline bool equal (const var_to_expand *, const var_to_expand *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const var_to_expand *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const var_to_expand *i1, const var_to_expand *i2)
{
  return i1->insn == i2->insn;
}

/* Information about optimization applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split;	/* A hashtable of insns to
						   split.  */
  struct iv_to_split *iv_to_split_head;	/* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail;	/* Pointer to the tail of the
						   list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand;	/* A hashtable
					of insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head;	/* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail;	/* Pointer to the tail of the
						   list.  */
  unsigned first_new_block;	/* The first basic block that was
				   duplicated.  */
  basic_block loop_exit;	/* The loop exit basic block.  */
  basic_block loop_preheader;	/* The loop preheader basic block.  */
};

static void decide_unroll_stupid (struct loop *, int);
static void decide_unroll_constant_iterations (struct loop *, int);
static void decide_unroll_runtime_iterations (struct loop *, int);
static void unroll_loop_stupid (struct loop *);
static void decide_unrolling (int);
static void unroll_loop_constant_iterations (struct loop *);
static void unroll_loop_runtime_iterations (struct loop *);
static struct opt_info *analyze_insns_in_loop (struct loop *);
static void opt_info_start_duplication (struct opt_info *);
static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool);
static void free_opt_info (struct opt_info *);
static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *);
static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *);
static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *);
static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *);
static void insert_var_expansion_initialization (struct var_to_expand *,
						 basic_block);
static void combine_var_copies_in_loop_exit (struct var_to_expand *,
					     basic_block);
static rtx get_expansion (struct var_to_expand *);

/* Emit a message summarizing the unroll that will be
   performed for LOOP, along with the loop's location LOCUS, if
   appropriate given the dump or -fopt-info settings.  */

static void
report_unroll (struct loop *loop, dump_location_t locus)
{
  dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;

  if (loop->lpt_decision.decision == LPT_NONE)
    return;

  if (!dump_enabled_p ())
    return;

  dump_metadata_t metadata (report_flags, locus.get_impl_location ());
  dump_printf_loc (metadata, locus.get_user_location (),
		   "loop unrolled %d times",
		   loop->lpt_decision.times);
  if (profile_info && loop->header->count.initialized_p ())
    dump_printf (metadata,
		 " (header execution count %d)",
		 (int)loop->header->count.to_gcov_type ());

  dump_printf (metadata, "\n");
}

/* Decide whether to unroll loops and how much.  */
static void
decide_unrolling (int flags)
{
  struct loop *loop;

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      loop->lpt_decision.decision = LPT_NONE;
      dump_user_location_t locus = get_loop_location (loop);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, locus,
			 "considering unrolling loop %d at BB %d\n",
			 loop->num, loop->header->index);

      if (loop->unroll == 1)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not unrolling loop, user didn't want it unrolled\n");
	  continue;
	}

      /* Do not peel cold areas.  */
      if (optimize_loop_for_size_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, cold area\n");
	  continue;
	}

      /* Can the loop be manipulated?  */
      if (!can_duplicate_loop_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     ";; Not considering loop, cannot duplicate\n");
	  continue;
	}

      /* Skip non-innermost loops.  */
      if (loop->inner)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Not considering loop, is not innermost\n");
	  continue;
	}

      loop->ninsns = num_loop_insns (loop);
      loop->av_ninsns = average_num_loop_insns (loop);

      /* Try transformations one by one in decreasing order of priority.  */
      decide_unroll_constant_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_runtime_iterations (loop, flags);
      if (loop->lpt_decision.decision == LPT_NONE)
	decide_unroll_stupid (loop, flags);

      report_unroll (loop, locus);
    }
}

/* Unroll LOOPS.  */
void
unroll_loops (int flags)
{
  struct loop *loop;
  bool changed = false;

  /* Now decide the rest of the unrolling.  */
  decide_unrolling (flags);

  /* Scan the loops, inner ones first.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* And perform the appropriate transformations.  */
      switch (loop->lpt_decision.decision)
	{
	case LPT_UNROLL_CONSTANT:
	  unroll_loop_constant_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_RUNTIME:
	  unroll_loop_runtime_iterations (loop);
	  changed = true;
	  break;
	case LPT_UNROLL_STUPID:
	  unroll_loop_stupid (loop);
	  changed = true;
	  break;
	case LPT_NONE:
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  if (changed)
    {
      calculate_dominance_info (CDI_DOMINATORS);
      fix_loop_structure (NULL);
    }

  iv_analysis_done ();
}

/* Check whether the exit of the LOOP is at the end of the loop body.  */

static bool
loop_exit_at_end_p (struct loop *loop)
{
  struct niter_desc *desc = get_simple_loop_desc (loop);
  rtx_insn *insn;

  /* We should never have a conditional in the latch block.  */
  gcc_assert (desc->in_edge->dest != loop->header);

  if (desc->in_edge->dest != loop->latch)
    return false;

  /* Check that the latch is empty.  */
  FOR_BB_INSNS (loop->latch, insn)
    {
      if (INSN_P (insn) && active_insn_p (insn))
	return false;
    }

  return true;
}

/* Decide whether to unroll LOOP iterating a constant number of times
   and how much.  */

static void
decide_unroll_constant_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i;
  struct niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with constant "
		 "number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
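
  /* Illustration, assuming the documented parameter defaults of this era
     (max-unrolled-insns = 200, max-average-unrolled-insns = 80,
     max-unroll-times = 8): a loop of 20 insns averaging 10 executed insns
     per iteration gets nunroll = MIN (200/20, 80/10, 8) = 8 before the
     target hook below gets a chance to adjust it.  */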

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check number of iterations.  */
  if (!desc->simple_p || !desc->const_iter || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the loop iterates constant times\n");
      return;
    }

  /* Check for an explicit unrolling factor.  */
  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    {
      /* However we cannot unroll completely at the RTL level a loop with
	 a constant number of iterations; it should have been peeled instead.  */
      if ((unsigned) loop->unroll - 1 > desc->niter - 2)
	{
	  if (dump_file)
	    fprintf (dump_file, ";; Loop should have been peeled\n");
	}
      else
	{
	  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
	  loop->lpt_decision.times = loop->unroll - 1;
	}
      return;
    }

  /* Check whether the loop rolls enough to consider.
     Consult also loop bounds and profile; in case the loop has more
     than one exit it may well loop fewer times than the determined
     maximal number of iterations.  */
  if (desc->niter < 2 * nunroll
      || ((get_estimated_loop_iterations (loop, &iterations)
	   || get_likely_max_loop_iterations (loop, &iterations))
	  && wi::ltu_p (iterations, 2 * nunroll)))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now compute the number of iterations to unroll.  We alter
     nunroll so that as few as possible copies of the loop body are
     necessary, while still not decreasing the number of unrollings
     too much (at most by 1).  */
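  /* Worked example (numbers chosen for illustration): with
     desc->niter == 100, nunroll == 8 and the exit at the beginning of the
     body, the search below tries i from 18 down to 7 and scores each
     candidate as n_copies = 100 % (i + 1) + i + 1.  The minimum is reached
     at i == 9 (exit_mod == 0, n_copies == 10): the body is duplicated nine
     times, nothing needs peeling, and the unrolled loop rolls exactly ten
     times.  */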
  best_copies = 2 * nunroll + 10;

  i = 2 * nunroll + 2;
  if (i > desc->niter - 2)
    i = desc->niter - 2;

  for (; i >= nunroll - 1; i--)
    {
      unsigned exit_mod = desc->niter % (i + 1);

      if (!loop_exit_at_end_p (loop))
	n_copies = exit_mod + i + 1;
      else if (exit_mod != (unsigned) i
	       || desc->noloop_assumptions != NULL_RTX)
	n_copies = exit_mod + i + 2;
      else
	n_copies = i + 1;

      if (n_copies < best_copies)
	{
	  best_copies = n_copies;
	  best_unroll = i;
	}
    }

  loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
  loop->lpt_decision.times = best_unroll;
}

/* Unroll LOOP with a constant number of iterations LOOP->LPT_DECISION.TIMES
   times.  The transformation does this:

   for (i = 0; i < 102; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   body; i++;
   body; i++;
   while (i < 102)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_constant_iterations (struct loop *loop)
{
  unsigned HOST_WIDE_INT niter;
  unsigned exit_mod;
  unsigned i;
  edge e;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  niter = desc->niter;

  /* Should not get here (such a loop should be peeled instead).  */
  gcc_assert (niter > max_unroll + 1);

  exit_mod = niter % (max_unroll + 1);
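
  /* In the example from the comment above, niter == 102 and
     max_unroll == 3 give exit_mod == 102 % 4 == 2, i.e. two peeled
     iterations in front of a loop whose body is duplicated four times.  */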

  auto_sbitmap wont_exit (max_unroll + 2);
  bitmap_ones (wont_exit);

  auto_vec<edge> remove_edges;
  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  if (!exit_at_end)
    {
      /* The exit is not at the end of the loop; leave the exit test
	 in the first copy, so that the loops that start with a test
	 of the exit condition have a continuous body after unrolling.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at beginning of loop.\n");

      /* Peel exit_mod iterations.  */
      bitmap_clear_bit (wont_exit, 0);
      if (desc->noloop_assumptions)
	bitmap_clear_bit (wont_exit, 1);

      if (exit_mod)
	{
	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 1
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 1)
	    apply_opt_in_copies (opt_info, exit_mod, false, false);

	  desc->noloop_assumptions = NULL_RTX;
	  desc->niter -= exit_mod;
	  loop->nb_iterations_upper_bound -= exit_mod;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod;
	  else
	    loop->any_likely_upper_bound = false;
	}

      bitmap_set_bit (wont_exit, 1);
    }
  else
    {
      /* Leave the exit test in the last copy, for the same reason as above if
	 the loop tests the condition at the end of the loop body.  */

      if (dump_file)
	fprintf (dump_file, ";; Condition at end of loop.\n");

      /* We know that niter >= max_unroll + 2; so we do not need to care about
	 the case when we would exit before reaching the loop.  So just peel
	 exit_mod + 1 iterations.  */
      if (exit_mod != max_unroll
	  || desc->noloop_assumptions)
	{
	  bitmap_clear_bit (wont_exit, 0);
	  if (desc->noloop_assumptions)
	    bitmap_clear_bit (wont_exit, 1);

	  opt_info_start_duplication (opt_info);
	  ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					      exit_mod + 1,
					      wont_exit, desc->out_edge,
					      &remove_edges,
					      DLTHE_FLAG_UPDATE_FREQ
					      | (opt_info && exit_mod > 0
						 ? DLTHE_RECORD_COPY_NUMBER
						 : 0));
	  gcc_assert (ok);

	  if (opt_info && exit_mod > 0)
	    apply_opt_in_copies (opt_info, exit_mod + 1, false, false);

	  desc->niter -= exit_mod + 1;
	  loop->nb_iterations_upper_bound -= exit_mod + 1;
	  if (loop->any_estimate
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate))
	    loop->nb_iterations_estimate -= exit_mod + 1;
	  else
	    loop->any_estimate = false;
	  if (loop->any_likely_upper_bound
	      && wi::leu_p (exit_mod + 1, loop->nb_iterations_likely_upper_bound))
	    loop->nb_iterations_likely_upper_bound -= exit_mod + 1;
	  else
	    loop->any_likely_upper_bound = false;
	  desc->noloop_assumptions = NULL_RTX;

	  bitmap_set_bit (wont_exit, 0);
	  bitmap_set_bit (wont_exit, 1);
	}

      bitmap_clear_bit (wont_exit, max_unroll);
    }

  /* Now unroll the loop.  */

  opt_info_start_duplication (opt_info);
  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);

      /* Find a new in and out edge; they are in the last copy we have made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  desc->niter /= max_unroll + 1;
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  desc->niter_expr = GEN_INT (desc->niter);

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, constant # of iterations %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP iterating a runtime-computable number of
   times and how much.  */
static void
decide_unroll_runtime_iterations (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "considering unrolling loop with runtime-"
		 "computable number of iterations\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (!desc->simple_p || desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file,
		 ";; Unable to prove that the number of iterations "
		 "can be counted in runtime\n");
      return;
    }

  if (desc->const_iter)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop iterates constant times\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success; now force nunroll to be a power of 2, as the code generation
     requires it; we would otherwise be unable to cope with overflows in
     the computation of the number of iterations.  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;
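
  /* E.g. nunroll == 6 leaves i == 4 after the loop above, so we decide on
     times == 3, i.e. four copies of the body.  */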
  loop->lpt_decision.decision = LPT_UNROLL_RUNTIME;
  loop->lpt_decision.times = i - 1;
}

/* Splits edge E and inserts the sequence of instructions INSNS on it, and
   returns the newly created block.  If INSNS is NULL_RTX, nothing is changed
   and NULL is returned instead.  */

basic_block
split_edge_and_insert (edge e, rtx_insn *insns)
{
  basic_block bb;

  if (!insns)
    return NULL;
  bb = split_edge (e);
  emit_insn_after (insns, BB_END (bb));

  /* ??? We used to assume that INSNS can contain control flow insns, and
     that we had to try to find sub basic blocks in BB to maintain a valid
     CFG.  For this purpose we used to set the BB_SUPERBLOCK flag on BB
     and call break_superblocks when going out of cfglayout mode.  But it
     turns out that this never happens; and that if it does ever happen,
     the verify_flow_info at the end of the RTL loop passes would fail.

     There are two reasons why we expected we could have control flow insns
     in INSNS.  The first is when a comparison has to be done in parts, and
     the second is when the number of iterations is computed for loops with
     the number of iterations known at runtime.  In both cases, test cases
     to get control flow in INSNS appear to be impossible to construct:

     * If do_compare_rtx_and_jump needs several branches to do a comparison
       in a mode that needs comparison by parts, we cannot analyze the
       number of iterations of the loop, and we never get to unrolling it.

     * The code in expand_divmod that was suspected to cause creation of
       branching code seems to be only accessed for signed division.  The
       divisions used by the # of iterations analysis are always unsigned.
       Problems might arise on architectures that emit branching code
       for some operations that may appear in the unroller (especially
       for division), but we have no such architectures.

     Considering all this, it was decided that we should for now assume
     that INSNS can in theory contain control flow insns, but in practice
     it never does.  So we don't handle the theoretical case, and should
     a real failure ever show up, we have a pretty good clue for how to
     fix it.  */

  return bb;
}

/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if
   true, with probability PROB.  If CINSN is not NULL, it is the insn to copy
   in order to create a jump.  */

static rtx_insn *
compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp,
		      rtx_code_label *label, profile_probability prob,
		      rtx_insn *cinsn)
{
  rtx_insn *seq;
  rtx_jump_insn *jump;
  rtx cond;
  machine_mode mode;

  mode = GET_MODE (op0);
  if (mode == VOIDmode)
    mode = GET_MODE (op1);

  start_sequence ();
  if (GET_MODE_CLASS (mode) == MODE_CC)
    {
      /* A hack -- there seems to be no easy generic way to make a
	 conditional jump from a ccmode comparison.  */
      gcc_assert (cinsn);
      cond = XEXP (SET_SRC (pc_set (cinsn)), 0);
      gcc_assert (GET_CODE (cond) == comp);
      gcc_assert (rtx_equal_p (op0, XEXP (cond, 0)));
      gcc_assert (rtx_equal_p (op1, XEXP (cond, 1)));
      emit_jump_insn (copy_insn (PATTERN (cinsn)));
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      JUMP_LABEL (jump) = JUMP_LABEL (cinsn);
      LABEL_NUSES (JUMP_LABEL (jump))++;
      redirect_jump (jump, label, 0);
    }
  else
    {
      gcc_assert (!cinsn);

      op0 = force_operand (op0, NULL_RTX);
      op1 = force_operand (op1, NULL_RTX);
      do_compare_rtx_and_jump (op0, op1, comp, 0,
			       mode, NULL_RTX, NULL, label,
			       profile_probability::uninitialized ());
      jump = as_a <rtx_jump_insn *> (get_last_insn ());
      jump->set_jump_target (label);
      LABEL_NUSES (label)++;
    }
  if (prob.initialized_p ())
    add_reg_br_prob_note (jump, prob);

  seq = get_insns ();
  end_sequence ();

  return seq;
}

/* Unroll LOOP, for which we are able to count the number of iterations at
   runtime, LOOP->LPT_DECISION.TIMES times.  The times value must be a
   power of two.  The transformation does this (with some extra care
   for the case n < 0):

   for (i = 0; i < n; i++)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   i = 0;
   mod = n % 4;

   switch (mod)
     {
       case 3:
         body; i++;
       case 2:
         body; i++;
       case 1:
         body; i++;
       case 0: ;
     }

   while (i < n)
     {
       body; i++;
       body; i++;
       body; i++;
       body; i++;
     }
   */
static void
unroll_loop_runtime_iterations (struct loop *loop)
{
  rtx old_niter, niter, tmp;
  rtx_insn *init_code, *branch_code;
  unsigned i, j;
  profile_probability p;
  basic_block preheader, *body, swtch, ezc_swtch = NULL;
  int may_exit_copy;
  profile_count iter_count, new_count;
  unsigned n_peel;
  edge e;
  bool extra_zero_check, last_may_exit;
  unsigned max_unroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  bool exit_at_end = loop_exit_at_end_p (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  /* Remember blocks whose dominators will have to be updated.  */
  auto_vec<basic_block> dom_bbs;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      vec<basic_block> ldom;
      basic_block bb;

      ldom = get_dominated_by (CDI_DOMINATORS, body[i]);
      FOR_EACH_VEC_ELT (ldom, j, bb)
	if (!flow_bb_inside_loop_p (loop, bb))
	  dom_bbs.safe_push (bb);

      ldom.release ();
    }
  free (body);

  if (!exit_at_end)
    {
      /* Leave the exit in the first copy (for an explanation why, see the
	 comment in unroll_loop_constant_iterations).  */
      may_exit_copy = 0;
      n_peel = max_unroll - 1;
      extra_zero_check = true;
      last_may_exit = false;
    }
  else
    {
      /* Leave the exit in the last copy (for an explanation why, see the
	 comment in unroll_loop_constant_iterations).  */
      may_exit_copy = max_unroll;
      n_peel = max_unroll;
      extra_zero_check = false;
      last_may_exit = true;
    }

  /* Get an expression for the number of iterations.  */
  start_sequence ();
  old_niter = niter = gen_reg_rtx (desc->mode);
  tmp = force_operand (copy_rtx (desc->niter_expr), niter);
  if (tmp != niter)
    emit_move_insn (niter, tmp);

  /* For loops that exit at the end and whose number of iterations is
     reliable, add one to niter to account for the first pass through the
     loop body before reaching the exit test.  */
  if (exit_at_end && !desc->noloop_assumptions)
    {
      niter = expand_simple_binop (desc->mode, PLUS,
				   niter, const1_rtx,
				   NULL_RTX, 0, OPTAB_LIB_WIDEN);
      old_niter = niter;
    }

  /* Count the modulus by ANDing it with max_unroll; we use the fact that
     the number of unrollings is a power of two, and thus this is correct
     even if there is overflow in the computation.  */
  niter = expand_simple_binop (desc->mode, AND,
			       niter, gen_int_mode (max_unroll, desc->mode),
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
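
  /* E.g. for max_unroll == 3 (four copies of the body) the modulus is
     niter & 3; with niter == 11 this yields 3, the same as 11 % 4, and
     the masking stays correct even if the increment above wrapped.  */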

  init_code = get_insns ();
  end_sequence ();
  unshare_all_rtl_in_chain (init_code);

  /* Precondition the loop.  */
  split_edge_and_insert (loop_preheader_edge (loop), init_code);

  auto_vec<edge> remove_edges;

  auto_sbitmap wont_exit (max_unroll + 2);

  if (extra_zero_check || desc->noloop_assumptions)
    {
      /* Peel the first copy of the loop body.  Leave the exit test if the
	 number of iterations is not reliable.  Also record the place of the
	 extra zero check.  */
      bitmap_clear (wont_exit);
      if (!desc->noloop_assumptions)
	bitmap_set_bit (wont_exit, 1);
      ezc_swtch = loop_preheader_edge (loop)->src;
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);
    }

  /* Record the place where the switch will be built for preconditioning.  */
  swtch = split_edge (loop_preheader_edge (loop));

  /* Compute count increments for each switch block and initialize
     the innermost switch block.  Switch blocks and peeled loop copies are
     built from the innermost outward.  */
  iter_count = new_count = swtch->count.apply_scale (1, max_unroll + 1);
  swtch->count = new_count;

  for (i = 0; i < n_peel; i++)
    {
      /* Peel the copy.  */
      bitmap_clear (wont_exit);
      if (i != n_peel - 1 || !last_may_exit)
	bitmap_set_bit (wont_exit, 1);
      ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop),
					  1, wont_exit, desc->out_edge,
					  &remove_edges,
					  DLTHE_FLAG_UPDATE_FREQ);
      gcc_assert (ok);

      /* Create an item for the switch.  */
      j = n_peel - i - (extra_zero_check ? 0 : 1);
      p = profile_probability::always ().apply_scale (1, i + 2);

      preheader = split_edge (loop_preheader_edge (loop));
      /* Add in the count of the edge from the switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ,
					  block_label (preheader), p,
					  NULL);

      /* We rely on the fact that the compare and jump cannot be optimized out,
	 and hence the cfg we create is correct.  */
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      new_count += iter_count;
      swtch->count = new_count;
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  if (extra_zero_check)
    {
      /* Add a branch for zero iterations.  */
      p = profile_probability::always ().apply_scale (1, max_unroll + 1);
      swtch = ezc_swtch;
      preheader = split_edge (loop_preheader_edge (loop));
      /* Recompute count adjustments since the initial peel copy may
	 have exited and reduced those values that were computed above.  */
      iter_count = swtch->count.apply_scale (1, max_unroll + 1);
      /* Add in the count of the edge from the switch block.  */
      preheader->count += iter_count;
      branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ,
					  block_label (preheader), p,
					  NULL);
      gcc_assert (branch_code != NULL_RTX);

      swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code);
      set_immediate_dominator (CDI_DOMINATORS, preheader, swtch);
      single_succ_edge (swtch)->probability = p.invert ();
      e = make_edge (swtch, preheader,
		     single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP);
      e->probability = p;
    }

  /* Recount dominators for outer blocks.  */
  iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);

  /* And unroll the loop.  */

  bitmap_ones (wont_exit);
  bitmap_clear_bit (wont_exit, may_exit_copy);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      max_unroll,
				      wont_exit, desc->out_edge,
				      &remove_edges,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, max_unroll, true, true);
      free_opt_info (opt_info);
    }

  if (exit_at_end)
    {
      basic_block exit_block = get_bb_copy (desc->in_edge->src);

      /* Find a new in and out edge; they are in the last copy we have
	 made.  */

      if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest)
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 0);
	  desc->in_edge = EDGE_SUCC (exit_block, 1);
	}
      else
	{
	  desc->out_edge = EDGE_SUCC (exit_block, 1);
	  desc->in_edge = EDGE_SUCC (exit_block, 0);
	}
    }

  /* Remove the edges.  */
  FOR_EACH_VEC_ELT (remove_edges, i, e)
    remove_path (e);

  /* We must be careful when updating the number of iterations due to
     preconditioning and the fact that the value must be valid at entry
     of the loop.  After passing through the above code, we see that
     the correct new number of iterations is this:  */
  gcc_assert (!desc->const_iter);
  desc->niter_expr =
    simplify_gen_binary (UDIV, desc->mode, old_niter,
			 gen_int_mode (max_unroll + 1, desc->mode));
  loop->nb_iterations_upper_bound
    = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1);
  if (loop->any_estimate)
    loop->nb_iterations_estimate
      = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1);
  if (loop->any_likely_upper_bound)
    loop->nb_iterations_likely_upper_bound
      = wi::udiv_trunc (loop->nb_iterations_likely_upper_bound, max_unroll + 1);
  if (exit_at_end)
    {
      desc->niter_expr =
	simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx);
      desc->noloop_assumptions = NULL_RTX;
      --loop->nb_iterations_upper_bound;
      if (loop->any_estimate
	  && loop->nb_iterations_estimate != 0)
	--loop->nb_iterations_estimate;
      else
	loop->any_estimate = false;
      if (loop->any_likely_upper_bound
	  && loop->nb_iterations_likely_upper_bound != 0)
	--loop->nb_iterations_likely_upper_bound;
      else
	loop->any_likely_upper_bound = false;
    }

  if (dump_file)
    fprintf (dump_file,
	     ";; Unrolled loop %d times, counting # of iterations "
	     "in runtime, %i insns\n",
	     max_unroll, num_loop_insns (loop));
}

/* Decide whether to unroll LOOP stupidly and how much.  */
static void
decide_unroll_stupid (struct loop *loop, int flags)
{
  unsigned nunroll, nunroll_by_av, i;
  struct niter_desc *desc;
  widest_int iterations;

  /* If we were not asked to unroll this loop, just return silently.  */
  if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
    return;

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");

  /* nunroll = total number of copies of the original loop body in
     unrolled loop (i.e. if it is 2, we have to duplicate loop body once).  */
  nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
  nunroll_by_av
    = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
  if (nunroll > nunroll_by_av)
    nunroll = nunroll_by_av;
  if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
    nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  if (targetm.loop_unroll_adjust)
    nunroll = targetm.loop_unroll_adjust (nunroll, loop);

  if (loop->unroll > 0 && loop->unroll < USHRT_MAX)
    nunroll = loop->unroll;

  /* Skip big loops.  */
  if (nunroll <= 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not considering loop, is too big\n");
      return;
    }

  /* Check for simple loops.  */
  desc = get_simple_loop_desc (loop);

  /* Check simpleness.  */
  if (desc->simple_p && !desc->assumptions)
    {
      if (dump_file)
	fprintf (dump_file, ";; Loop is simple\n");
      return;
    }

  /* Do not unroll loops with branches inside -- it increases the number
     of mispredicts.
     TODO: this heuristic needs tuning; a call inside the loop body
     is also a relatively good reason not to unroll.  */
  if (num_loop_branches (loop) > 1)
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling, contains branches\n");
      return;
    }

  /* Check whether the loop rolls.  */
  if ((get_estimated_loop_iterations (loop, &iterations)
       || get_likely_max_loop_iterations (loop, &iterations))
      && wi::ltu_p (iterations, 2 * nunroll))
    {
      if (dump_file)
	fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n");
      return;
    }

  /* Success.  Now force nunroll to be a power of 2, as it seems that this
     improves results (partially because of better alignments, partially
     because of some dark magic).  */
  for (i = 1; 2 * i <= nunroll; i *= 2)
    continue;

  loop->lpt_decision.decision = LPT_UNROLL_STUPID;
  loop->lpt_decision.times = i - 1;
}

/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times.  The transformation does this:

   while (cond)
     body;

   ==>  (LOOP->LPT_DECISION.TIMES == 3)

   while (cond)
     {
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
       if (!cond) break;
       body;
     }
   */
static void
unroll_loop_stupid (struct loop *loop)
{
  unsigned nunroll = loop->lpt_decision.times;
  struct niter_desc *desc = get_simple_loop_desc (loop);
  struct opt_info *opt_info = NULL;
  bool ok;

  if (flag_split_ivs_in_unroller
      || flag_variable_expansion_in_unroller)
    opt_info = analyze_insns_in_loop (loop);

  auto_sbitmap wont_exit (nunroll + 1);
  bitmap_clear (wont_exit);
  opt_info_start_duplication (opt_info);

  ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop),
				      nunroll, wont_exit,
				      NULL, NULL,
				      DLTHE_FLAG_UPDATE_FREQ
				      | (opt_info
					 ? DLTHE_RECORD_COPY_NUMBER
					 : 0));
  gcc_assert (ok);

  if (opt_info)
    {
      apply_opt_in_copies (opt_info, nunroll, true, true);
      free_opt_info (opt_info);
    }

  if (desc->simple_p)
    {
      /* We indeed may get here provided that there are nontrivial assumptions
	 for a loop to be really simple.  We could update the counts, but the
	 problem is that we are unable to decide which exit will be taken
	 (not really true in case the number of iterations is constant,
	 but no one will do anything with this information, so we do not
	 worry about it).  */
      desc->simple_p = false;
    }

  if (dump_file)
    fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n",
	     nunroll, num_loop_insns (loop));
}

/* Returns true if REG is referenced in one nondebug insn in LOOP.
   Set *DEBUG_USES to the number of debug insns that reference the
   variable.  */

static bool
referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg,
				  int *debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  int count_ref = 0;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!rtx_referenced_p (reg, insn))
	  continue;
	else if (DEBUG_INSN_P (insn))
	  ++*debug_uses;
	else if (++count_ref > 1)
	  break;
    }
  free (body);
  return (count_ref == 1);
}

/* Reset the DEBUG_USES debug insns in LOOP that reference REG.  */

static void
reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses)
{
  basic_block *body, bb;
  unsigned i;
  rtx_insn *insn;

  body = get_loop_body (loop);
  for (i = 0; debug_uses && i < loop->num_nodes; i++)
    {
      bb = body[i];

      FOR_BB_INSNS (bb, insn)
	if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn))
	  continue;
	else
	  {
	    validate_change (insn, &INSN_VAR_LOCATION_LOC (insn),
			     gen_rtx_UNKNOWN_VAR_LOC (), 0);
	    if (!--debug_uses)
	      break;
	  }
    }
  free (body);
}

/* Determine whether INSN contains an accumulator
   which can be expanded into separate copies,
   one for each copy of the LOOP body.

   for (i = 0 ; i < n; i++)
     sum += a[i];

   ==>

   sum += a[i]
   ....
   i = i+1;
   sum1 += a[i]
   ....
   i = i+1
   sum2 += a[i];
   ....

   Return NULL if INSN contains no opportunity for expansion of the
   accumulator.  Otherwise, allocate a VAR_TO_EXPAND structure, fill it with
   the relevant information and return a pointer to it.
*/

static struct var_to_expand *
analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn)
{
  rtx set, dest, src;
  struct var_to_expand *ves;
  unsigned accum_pos;
  enum rtx_code code;
  int debug_uses = 0;

  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  src = SET_SRC (set);
  code = GET_CODE (src);

  if (code != PLUS && code != MINUS && code != MULT && code != FMA)
    return NULL;

  if (FLOAT_MODE_P (GET_MODE (dest)))
    {
      if (!flag_associative_math)
	return NULL;
      /* In the case of FMA, we're also changing the rounding.  */
      if (code == FMA && !flag_unsafe_math_optimizations)
	return NULL;
    }

  /* Hmm, this is a bit paradoxical.  We know that INSN is a valid insn
     in MD.  But if there is no optab to generate the insn, we cannot
     perform the variable expansion.  This can happen if an MD provides
     an insn but not a named pattern to generate it, for example to avoid
     producing code that needs additional mode switches like for x87/mmx.

     So we check have_insn_for which looks for an optab for the operation
     in SRC.  If it doesn't exist, we can't perform the expansion even
     though INSN is valid.  */
  if (!have_insn_for (code, GET_MODE (src)))
    return NULL;

  if (!REG_P (dest)
      && !(GET_CODE (dest) == SUBREG
	   && REG_P (SUBREG_REG (dest))))
    return NULL;

  /* Find the accumulator use within the operation.  */
  if (code == FMA)
    {
      /* We only support accumulation via FMA in the ADD position.  */
      if (!rtx_equal_p (dest, XEXP (src, 2)))
	return NULL;
      accum_pos = 2;
    }
  else if (rtx_equal_p (dest, XEXP (src, 0)))
    accum_pos = 0;
  else if (rtx_equal_p (dest, XEXP (src, 1)))
    {
      /* The method of expansion that we are using, which includes the
	 initialization of the expansions with zero and the summation of
	 the expansions at the end of the computation, will yield wrong
	 results for (x = something - x); thus avoid using it in that case.  */
      if (code == MINUS)
	return NULL;
      accum_pos = 1;
    }
  else
    return NULL;

  /* It must not otherwise be used.  */
  if (code == FMA)
    {
      if (rtx_referenced_p (dest, XEXP (src, 0))
	  || rtx_referenced_p (dest, XEXP (src, 1)))
	return NULL;
    }
  else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos)))
    return NULL;

  /* It must be used in exactly one insn.  */
  if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses))
    return NULL;

  if (dump_file)
    {
      fprintf (dump_file, "\n;; Expanding Accumulator ");
      print_rtl (dump_file, dest);
      fprintf (dump_file, "\n");
    }

  if (debug_uses)
    /* Instead of resetting the debug insns, we could replace each
       debug use in the loop with the sum or product of all expanded
       accumulators.  Since we'll only know of all expansions at the
       end, we'd have to keep track of which vars_to_expand a debug
       insn in the loop references, take note of each copy of the
       debug insn during unrolling, and when it's all done, compute
       the sum or product of each variable and adjust the original
       debug insn and each copy thereof.  What a pain!  */
    reset_debug_uses_in_loop (loop, dest, debug_uses);

  /* Record the accumulator to expand.  */
  ves = XNEW (struct var_to_expand);
  ves->insn = insn;
  ves->reg = copy_rtx (dest);
  ves->var_expansions.create (1);
  ves->next = NULL;
  ves->op = GET_CODE (src);
  ves->expansion_count = 0;
  ves->reuse_expansion = 0;
  return ves;
}

/* Determine whether there is an induction variable in INSN that
   we would like to split during unrolling.

   I.e. replace

   i = i + 1;

   i = i + 1;

   i = i + 1;

   type chains by

   i0 = i + 1

   i = i0 + 1

   i = i0 + 2

   Return NULL if INSN contains no interesting IVs.  Otherwise, allocate
   an IV_TO_SPLIT structure, fill it with the relevant information and return a
   pointer to it.  */

static struct iv_to_split *
analyze_iv_to_split_insn (rtx_insn *insn)
{
  rtx set, dest;
  struct rtx_iv iv;
  struct iv_to_split *ivts;
  scalar_int_mode mode;
  bool ok;

  /* For now we just split the basic induction variables.  Later this may be
     extended for example by selecting also addresses of memory references.  */
  set = single_set (insn);
  if (!set)
    return NULL;

  dest = SET_DEST (set);
  if (!REG_P (dest) || !is_a <scalar_int_mode> (GET_MODE (dest), &mode))
    return NULL;

  if (!biv_p (insn, mode, dest))
    return NULL;

  ok = iv_analyze_result (insn, dest, &iv);

  /* This used to be an assert under the assumption that if biv_p returns
     true that iv_analyze_result must also return true.  However, that
     assumption is not strictly correct as evidenced by pr25569.

     Returning NULL when iv_analyze_result returns false is safe and
     avoids the problems in pr25569 until the iv_analyze_* routines
     can be fixed, which is apparently hard and time consuming
     according to their author.  */
  if (! ok)
    return NULL;

  if (iv.step == const0_rtx
      || iv.mode != iv.extend_mode)
    return NULL;

  /* Record the insn to split.  */
  ivts = XNEW (struct iv_to_split);
  ivts->insn = insn;
  ivts->orig_var = dest;
  ivts->base_var = NULL_RTX;
  ivts->step = iv.step;
  ivts->next = NULL;

  return ivts;
}

/* Determine which insns in LOOP can be optimized.
   Return an OPT_INFO struct with the relevant hash tables filled
   with all insns to be optimized.  The FIRST_NEW_BLOCK field
   is undefined for the return value.  */

static struct opt_info *
analyze_insns_in_loop (struct loop *loop)
{
  basic_block *body, bb;
  unsigned i;
  struct opt_info *opt_info = XCNEW (struct opt_info);
  rtx_insn *insn;
  struct iv_to_split *ivts = NULL;
  struct var_to_expand *ves = NULL;
  iv_to_split **slot1;
  var_to_expand **slot2;
  vec<edge> edges = get_loop_exit_edges (loop);
  edge exit;
  bool can_apply = false;

  iv_analysis_loop_init (loop);

  body = get_loop_body (loop);

  if (flag_split_ivs_in_unroller)
    {
      opt_info->insns_to_split
	= new hash_table<iv_split_hasher> (5 * loop->num_nodes);
      opt_info->iv_to_split_head = NULL;
      opt_info->iv_to_split_tail = &opt_info->iv_to_split_head;
    }

  /* Record the loop exit bb and loop preheader before the unrolling.  */
  opt_info->loop_preheader = loop_preheader_edge (loop)->src;

  if (edges.length () == 1)
    {
      exit = edges[0];
      if (!(exit->flags & EDGE_COMPLEX))
	{
	  opt_info->loop_exit = split_edge (exit);
	  can_apply = true;
	}
    }

  if (flag_variable_expansion_in_unroller
      && can_apply)
    {
      opt_info->insns_with_var_to_expand
	= new hash_table<var_expand_hasher> (5 * loop->num_nodes);
      opt_info->var_to_expand_head = NULL;
      opt_info->var_to_expand_tail = &opt_info->var_to_expand_head;
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = body[i];
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  if (!INSN_P (insn))
	    continue;

	  if (opt_info->insns_to_split)
	    ivts = analyze_iv_to_split_insn (insn);

	  if (ivts)
	    {
	      slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT);
	      gcc_assert (*slot1 == NULL);
	      *slot1 = ivts;
	      *opt_info->iv_to_split_tail = ivts;
	      opt_info->iv_to_split_tail = &ivts->next;
	      continue;
	    }

	  if (opt_info->insns_with_var_to_expand)
	    ves = analyze_insn_to_expand_var (loop, insn);

	  if (ves)
	    {
	      slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT);
	      gcc_assert (*slot2 == NULL);
	      *slot2 = ves;
	      *opt_info->var_to_expand_tail = ves;
	      opt_info->var_to_expand_tail = &ves->next;
	    }
	}
    }

  edges.release ();
  free (body);
  return opt_info;
}

/* Called just before loop duplication.  Records the start of the duplicated
   area to OPT_INFO.  */

static void
opt_info_start_duplication (struct opt_info *opt_info)
{
  if (opt_info)
    opt_info->first_new_block = last_basic_block_for_fn (cfun);
}

/* Determine the number of iterations between initialization of the base
   variable and the current copy (N_COPY).  N_COPIES is the total number
   of newly created copies.  UNROLLING is true if we are unrolling
   (not peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  if (unrolling)
    {
      /* If we are unrolling, initialization is done in the original loop
	 body (number 0).  */
      return n_copy;
    }
  else
    {
      /* If we are peeling, the copy in which the initialization occurs has
	 number 1.  The original loop (number 0) is the last.  */
      if (n_copy)
	return n_copy - 1;
      else
	return n_copies;
    }
}
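
/* For instance, with N_COPIES == 3 when peeling, copies 1 through 3 run
   before the original body and map to deltas 0 through 2 (copy 1 holds
   the initialization), while the original copy 0, executed last, gets
   delta 3.  When unrolling, copy 0 initializes and the delta is simply
   N_COPY.  */
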
/* Allocate the basic variable for the induction variable chain.  */

static void
allocate_basic_variable (struct iv_to_split *ivts)
{
  rtx expr = SET_SRC (single_set (ivts->insn));

  ivts->base_var = gen_reg_rtx (GET_MODE (expr));
}

/* Insert the initialization of the basic variable of IVTS before INSN, taking
   the initial value from INSN.  */

static void
insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn)
{
  rtx expr = copy_rtx (SET_SRC (single_set (insn)));
  rtx_insn *seq;

  start_sequence ();
  expr = force_operand (expr, ivts->base_var);
  if (expr != ivts->base_var)
    emit_move_insn (ivts->base_var, expr);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
}

/* Replace the use of the induction variable described in IVTS in INSN
   by base variable + DELTA * step.  */

static void
split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta)
{
  rtx expr, *loc, incr, var;
  rtx_insn *seq;
  machine_mode mode = GET_MODE (ivts->base_var);
  rtx src, dest, set;

  /* Construct base + DELTA * step.  */
  if (!delta)
    expr = ivts->base_var;
  else
    {
      incr = simplify_gen_binary (MULT, mode,
				  copy_rtx (ivts->step),
				  gen_int_mode (delta, mode));
      expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var),
				  ivts->base_var, incr);
    }
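
  /* E.g. for the chain shown before analyze_iv_to_split_insn (step == 1),
     the copy with DELTA == 2 has its use of i rewritten into i0 + 2.  */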

  /* Figure out where to do the replacement.  */
  loc = &SET_SRC (single_set (insn));

  /* If we can make the replacement right away, we're done.  */
  if (validate_change (insn, loc, expr, 0))
    return;

  /* Otherwise, force EXPR into a register and try again.  */
  start_sequence ();
  var = gen_reg_rtx (mode);
  expr = force_operand (expr, var);
  if (expr != var)
    emit_move_insn (var, expr);
  seq = get_insns ();
  end_sequence ();
  emit_insn_before (seq, insn);

  if (validate_change (insn, loc, var, 0))
    return;

  /* The last chance.  Try recreating the assignment in insn
     completely from scratch.  */
  set = single_set (insn);
  gcc_assert (set);

  start_sequence ();
  *loc = var;
  src = copy_rtx (SET_SRC (set));
  dest = copy_rtx (SET_DEST (set));
  src = force_operand (src, dest);
  if (src != dest)
    emit_move_insn (dest, src);
  seq = get_insns ();
  end_sequence ();

  emit_insn_before (seq, insn);
  delete_insn (insn);
}

/* Return one expansion of the accumulator recorded in struct VE.  */

static rtx
get_expansion (struct var_to_expand *ve)
{
  rtx reg;

  if (ve->reuse_expansion == 0)
    reg = ve->reg;
  else
    reg = ve->var_expansions[ve->reuse_expansion - 1];

  if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion)
    ve->reuse_expansion = 0;
  else
    ve->reuse_expansion++;

  return reg;
}

/* Given INSN replace the uses of the accumulator recorded in VE
   with a new register.  */

static void
expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn)
{
  rtx new_reg, set;
  bool really_new_expansion = false;

  set = single_set (insn);
  gcc_assert (set);

  /* Generate a new register only if the expansion limit has not been
     reached.  Else reuse an already existing expansion.  */
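  /* The limit is --param max-variable-expansions-in-unroller
     (PARAM_MAX_VARIABLE_EXPANSIONS); assuming its historical default of 1,
     each accumulator gets one extra copy, and later insns cycle through
     the existing expansions via get_expansion.  */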
  if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count)
    {
      really_new_expansion = true;
      new_reg = gen_reg_rtx (GET_MODE (ve->reg));
    }
  else
    new_reg = get_expansion (ve);

  validate_replace_rtx_group (SET_DEST (set), new_reg, insn);
  if (apply_change_group ())
    if (really_new_expansion)
      {
	ve->var_expansions.safe_push (new_reg);
	ve->expansion_count++;
      }
}

/* Initialize the variable expansions in the loop preheader.  PLACE is the
   loop-preheader basic block where the initialization of the
   expansions should take place.  The expansions are initialized with
   (-0) when the operation is plus or minus to honor signed zero.  This
   way we can prevent cases where the sign of the final result is
   affected by the sign of the expansion.  Here is an example to
   demonstrate this:

   for (i = 0 ; i < n; i++)
     sum += something;

   ==>

   sum += something
   ....
   i = i+1;
   sum1 += something
   ....
   i = i+1
   sum2 += something;
   ....

   When SUM is initialized with -zero and SOMETHING is also -zero, the
   final result of sum should be -zero, thus the expansions sum1 and sum2
   should be initialized with -zero as well (otherwise we will get +zero
   as the final result).  */

static void
insert_var_expansion_initialization (struct var_to_expand *ve,
				     basic_block place)
{
  rtx_insn *seq;
  rtx var, zero_init;
  unsigned i;
  machine_mode mode = GET_MODE (ve->reg);
  bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode);

  if (ve->var_expansions.length () == 0)
    return;

  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  if (honor_signed_zero_p)
	    zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode);
	  else
	    zero_init = CONST0_RTX (mode);
	  emit_move_insn (var, zero_init);
	}
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	{
	  zero_init = CONST1_RTX (GET_MODE (var));
	  emit_move_insn (var, zero_init);
	}
      break;

    default:
      gcc_unreachable ();
    }

  seq = get_insns ();
  end_sequence ();

  emit_insn_after (seq, BB_END (place));
}

/* Combine the variable expansions at the loop exit.  PLACE is the
   loop exit basic block where the summation of the expansions should
   take place.  */

static void
combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place)
{
  rtx sum = ve->reg;
  rtx expr, var;
  rtx_insn *seq, *insn;
  unsigned i;

  if (ve->var_expansions.length () == 0)
    return;

  /* ve->reg might be a SUBREG or some other non-shareable RTL, and we use
     it both here and as the destination of the assignment.  */
  sum = copy_rtx (sum);
  start_sequence ();
  switch (ve->op)
    {
    case FMA:
      /* Note that we only accumulate FMA via the ADD operand.  */
    case PLUS:
    case MINUS:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum);
      break;

    case MULT:
      FOR_EACH_VEC_ELT (ve->var_expansions, i, var)
	sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum);
      break;

    default:
      gcc_unreachable ();
    }

  expr = force_operand (sum, ve->reg);
  if (expr != ve->reg)
    emit_move_insn (ve->reg, expr);
  seq = get_insns ();
  end_sequence ();

  insn = BB_HEAD (place);
  while (!NOTE_INSN_BASIC_BLOCK_P (insn))
    insn = NEXT_INSN (insn);

  emit_insn_after (seq, insn);
}

/* Strip away REG_EQUAL notes for IVs we're splitting.

   Updating REG_EQUAL notes for IVs we split is tricky: We
   cannot tell until after unrolling, DF-rescanning, and liveness
   updating, whether an EQ_USE is reached by the split IV while
   the IV reg is still live.  See PR55006.

   ??? We cannot use remove_reg_equal_equiv_notes_for_regno,
   because RTL loop-iv requires us to defer rescanning insns and
   any notes attached to them.  So resort to old techniques...  */

static void
maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn)
{
  struct iv_to_split *ivts;
  rtx note = find_reg_equal_equiv_note (insn);
  if (! note)
    return;
  for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
    if (reg_mentioned_p (ivts->orig_var, note))
      {
	remove_note (insn, note);
	return;
      }
}

/* Apply loop optimizations in loop copies using the
   data which was gathered during the unrolling.  Structure
   OPT_INFO records that data.

   UNROLLING is true if we unrolled (not peeled) the loop.
   REWRITE_ORIGINAL_LOOP is true if we should also rewrite the original body of
   the loop (as it should happen in complete unrolling, but not in ordinary
   peeling of the loop).  */

static void
apply_opt_in_copies (struct opt_info *opt_info,
		     unsigned n_copies, bool unrolling,
		     bool rewrite_original_loop)
{
  unsigned i, delta;
  basic_block bb, orig_bb;
  rtx_insn *insn, *orig_insn, *next;
  struct iv_to_split ivts_templ, *ivts;
  struct var_to_expand ve_templ, *ves;

  /* Sanity check -- we need to put initialization in the original loop
     body.  */
  gcc_assert (!unrolling || rewrite_original_loop);

  /* Allocate the basic variables (i0).  */
  if (opt_info->insns_to_split)
    for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next)
      allocate_basic_variable (ivts);

  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);

      /* bb->aux holds the position in the copy sequence initialized by
	 duplicate_loop_to_header_edge.  */
      delta = determine_split_iv_delta ((size_t)bb->aux, n_copies,
					unrolling);
      bb->aux = 0;
      orig_insn = BB_HEAD (orig_bb);
      FOR_BB_INSNS_SAFE (bb, insn, next)
	{
	  if (!INSN_P (insn)
	      || (DEBUG_BIND_INSN_P (insn)
		  && INSN_VAR_LOCATION_DECL (insn)
		  && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL))
	    continue;

	  while (!INSN_P (orig_insn)
		 || (DEBUG_BIND_INSN_P (orig_insn)
		     && INSN_VAR_LOCATION_DECL (orig_insn)
		     && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn))
			 == LABEL_DECL)))
	    orig_insn = NEXT_INSN (orig_insn);

	  ivts_templ.insn = orig_insn;
	  ve_templ.insn = orig_insn;

	  /* Apply splitting iv optimization.  */
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, insn);

	      ivts = opt_info->insns_to_split->find (&ivts_templ);

	      if (ivts)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));

		  if (!delta)
		    insert_base_initialization (ivts, insn);
		  split_iv (ivts, insn, delta);
		}
	    }
	  /* Apply variable expansion optimization.  */
	  if (unrolling && opt_info->insns_with_var_to_expand)
	    {
	      ves = (struct var_to_expand *)
		opt_info->insns_with_var_to_expand->find (&ve_templ);
	      if (ves)
		{
		  gcc_assert (GET_CODE (PATTERN (insn))
			      == GET_CODE (PATTERN (orig_insn)));
		  expand_var_during_unrolling (ves, insn);
		}
	    }
	  orig_insn = NEXT_INSN (orig_insn);
	}
    }

  if (!rewrite_original_loop)
    return;

  /* Initialize the variable expansions in the loop preheader
     and take care of combining them at the loop exit.  */
  if (opt_info->insns_with_var_to_expand)
    {
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	insert_var_expansion_initialization (ves, opt_info->loop_preheader);
      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	combine_var_copies_in_loop_exit (ves, opt_info->loop_exit);
    }

  /* Rewrite also the original loop body.  Find the blocks as originals of
     the blocks in the last copied iteration, i.e. those that have
     get_bb_copy (get_bb_original (bb)) == bb.  */
  for (i = opt_info->first_new_block;
       i < (unsigned) last_basic_block_for_fn (cfun);
       i++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      orig_bb = get_bb_original (bb);
      if (get_bb_copy (orig_bb) != bb)
	continue;

      delta = determine_split_iv_delta (0, n_copies, unrolling);
      for (orig_insn = BB_HEAD (orig_bb);
	   orig_insn != NEXT_INSN (BB_END (bb));
	   orig_insn = next)
	{
	  next = NEXT_INSN (orig_insn);

	  if (!INSN_P (orig_insn))
	    continue;

	  ivts_templ.insn = orig_insn;
	  if (opt_info->insns_to_split)
	    {
	      maybe_strip_eq_note_for_split_iv (opt_info, orig_insn);

	      ivts = (struct iv_to_split *)
		opt_info->insns_to_split->find (&ivts_templ);
	      if (ivts)
		{
		  if (!delta)
		    insert_base_initialization (ivts, orig_insn);
		  split_iv (ivts, orig_insn, delta);
		  continue;
		}
	    }
	}
    }
}

/* Release OPT_INFO.  */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}