gcc/loop-doloop.c

   1 /* Perform doloop optimizations
   2    Copyright (C) 2004-2015 Free Software Foundation, Inc.
   3    Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "tm.h"
  25 #include "rtl.h"
  26 #include "flags.h"
  27 #include "symtab.h"
  28 #include "hashtab.h"
  29 #include "hash-set.h"
  30 #include "vec.h"
  31 #include "machmode.h"
  32 #include "hard-reg-set.h"
  33 #include "input.h"
  34 #include "function.h"
  35 #include "statistics.h"
  36 #include "double-int.h"
  37 #include "real.h"
  38 #include "fixed-value.h"
  39 #include "alias.h"
  40 #include "wide-int.h"
  41 #include "inchash.h"
  42 #include "tree.h"
  43 #include "insn-config.h"
  44 #include "expmed.h"
  45 #include "dojump.h"
  46 #include "explow.h"
  47 #include "calls.h"
  48 #include "emit-rtl.h"
  49 #include "varasm.h"
  50 #include "stmt.h"
  51 #include "expr.h"
  52 #include "diagnostic-core.h"
  53 #include "tm_p.h"
  54 #include "predict.h"
  55 #include "dominance.h"
  56 #include "cfg.h"
  57 #include "cfgloop.h"
  58 #include "cfgrtl.h"
  59 #include "basic-block.h"
  60 #include "params.h"
  61 #include "target.h"
  62 #include "dumpfile.h"
  63 #include "loop-unroll.h"
  64
  65 /* This module is used to modify loops with a determinable number of
  66    iterations to use special low-overhead looping instructions.
  67
  68    It first validates whether the loop is well behaved and has a
  69    determinable number of iterations (either at compile or run-time).
  70    It then modifies the loop to use a low-overhead looping pattern as
  71    follows:
  72
  73    1. A pseudo register is allocated as the loop iteration counter.
  74
  75    2. The number of loop iterations is calculated and is stored
  76       in the loop counter.
  77
  78    3. At the end of the loop, the jump insn is replaced by the
  79       doloop_end pattern.  The compare must remain because it might be
  80       used elsewhere.  If the loop-variable or condition register are
  81       used elsewhere, they will be eliminated by flow.
  82
  83    4. An optional doloop_begin pattern is inserted at the top of the
  84       loop.
  85
  86    TODO The optimization should only performed when either the biv used for exit
  87    condition is unused at all except for the exit test, or if we do not have to
  88    change its value, since otherwise we have to add a new induction variable,
  89    which usually will not pay up (unless the cost of the doloop pattern is
  90    somehow extremely lower than the cost of compare & jump, or unless the bct
  91    register cannot be used for anything else but doloop -- ??? detect these
  92    cases).  */
  93
  94 #ifdef HAVE_doloop_end
  95
  96 /* Return the loop termination condition for PATTERN or zero
  97    if it is not a decrement and branch jump insn.  */
  98
  99 rtx
 100 doloop_condition_get (rtx doloop_pat)
 101 {
 102   rtx cmp;
 103   rtx inc;
 104   rtx reg;
 105   rtx inc_src;
 106   rtx condition;
 107   rtx pattern;
 108   rtx cc_reg = NULL_RTX;
 109   rtx reg_orig = NULL_RTX;
 110
 111   /* The canonical doloop pattern we expect has one of the following
 112      forms:
 113
 114      1)  (parallel [(set (pc) (if_then_else (condition)
 115                                             (label_ref (label))
 116                                             (pc)))
 117                      (set (reg) (plus (reg) (const_int -1)))
 118                      (additional clobbers and uses)])
 119
 120      The branch must be the first entry of the parallel (also required
 121      by jump.c), and the second entry of the parallel must be a set of
 122      the loop counter register.  Some targets (IA-64) wrap the set of
 123      the loop counter in an if_then_else too.
 124
 125      2)  (set (reg) (plus (reg) (const_int -1))
 126          (set (pc) (if_then_else (reg != 0)
 127                                  (label_ref (label))
 128                                  (pc))).
 129
 130      Some targets (ARM) do the comparison before the branch, as in the
 131      following form:
 132
 133      3) (parallel [(set (cc) (compare ((plus (reg) (const_int -1), 0)))
 134                    (set (reg) (plus (reg) (const_int -1)))])
 135         (set (pc) (if_then_else (cc == NE)
 136                                 (label_ref (label))
 137                                 (pc))) */
 138
 139   pattern = PATTERN (doloop_pat);
 140
 141   if (GET_CODE (pattern) != PARALLEL)
 142     {
 143       rtx cond;
 144       rtx_insn *prev_insn = prev_nondebug_insn (doloop_pat);
 145       rtx cmp_arg1, cmp_arg2;
 146       rtx cmp_orig;
 147
 148       /* In case the pattern is not PARALLEL we expect two forms
 149          of doloop which are cases 2) and 3) above: in case 2) the
 150          decrement immediately precedes the branch, while in case 3)
 151          the compare and decrement instructions immediately precede
 152          the branch.  */
 153
 154       if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
 155         return 0;
 156
 157       cmp = pattern;
 158       if (GET_CODE (PATTERN (prev_insn)) == PARALLEL)
 159         {
 160           /* The third case: the compare and decrement instructions
 161              immediately precede the branch.  */
 162           cmp_orig = XVECEXP (PATTERN (prev_insn), 0, 0);
 163           if (GET_CODE (cmp_orig) != SET)
 164             return 0;
 165           if (GET_CODE (SET_SRC (cmp_orig)) != COMPARE)
 166             return 0;
 167           cmp_arg1 = XEXP (SET_SRC (cmp_orig), 0);
 168           cmp_arg2 = XEXP (SET_SRC (cmp_orig), 1);
 169           if (cmp_arg2 != const0_rtx
 170               || GET_CODE (cmp_arg1) != PLUS)
 171             return 0;
 172           reg_orig = XEXP (cmp_arg1, 0);
 173           if (XEXP (cmp_arg1, 1) != GEN_INT (-1)
 174               || !REG_P (reg_orig))
 175             return 0;
 176           cc_reg = SET_DEST (cmp_orig);
 177
 178           inc = XVECEXP (PATTERN (prev_insn), 0, 1);
 179         }
 180       else
 181         inc = PATTERN (prev_insn);
 182       /* We expect the condition to be of the form (reg != 0)  */
 183       cond = XEXP (SET_SRC (cmp), 0);
 184       if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
 185         return 0;
 186     }
 187   else
 188     {
 189       cmp = XVECEXP (pattern, 0, 0);
 190       inc = XVECEXP (pattern, 0, 1);
 191     }
 192
 193   /* Check for (set (reg) (something)).  */
 194   if (GET_CODE (inc) != SET)
 195     return 0;
 196   reg = SET_DEST (inc);
 197   if (! REG_P (reg))
 198     return 0;
 199
 200   /* Check if something = (plus (reg) (const_int -1)).
 201      On IA-64, this decrement is wrapped in an if_then_else.  */
 202   inc_src = SET_SRC (inc);
 203   if (GET_CODE (inc_src) == IF_THEN_ELSE)
 204     inc_src = XEXP (inc_src, 1);
 205   if (GET_CODE (inc_src) != PLUS
 206       || XEXP (inc_src, 0) != reg
 207       || XEXP (inc_src, 1) != constm1_rtx)
 208     return 0;
 209
 210   /* Check for (set (pc) (if_then_else (condition)
 211                                        (label_ref (label))
 212                                        (pc))).  */
 213   if (GET_CODE (cmp) != SET
 214       || SET_DEST (cmp) != pc_rtx
 215       || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
 216       || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
 217       || XEXP (SET_SRC (cmp), 2) != pc_rtx)
 218     return 0;
 219
 220   /* Extract loop termination condition.  */
 221   condition = XEXP (SET_SRC (cmp), 0);
 222
 223   /* We expect a GE or NE comparison with 0 or 1.  */
 224   if ((GET_CODE (condition) != GE
 225        && GET_CODE (condition) != NE)
 226       || (XEXP (condition, 1) != const0_rtx
 227           && XEXP (condition, 1) != const1_rtx))
 228     return 0;
 229
 230   if ((XEXP (condition, 0) == reg)
 231       /* For the third case:  */
 232       || ((cc_reg != NULL_RTX)
 233           && (XEXP (condition, 0) == cc_reg)
 234           && (reg_orig == reg))
 235       || (GET_CODE (XEXP (condition, 0)) == PLUS
 236           && XEXP (XEXP (condition, 0), 0) == reg))
 237    {
 238      if (GET_CODE (pattern) != PARALLEL)
 239      /*  For the second form we expect:
 240
 241          (set (reg) (plus (reg) (const_int -1))
 242          (set (pc) (if_then_else (reg != 0)
 243                                  (label_ref (label))
 244                                  (pc))).
 245
 246          is equivalent to the following:
 247
 248          (parallel [(set (pc) (if_then_else (reg != 1)
 249                                             (label_ref (label))
 250                                             (pc)))
 251                      (set (reg) (plus (reg) (const_int -1)))
 252                      (additional clobbers and uses)])
 253
 254         For the third form we expect:
 255
 256         (parallel [(set (cc) (compare ((plus (reg) (const_int -1)), 0))
 257                    (set (reg) (plus (reg) (const_int -1)))])
 258         (set (pc) (if_then_else (cc == NE)
 259                                 (label_ref (label))
 260                                 (pc)))
 261
 262         which is equivalent to the following:
 263
 264         (parallel [(set (cc) (compare (reg,  1))
 265                    (set (reg) (plus (reg) (const_int -1)))
 266                    (set (pc) (if_then_else (NE == cc)
 267                                            (label_ref (label))
 268                                            (pc))))])
 269
 270         So we return the second form instead for the two cases.
 271
 272      */
 273         condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
 274
 275     return condition;
 276    }
 277
 278   /* ??? If a machine uses a funny comparison, we could return a
 279      canonicalized form here.  */
 280
 281   return 0;
 282 }
 283
 284 /* Return nonzero if the loop specified by LOOP is suitable for
 285    the use of special low-overhead looping instructions.  DESC
 286    describes the number of iterations of the loop.  */
 287
 288 static bool
 289 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
 290 {
 291   basic_block *body = get_loop_body (loop), bb;
 292   rtx_insn *insn;
 293   unsigned i;
 294   bool result = true;
 295
 296   /* Check for loops that may not terminate under special conditions.  */
 297   if (!desc->simple_p
 298       || desc->assumptions
 299       || desc->infinite)
 300     {
 301       /* There are some cases that would require a special attention.
 302          For example if the comparison is LEU and the comparison value
 303          is UINT_MAX then the loop will not terminate.  Similarly, if the
 304          comparison code is GEU and the comparison value is 0, the
 305          loop will not terminate.
 306
 307          If the absolute increment is not 1, the loop can be infinite
 308          even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
 309
 310          ??? We could compute these conditions at run-time and have a
 311          additional jump around the loop to ensure an infinite loop.
 312          However, it is very unlikely that this is the intended
 313          behavior of the loop and checking for these rare boundary
 314          conditions would pessimize all other code.
 315
 316          If the loop is executed only a few times an extra check to
 317          restart the loop could use up most of the benefits of using a
 318          count register loop.  Note however, that normally, this
 319          restart branch would never execute, so it could be predicted
 320          well by the CPU.  We should generate the pessimistic code by
 321          default, and have an option, e.g. -funsafe-loops that would
 322          enable count-register loops in this case.  */
 323       if (dump_file)
 324         fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
 325       result = false;
 326       goto cleanup;
 327     }
 328
 329   for (i = 0; i < loop->num_nodes; i++)
 330     {
 331       bb = body[i];
 332
 333       for (insn = BB_HEAD (bb);
 334            insn != NEXT_INSN (BB_END (bb));
 335            insn = NEXT_INSN (insn))
 336         {
 337           /* Different targets have different necessities for low-overhead
 338              looping.  Call the back end for each instruction within the loop
 339              to let it decide whether the insn prohibits a low-overhead loop.
 340              It will then return the cause for it to emit to the dump file.  */
 341           const char * invalid = targetm.invalid_within_doloop (insn);
 342           if (invalid)
 343             {
 344               if (dump_file)
 345                 fprintf (dump_file, "Doloop: %s\n", invalid);
 346               result = false;
 347               goto cleanup;
 348             }
 349         }
 350     }
 351   result = true;
 352
 353 cleanup:
 354   free (body);
 355
 356   return result;
 357 }
 358
 359 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
 360    edge.  If the condition is always false, do not do anything.  If it is always
 361    true, redirect E to DEST and return false.  In all other cases, true is
 362    returned.  */
 363
 364 static bool
 365 add_test (rtx cond, edge *e, basic_block dest)
 366 {
 367   rtx_insn *seq, *jump;
 368   rtx label;
 369   machine_mode mode;
 370   rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
 371   enum rtx_code code = GET_CODE (cond);
 372   basic_block bb;
 373
 374   mode = GET_MODE (XEXP (cond, 0));
 375   if (mode == VOIDmode)
 376     mode = GET_MODE (XEXP (cond, 1));
 377
 378   start_sequence ();
 379   op0 = force_operand (op0, NULL_RTX);
 380   op1 = force_operand (op1, NULL_RTX);
 381   label = block_label (dest);
 382   do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX,
 383                            NULL_RTX, label, -1);
 384
 385   jump = get_last_insn ();
 386   if (!jump || !JUMP_P (jump))
 387     {
 388       /* The condition is always false and the jump was optimized out.  */
 389       end_sequence ();
 390       return true;
 391     }
 392
 393   seq = get_insns ();
 394   end_sequence ();
 395
 396   /* There always is at least the jump insn in the sequence.  */
 397   gcc_assert (seq != NULL_RTX);
 398
 399   bb = split_edge_and_insert (*e, seq);
 400   *e = single_succ_edge (bb);
 401
 402   if (any_uncondjump_p (jump))
 403     {
 404       /* The condition is always true.  */
 405       delete_insn (jump);
 406       redirect_edge_and_branch_force (*e, dest);
 407       return false;
 408     }
 409
 410   JUMP_LABEL (jump) = label;
 411
 412   /* The jump is supposed to handle an unlikely special case.  */
 413   add_int_reg_note (jump, REG_BR_PROB, 0);
 414
 415   LABEL_NUSES (label)++;
 416
 417   make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
 418   return true;
 419 }
 420
 421 /* Modify the loop to use the low-overhead looping insn where LOOP
 422    describes the loop, DESC describes the number of iterations of the
 423    loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
 424    end of the loop.  CONDITION is the condition separated from the
 425    DOLOOP_SEQ.  COUNT is the number of iterations of the LOOP.  */
 426
 427 static void
 428 doloop_modify (struct loop *loop, struct niter_desc *desc,
 429                rtx doloop_seq, rtx condition, rtx count)
 430 {
 431   rtx counter_reg;
 432   rtx tmp, noloop = NULL_RTX;
 433   rtx_insn *sequence;
 434   rtx_insn *jump_insn;
 435   rtx jump_label;
 436   int nonneg = 0;
 437   bool increment_count;
 438   basic_block loop_end = desc->out_edge->src;
 439   machine_mode mode;
 440   rtx true_prob_val;
 441   widest_int iterations;
 442
 443   jump_insn = BB_END (loop_end);
 444
 445   if (dump_file)
 446     {
 447       fprintf (dump_file, "Doloop: Inserting doloop pattern (");
 448       if (desc->const_iter)
 449         fprintf (dump_file, "%"PRId64, desc->niter);
 450       else
 451         fputs ("runtime", dump_file);
 452       fputs (" iterations).\n", dump_file);
 453     }
 454
 455   /* Get the probability of the original branch. If it exists we would
 456      need to update REG_BR_PROB of the new jump_insn.  */
 457   true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);
 458
 459   /* Discard original jump to continue loop.  The original compare
 460      result may still be live, so it cannot be discarded explicitly.  */
 461   delete_insn (jump_insn);
 462
 463   counter_reg = XEXP (condition, 0);
 464   if (GET_CODE (counter_reg) == PLUS)
 465     counter_reg = XEXP (counter_reg, 0);
 466   mode = GET_MODE (counter_reg);
 467
 468   increment_count = false;
 469   switch (GET_CODE (condition))
 470     {
 471     case NE:
 472       /* Currently only NE tests against zero and one are supported.  */
 473       noloop = XEXP (condition, 1);
 474       if (noloop != const0_rtx)
 475         {
 476           gcc_assert (noloop == const1_rtx);
 477           increment_count = true;
 478         }
 479       break;
 480
 481     case GE:
 482       /* Currently only GE tests against zero are supported.  */
 483       gcc_assert (XEXP (condition, 1) == const0_rtx);
 484
 485       noloop = constm1_rtx;
 486
 487       /* The iteration count does not need incrementing for a GE test.  */
 488       increment_count = false;
 489
 490       /* Determine if the iteration counter will be non-negative.
 491          Note that the maximum value loaded is iterations_max - 1.  */
 492       if (get_max_loop_iterations (loop, &iterations)
 493           && wi::leu_p (iterations,
 494                         wi::set_bit_in_zero <widest_int>
 495                         (GET_MODE_PRECISION (mode) - 1)))
 496         nonneg = 1;
 497       break;
 498
 499       /* Abort if an invalid doloop pattern has been generated.  */
 500     default:
 501       gcc_unreachable ();
 502     }
 503
 504   if (increment_count)
 505     count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
 506
 507   /* Insert initialization of the count register into the loop header.  */
 508   start_sequence ();
 509   tmp = force_operand (count, counter_reg);
 510   convert_move (counter_reg, tmp, 1);
 511   sequence = get_insns ();
 512   end_sequence ();
 513   emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
 514
 515   if (desc->noloop_assumptions)
 516     {
 517       rtx ass = copy_rtx (desc->noloop_assumptions);
 518       basic_block preheader = loop_preheader_edge (loop)->src;
 519       basic_block set_zero
 520               = split_edge (loop_preheader_edge (loop));
 521       basic_block new_preheader
 522               = split_edge (loop_preheader_edge (loop));
 523       edge te;
 524
 525       /* Expand the condition testing the assumptions and if it does not pass,
 526          reset the count register to 0.  */
 527       redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
 528       set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
 529
 530       set_zero->count = 0;
 531       set_zero->frequency = 0;
 532
 533       te = single_succ_edge (preheader);
 534       for (; ass; ass = XEXP (ass, 1))
 535         if (!add_test (XEXP (ass, 0), &te, set_zero))
 536           break;
 537
 538       if (ass)
 539         {
 540           /* We reached a condition that is always true.  This is very hard to
 541              reproduce (such a loop does not roll, and thus it would most
 542              likely get optimized out by some of the preceding optimizations).
 543              In fact, I do not have any testcase for it.  However, it would
 544              also be very hard to show that it is impossible, so we must
 545              handle this case.  */
 546           set_zero->count = preheader->count;
 547           set_zero->frequency = preheader->frequency;
 548         }
 549
 550       if (EDGE_COUNT (set_zero->preds) == 0)
 551         {
 552           /* All the conditions were simplified to false, remove the
 553              unreachable set_zero block.  */
 554           delete_basic_block (set_zero);
 555         }
 556       else
 557         {
 558           /* Reset the counter to zero in the set_zero block.  */
 559           start_sequence ();
 560           convert_move (counter_reg, noloop, 0);
 561           sequence = get_insns ();
 562           end_sequence ();
 563           emit_insn_after (sequence, BB_END (set_zero));
 564
 565           set_immediate_dominator (CDI_DOMINATORS, set_zero,
 566                                    recompute_dominator (CDI_DOMINATORS,
 567                                                         set_zero));
 568         }
 569
 570       set_immediate_dominator (CDI_DOMINATORS, new_preheader,
 571                                recompute_dominator (CDI_DOMINATORS,
 572                                                     new_preheader));
 573     }
 574
 575   /* Some targets (eg, C4x) need to initialize special looping
 576      registers.  */
 577 #ifdef HAVE_doloop_begin
 578   {
 579     rtx init;
 580
 581     init = gen_doloop_begin (counter_reg, doloop_seq);
 582     if (init)
 583       {
 584         start_sequence ();
 585         emit_insn (init);
 586         sequence = get_insns ();
 587         end_sequence ();
 588         emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
 589       }
 590   }
 591 #endif
 592
 593   /* Insert the new low-overhead looping insn.  */
 594   emit_jump_insn_after (doloop_seq, BB_END (loop_end));
 595   jump_insn = BB_END (loop_end);
 596   jump_label = block_label (desc->in_edge->dest);
 597   JUMP_LABEL (jump_insn) = jump_label;
 598   LABEL_NUSES (jump_label)++;
 599
 600   /* Ensure the right fallthru edge is marked, for case we have reversed
 601      the condition.  */
 602   desc->in_edge->flags &= ~EDGE_FALLTHRU;
 603   desc->out_edge->flags |= EDGE_FALLTHRU;
 604
 605   /* Add a REG_NONNEG note if the actual or estimated maximum number
 606      of iterations is non-negative.  */
 607   if (nonneg)
 608     add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
 609
 610   /* Update the REG_BR_PROB note.  */
 611   if (true_prob_val)
 612     {
 613       /* Seems safer to use the branch probability.  */
 614       add_int_reg_note (jump_insn, REG_BR_PROB, desc->in_edge->probability);
 615     }
 616 }
 617
 618 /* Process loop described by LOOP validating that the loop is suitable for
 619    conversion to use a low overhead looping instruction, replacing the jump
 620    insn where suitable.  Returns true if the loop was successfully
 621    modified.  */
 622
 623 static bool
 624 doloop_optimize (struct loop *loop)
 625 {
 626   machine_mode mode;
 627   rtx doloop_seq, doloop_pat, doloop_reg;
 628   rtx count;
 629   widest_int iterations, iterations_max;
 630   rtx start_label;
 631   rtx condition;
 632   unsigned level, est_niter;
 633   int max_cost;
 634   struct niter_desc *desc;
 635   unsigned word_mode_size;
 636   unsigned HOST_WIDE_INT word_mode_max;
 637   int entered_at_top;
 638
 639   if (dump_file)
 640     fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
 641
 642   iv_analysis_loop_init (loop);
 643
 644   /* Find the simple exit of a LOOP.  */
 645   desc = get_simple_loop_desc (loop);
 646
 647   /* Check that loop is a candidate for a low-overhead looping insn.  */
 648   if (!doloop_valid_p (loop, desc))
 649     {
 650       if (dump_file)
 651         fprintf (dump_file,
 652                  "Doloop: The loop is not suitable.\n");
 653       return false;
 654     }
 655   mode = desc->mode;
 656
 657   est_niter = 3;
 658   if (desc->const_iter)
 659     est_niter = desc->niter;
 660   /* If the estimate on number of iterations is reliable (comes from profile
 661      feedback), use it.  Do not use it normally, since the expected number
 662      of iterations of an unrolled loop is 2.  */
 663   if (loop->header->count)
 664     est_niter = expected_loop_iterations (loop);
 665
 666   if (est_niter < 3)
 667     {
 668       if (dump_file)
 669         fprintf (dump_file,
 670                  "Doloop: Too few iterations (%u) to be profitable.\n",
 671                  est_niter);
 672       return false;
 673     }
 674
 675   max_cost
 676     = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
 677   if (set_src_cost (desc->niter_expr, optimize_loop_for_speed_p (loop))
 678       > max_cost)
 679     {
 680       if (dump_file)
 681         fprintf (dump_file,
 682                  "Doloop: number of iterations too costly to compute.\n");
 683       return false;
 684     }
 685
 686   if (desc->const_iter)
 687     iterations = widest_int::from (std::make_pair (desc->niter_expr, mode),
 688                                    UNSIGNED);
 689   else
 690     iterations = 0;
 691   if (!get_max_loop_iterations (loop, &iterations_max))
 692     iterations_max = 0;
 693   level = get_loop_level (loop) + 1;
 694   entered_at_top = (loop->latch == desc->in_edge->dest
 695                     && contains_no_active_insn_p (loop->latch));
 696   if (!targetm.can_use_doloop_p (iterations, iterations_max, level,
 697                                  entered_at_top))
 698     {
 699       if (dump_file)
 700         fprintf (dump_file, "Loop rejected by can_use_doloop_p.\n");
 701       return false;
 702     }
 703
 704   /* Generate looping insn.  If the pattern FAILs then give up trying
 705      to modify the loop since there is some aspect the back-end does
 706      not like.  */
 707   count = copy_rtx (desc->niter_expr);
 708   start_label = block_label (desc->in_edge->dest);
 709   doloop_reg = gen_reg_rtx (mode);
 710   doloop_seq = gen_doloop_end (doloop_reg, start_label);
 711
 712   word_mode_size = GET_MODE_PRECISION (word_mode);
 713   word_mode_max
 714           = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
 715   if (! doloop_seq
 716       && mode != word_mode
 717       /* Before trying mode different from the one in that # of iterations is
 718          computed, we must be sure that the number of iterations fits into
 719          the new mode.  */
 720       && (word_mode_size >= GET_MODE_PRECISION (mode)
 721           || wi::leu_p (iterations_max, word_mode_max)))
 722     {
 723       if (word_mode_size > GET_MODE_PRECISION (mode))
 724         count = simplify_gen_unary (ZERO_EXTEND, word_mode, count, mode);
 725       else
 726         count = lowpart_subreg (word_mode, count, mode);
 727       PUT_MODE (doloop_reg, word_mode);
 728       doloop_seq = gen_doloop_end (doloop_reg, start_label);
 729     }
 730   if (! doloop_seq)
 731     {
 732       if (dump_file)
 733         fprintf (dump_file,
 734                  "Doloop: Target unwilling to use doloop pattern!\n");
 735       return false;
 736     }
 737
 738   /* If multiple instructions were created, the last must be the
 739      jump instruction.  Also, a raw define_insn may yield a plain
 740      pattern.  */
 741   doloop_pat = doloop_seq;
 742   if (INSN_P (doloop_pat))
 743     {
 744       rtx_insn *doloop_insn = as_a <rtx_insn *> (doloop_pat);
 745       while (NEXT_INSN (doloop_insn) != NULL_RTX)
 746         doloop_insn = NEXT_INSN (doloop_insn);
 747       if (!JUMP_P (doloop_insn))
 748         doloop_insn = NULL;
 749       doloop_pat = doloop_insn;
 750     }
 751
 752   if (! doloop_pat
 753       || ! (condition = doloop_condition_get (doloop_pat)))
 754     {
 755       if (dump_file)
 756         fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
 757       return false;
 758     }
 759
 760   doloop_modify (loop, desc, doloop_seq, condition, count);
 761   return true;
 762 }
 763
 764 /* This is the main entry point.  Process all loops using doloop_optimize.  */
 765
 766 void
 767 doloop_optimize_loops (void)
 768 {
 769   struct loop *loop;
 770
 771   FOR_EACH_LOOP (loop, 0)
 772     {
 773       doloop_optimize (loop);
 774     }
 775
 776   iv_analysis_done ();
 777
 778 #ifdef ENABLE_CHECKING
 779   verify_loop_structure ();
 780 #endif
 781 }
 782 #endif /* HAVE_doloop_end */
 783