gcc/tree-ssa-loop-split.cc

   1 /* Loop splitting.
   2    Copyright (C) 2015-2024 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "tree.h"
  25 #include "gimple.h"
  26 #include "tree-pass.h"
  27 #include "ssa.h"
  28 #include "fold-const.h"
  29 #include "tree-cfg.h"
  30 #include "tree-ssa.h"
  31 #include "tree-ssa-loop-niter.h"
  32 #include "tree-ssa-loop.h"
  33 #include "tree-ssa-loop-manip.h"
  34 #include "tree-into-ssa.h"
  35 #include "tree-inline.h"
  36 #include "tree-cfgcleanup.h"
  37 #include "cfgloop.h"
  38 #include "tree-scalar-evolution.h"
  39 #include "gimple-iterator.h"
  40 #include "gimple-pretty-print.h"
  41 #include "cfghooks.h"
  42 #include "gimple-fold.h"
  43 #include "gimplify-me.h"
  44 #include "print-tree.h"
  45 #include "value-query.h"
  46 #include "sreal.h"
  47
  48 /* This file implements two kinds of loop splitting.
  49
  50    One transformation of loops like:
  51
  52    for (i = 0; i < 100; i++)
  53      {
  54        if (i < 50)
  55          A;
  56        else
  57          B;
  58      }
  59
  60    into:
  61
  62    for (i = 0; i < 50; i++)
  63      {
  64        A;
  65      }
  66    for (; i < 100; i++)
  67      {
  68        B;
  69      }
  70
  71    */
  72
  73 /* Return true when BB inside LOOP is a potential iteration space
  74    split point, i.e. ends with a condition like "IV < comp", which
  75    is true on one side of the iteration space and false on the other,
  76    and the split point can be computed.  If so, also return the border
  77    point in *BORDER and the comparison induction variable in IV.  */
  78
  79 static tree
  80 split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
  81                enum tree_code *guard_code)
  82 {
  83   gcond *stmt;
  84   affine_iv iv2;
  85
  86   /* BB must end in a simple conditional jump.  */
  87   stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  88   if (!stmt)
  89     return NULL_TREE;
  90
  91   enum tree_code code = gimple_cond_code (stmt);
  92
  93   if (loop_exits_from_bb_p (loop, bb))
  94     return NULL_TREE;
  95
  96   tree op0 = gimple_cond_lhs (stmt);
  97   tree op1 = gimple_cond_rhs (stmt);
  98   class loop *useloop = loop_containing_stmt (stmt);
  99
 100   if (!simple_iv (loop, useloop, op0, iv, false))
 101     return NULL_TREE;
 102   if (!simple_iv (loop, useloop, op1, &iv2, false))
 103     return NULL_TREE;
 104
 105   /* Make it so that the first argument of the condition is
 106      the looping one.  */
 107   if (!integer_zerop (iv2.step))
 108     {
 109       std::swap (op0, op1);
 110       std::swap (*iv, iv2);
 111       code = swap_tree_comparison (code);
 112       gimple_cond_set_condition (stmt, code, op0, op1);
 113       update_stmt (stmt);
 114     }
 115   else if (integer_zerop (iv->step))
 116     return NULL_TREE;
 117   if (!integer_zerop (iv2.step))
 118     return NULL_TREE;
 119   if (!iv->no_overflow)
 120     return NULL_TREE;
 121
 122   /* Only handle relational comparisons, for equality and non-equality
 123      we'd have to split the loop into two loops and a middle statement.  */
 124   switch (code)
 125     {
 126       case LT_EXPR:
 127       case LE_EXPR:
 128       case GT_EXPR:
 129       case GE_EXPR:
 130         break;
 131       case NE_EXPR:
 132       case EQ_EXPR:
 133         /* If the test check for first iteration, we can handle NE/EQ
 134            with only one split loop.  */
 135         if (operand_equal_p (iv->base, iv2.base, 0))
 136           {
 137             if (code == EQ_EXPR)
 138               code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
 139             else
 140               code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
 141             break;
 142           }
 143         /* Similarly when the test checks for minimal or maximal
 144            value range.  */
 145         else
 146           {
 147             int_range<2> r;
 148             get_global_range_query ()->range_of_expr (r, op0, stmt);
 149             if (!r.varying_p () && !r.undefined_p ()
 150                 && TREE_CODE (op1) == INTEGER_CST)
 151               {
 152                 wide_int val = wi::to_wide (op1);
 153                 if (known_eq (val, r.lower_bound ()))
 154                   {
 155                     code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
 156                     break;
 157                   }
 158                 else if (known_eq (val, r.upper_bound ()))
 159                   {
 160                     code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
 161                     break;
 162                   }
 163               }
 164           }
 165         /* TODO: We can compare with exit condition; it seems that testing for
 166            last iteration is common case.  */
 167         return NULL_TREE;
 168       default:
 169         return NULL_TREE;
 170     }
 171
 172   if (dump_file && (dump_flags & TDF_DETAILS))
 173     {
 174       fprintf (dump_file, "Found potential split point: ");
 175       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
 176       fprintf (dump_file, " { ");
 177       print_generic_expr (dump_file, iv->base, TDF_SLIM);
 178       fprintf (dump_file, " + I*");
 179       print_generic_expr (dump_file, iv->step, TDF_SLIM);
 180       fprintf (dump_file, " } %s ", get_tree_code_name (code));
 181       print_generic_expr (dump_file, iv2.base, TDF_SLIM);
 182       fprintf (dump_file, "\n");
 183     }
 184
 185   *border = iv2.base;
 186   *guard_code = code;
 187   return op0;
 188 }
 189
 190 /* Given a GUARD conditional stmt inside LOOP, which we want to make always
 191    true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
 192    (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
 193    exit test statement to loop back only if the GUARD statement will
 194    also be true/false in the next iteration.  */
 195
 196 static void
 197 patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval,
 198                  tree newbound, bool initial_true)
 199 {
 200   edge exit = single_exit (loop);
 201   gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
 202   gimple_cond_set_condition (stmt, guard_code, nextval, newbound);
 203   update_stmt (stmt);
 204
 205   edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
 206
 207   exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
 208   stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
 209
 210   if (initial_true)
 211     {
 212       exit->flags |= EDGE_FALSE_VALUE;
 213       stay->flags |= EDGE_TRUE_VALUE;
 214     }
 215   else
 216     {
 217       exit->flags |= EDGE_TRUE_VALUE;
 218       stay->flags |= EDGE_FALSE_VALUE;
 219     }
 220 }
 221
 222 /* Give an induction variable GUARD_IV, and its affine descriptor IV,
 223    find the loop phi node in LOOP defining it directly, or create
 224    such phi node.  Return that phi node.  */
 225
 226 static gphi *
 227 find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
 228 {
 229   gimple *def = SSA_NAME_DEF_STMT (guard_iv);
 230   gphi *phi;
 231   if ((phi = dyn_cast <gphi *> (def))
 232       && gimple_bb (phi) == loop->header)
 233     return phi;
 234
 235   /* XXX Create the PHI instead.  */
 236   return NULL;
 237 }
 238
 239 /* Returns true if the exit values of all loop phi nodes can be
 240    determined easily (i.e. that connect_loop_phis can determine them).  */
 241
 242 static bool
 243 easy_exit_values (class loop *loop)
 244 {
 245   edge exit = single_exit (loop);
 246   edge latch = loop_latch_edge (loop);
 247   gphi_iterator psi;
 248
 249   /* Currently we regard the exit values as easy if they are the same
 250      as the value over the backedge.  Which is the case if the definition
 251      of the backedge value dominates the exit edge.  */
 252   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 253     {
 254       gphi *phi = psi.phi ();
 255       tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
 256       basic_block bb;
 257       if (TREE_CODE (next) == SSA_NAME
 258           && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
 259           && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
 260         return false;
 261     }
 262
 263   return true;
 264 }
 265
 266 /* This function updates the SSA form after connect_loops made a new
 267    edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
 268    conditional).  I.e. the second loop can now be entered either
 269    via the original entry or via NEW_E, so the entry values of LOOP2
 270    phi nodes are either the original ones or those at the exit
 271    of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
 272    this.  The loops need to fulfill easy_exit_values().  */
 273
 274 static void
 275 connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
 276 {
 277   basic_block rest = loop_preheader_edge (loop2)->src;
 278   gcc_assert (new_e->dest == rest);
 279   edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
 280
 281   edge firste = loop_preheader_edge (loop1);
 282   edge seconde = loop_preheader_edge (loop2);
 283   edge firstn = loop_latch_edge (loop1);
 284   gphi_iterator psi_first, psi_second;
 285   for (psi_first = gsi_start_phis (loop1->header),
 286        psi_second = gsi_start_phis (loop2->header);
 287        !gsi_end_p (psi_first);
 288        gsi_next (&psi_first), gsi_next (&psi_second))
 289     {
 290       tree init, next, new_init;
 291       use_operand_p op;
 292       gphi *phi_first = psi_first.phi ();
 293       gphi *phi_second = psi_second.phi ();
 294
 295       init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
 296       next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
 297       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
 298       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
 299
 300       /* Prefer using original variable as a base for the new ssa name.
 301          This is necessary for virtual ops, and useful in order to avoid
 302          losing debug info for real ops.  */
 303       if (TREE_CODE (next) == SSA_NAME
 304           && useless_type_conversion_p (TREE_TYPE (next),
 305                                         TREE_TYPE (init)))
 306         new_init = copy_ssa_name (next);
 307       else if (TREE_CODE (init) == SSA_NAME
 308                && useless_type_conversion_p (TREE_TYPE (init),
 309                                              TREE_TYPE (next)))
 310         new_init = copy_ssa_name (init);
 311       else if (useless_type_conversion_p (TREE_TYPE (next),
 312                                           TREE_TYPE (init)))
 313         new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
 314                                        "unrinittmp");
 315       else
 316         new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
 317                                        "unrinittmp");
 318
 319       gphi * newphi = create_phi_node (new_init, rest);
 320       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
 321       add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
 322       SET_USE (op, new_init);
 323     }
 324 }
 325
 326 /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
 327    they are still equivalent and placed in two arms of a diamond, like so:
 328
 329                .------if (cond)------.
 330                v                     v
 331              pre1                   pre2
 332               |                      |
 333         .--->h1                     h2<----.
 334         |     |                      |     |
 335         |    ex1---.            .---ex2    |
 336         |    /     |            |     \    |
 337         '---l1     X            |     l2---'
 338                    |            |
 339                    |            |
 340                    '--->join<---'
 341
 342    This function transforms the program such that LOOP1 is conditionally
 343    falling through to LOOP2, or skipping it.  This is done by splitting
 344    the ex1->join edge at X in the diagram above, and inserting a condition
 345    whose one arm goes to pre2, resulting in this situation:
 346
 347                .------if (cond)------.
 348                v                     v
 349              pre1       .---------->pre2
 350               |         |            |
 351         .--->h1         |           h2<----.
 352         |     |         |            |     |
 353         |    ex1---.    |       .---ex2    |
 354         |    /     v    |       |     \    |
 355         '---l1   skip---'       |     l2---'
 356                    |            |
 357                    |            |
 358                    '--->join<---'
 359
 360
 361    The condition used is the exit condition of LOOP1, which effectively means
 362    that when the first loop exits (for whatever reason) but the real original
 363    exit expression is still false the second loop will be entered.
 364    The function returns the new edge cond->pre2.
 365
 366    This doesn't update the SSA form, see connect_loop_phis for that.  */
 367
 368 static edge
 369 connect_loops (class loop *loop1, class loop *loop2)
 370 {
 371   edge exit = single_exit (loop1);
 372   basic_block skip_bb = split_edge (exit);
 373   gcond *skip_stmt;
 374   gimple_stmt_iterator gsi;
 375   edge new_e, skip_e;
 376
 377   gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
 378   skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
 379                                  gimple_cond_lhs (stmt),
 380                                  gimple_cond_rhs (stmt),
 381                                  NULL_TREE, NULL_TREE);
 382   gsi = gsi_last_bb (skip_bb);
 383   gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
 384
 385   skip_e = EDGE_SUCC (skip_bb, 0);
 386   skip_e->flags &= ~EDGE_FALLTHRU;
 387   new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
 388   if (exit->flags & EDGE_TRUE_VALUE)
 389     {
 390       skip_e->flags |= EDGE_TRUE_VALUE;
 391       new_e->flags |= EDGE_FALSE_VALUE;
 392     }
 393   else
 394     {
 395       skip_e->flags |= EDGE_FALSE_VALUE;
 396       new_e->flags |= EDGE_TRUE_VALUE;
 397     }
 398
 399   new_e->probability = profile_probability::very_likely ();
 400   skip_e->probability = new_e->probability.invert ();
 401
 402   return new_e;
 403 }
 404
 405 /* This returns the new bound for iterations given the original iteration
 406    space in NITER, an arbitrary new bound BORDER, assumed to be some
 407    comparison value with a different IV, the initial value GUARD_INIT of
 408    that other IV, and the comparison code GUARD_CODE that compares
 409    that other IV with BORDER.  We return an SSA name, and place any
 410    necessary statements for that computation into *STMTS.
 411
 412    For example for such a loop:
 413
 414      for (i = beg, j = guard_init; i < end; i++, j++)
 415        if (j < border)  // this is supposed to be true/false
 416          ...
 417
 418    we want to return a new bound (on j) that makes the loop iterate
 419    as long as the condition j < border stays true.  We also don't want
 420    to iterate more often than the original loop, so we have to introduce
 421    some cut-off as well (via min/max), effectively resulting in:
 422
 423      newend = min (end+guard_init-beg, border)
 424      for (i = beg; j = guard_init; j < newend; i++, j++)
 425        if (j < c)
 426          ...
 427
 428    Depending on the direction of the IVs and if the exit tests
 429    are strict or non-strict we need to use MIN or MAX,
 430    and add or subtract 1.  This routine computes newend above.  */
 431
 432 static tree
 433 compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
 434                          tree border,
 435                          enum tree_code guard_code, tree guard_init)
 436 {
 437   /* The niter structure contains the after-increment IV, we need
 438      the loop-enter base, so subtract STEP once.  */
 439   tree controlbase = force_gimple_operand (niter->control.base,
 440                                            stmts, true, NULL_TREE);
 441   tree controlstep = niter->control.step;
 442   tree enddiff;
 443   if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
 444     {
 445       controlstep = gimple_build (stmts, NEGATE_EXPR,
 446                                   TREE_TYPE (controlstep), controlstep);
 447       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
 448                               TREE_TYPE (controlbase),
 449                               controlbase, controlstep);
 450     }
 451   else
 452     enddiff = gimple_build (stmts, MINUS_EXPR,
 453                             TREE_TYPE (controlbase),
 454                             controlbase, controlstep);
 455
 456   /* Compute end-beg.  */
 457   gimple_seq stmts2;
 458   tree end = force_gimple_operand (niter->bound, &stmts2,
 459                                         true, NULL_TREE);
 460   gimple_seq_add_seq_without_update (stmts, stmts2);
 461   if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
 462     {
 463       tree tem = gimple_convert (stmts, sizetype, enddiff);
 464       tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
 465       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
 466                               TREE_TYPE (enddiff),
 467                               end, tem);
 468     }
 469   else
 470     enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
 471                             end, enddiff);
 472
 473   /* Compute guard_init + (end-beg).  */
 474   tree newbound;
 475   enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
 476   if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
 477     {
 478       enddiff = gimple_convert (stmts, sizetype, enddiff);
 479       newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
 480                                TREE_TYPE (guard_init),
 481                                guard_init, enddiff);
 482     }
 483   else
 484     newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
 485                              guard_init, enddiff);
 486
 487   /* Depending on the direction of the IVs the new bound for the first
 488      loop is the minimum or maximum of old bound and border.
 489      Also, if the guard condition isn't strictly less or greater,
 490      we need to adjust the bound.  */
 491   int addbound = 0;
 492   enum tree_code minmax;
 493   if (niter->cmp == LT_EXPR)
 494     {
 495       /* GT and LE are the same, inverted.  */
 496       if (guard_code == GT_EXPR || guard_code == LE_EXPR)
 497         addbound = -1;
 498       minmax = MIN_EXPR;
 499     }
 500   else
 501     {
 502       gcc_assert (niter->cmp == GT_EXPR);
 503       if (guard_code == GE_EXPR || guard_code == LT_EXPR)
 504         addbound = 1;
 505       minmax = MAX_EXPR;
 506     }
 507
 508   if (addbound)
 509     {
 510       tree type2 = TREE_TYPE (newbound);
 511       if (POINTER_TYPE_P (type2))
 512         type2 = sizetype;
 513       newbound = gimple_build (stmts,
 514                                POINTER_TYPE_P (TREE_TYPE (newbound))
 515                                ? POINTER_PLUS_EXPR : PLUS_EXPR,
 516                                TREE_TYPE (newbound),
 517                                newbound,
 518                                build_int_cst (type2, addbound));
 519     }
 520
 521   tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
 522                               border, newbound);
 523   return newend;
 524 }
 525
 526 /* Fix the two loop's bb count after split based on the split edge probability,
 527    don't adjust the bbs dominated by true branches of that loop to avoid
 528    dropping 1s down.  */
 529 static void
 530 fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
 531                          edge false_edge)
 532 {
 533   /* Proportion first loop's bb counts except those dominated by true
 534      branch to avoid drop 1s down.  */
 535   basic_block *bbs1, *bbs2;
 536   bbs1 = get_loop_body (loop1);
 537   unsigned j;
 538   for (j = 0; j < loop1->num_nodes; j++)
 539     if (bbs1[j] == loop1->latch
 540         /* Watch for case where the true conditional is empty.  */
 541         || !single_pred_p (true_edge->dest)
 542         || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
 543       bbs1[j]->count
 544         = bbs1[j]->count.apply_probability (true_edge->probability);
 545   free (bbs1);
 546
 547   /* Proportion second loop's bb counts except those dominated by false
 548      branch to avoid drop 1s down.  */
 549   basic_block bbi_copy = get_bb_copy (false_edge->dest);
 550   bbs2 = get_loop_body (loop2);
 551   for (j = 0; j < loop2->num_nodes; j++)
 552     if (bbs2[j] == loop2->latch
 553         /* Watch for case where the flase conditional is empty.  */
 554         || !single_pred_p (bbi_copy)
 555         || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
 556       bbs2[j]->count
 557         = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
 558   free (bbs2);
 559 }
 560
 561 /* Checks if LOOP contains an conditional block whose condition
 562    depends on which side in the iteration space it is, and if so
 563    splits the iteration space into two loops.  Returns true if the
 564    loop was split.  NITER must contain the iteration descriptor for the
 565    single exit of LOOP.  */
 566
 567 static bool
 568 split_loop (class loop *loop1)
 569 {
 570   class tree_niter_desc niter;
 571   basic_block *bbs;
 572   unsigned i;
 573   bool changed = false;
 574   tree guard_iv;
 575   tree border = NULL_TREE;
 576   affine_iv iv;
 577   edge exit1;
 578
 579   if (!(exit1 = single_exit (loop1))
 580       || EDGE_COUNT (exit1->src->succs) != 2
 581       /* ??? We could handle non-empty latches when we split the latch edge
 582          (not the exit edge), and put the new exit condition in the new block.
 583          OTOH this executes some code unconditionally that might have been
 584          skipped by the original exit before.  */
 585       || !empty_block_p (loop1->latch)
 586       || !easy_exit_values (loop1)
 587       || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
 588       || niter.cmp == ERROR_MARK)
 589     return false;
 590   if (niter.cmp == NE_EXPR)
 591     {
 592       if (!niter.control.no_overflow)
 593         return false;
 594       if (tree_int_cst_sign_bit (niter.control.step))
 595         niter.cmp = GT_EXPR;
 596       else
 597         niter.cmp = LT_EXPR;
 598     }
 599
 600   bbs = get_loop_body (loop1);
 601
 602   if (!can_copy_bbs_p (bbs, loop1->num_nodes))
 603     {
 604       free (bbs);
 605       return false;
 606     }
 607
 608   /* Find a splitting opportunity.  */
 609   enum tree_code guard_code;
 610   for (i = 0; i < loop1->num_nodes; i++)
 611     if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
 612       {
 613         /* Handling opposite steps is not implemented yet.  Neither
 614            is handling different step sizes.  */
 615         if ((tree_int_cst_sign_bit (iv.step)
 616              != tree_int_cst_sign_bit (niter.control.step))
 617             || !tree_int_cst_equal (iv.step, niter.control.step))
 618           continue;
 619
 620         /* Find a loop PHI node that defines guard_iv directly,
 621            or create one doing that.  */
 622         gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
 623         if (!phi)
 624           continue;
 625         gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
 626         tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
 627                                                  loop_preheader_edge (loop1));
 628
 629         /* Loop splitting is implemented by versioning the loop, placing
 630            the new loop after the old loop, make the first loop iterate
 631            as long as the conditional stays true (or false) and let the
 632            second (new) loop handle the rest of the iterations.
 633
 634            First we need to determine if the condition will start being true
 635            or false in the first loop.  */
 636         bool initial_true;
 637         switch (guard_code)
 638           {
 639             case LT_EXPR:
 640             case LE_EXPR:
 641               initial_true = !tree_int_cst_sign_bit (iv.step);
 642               break;
 643             case GT_EXPR:
 644             case GE_EXPR:
 645               initial_true = tree_int_cst_sign_bit (iv.step);
 646               break;
 647             default:
 648               gcc_unreachable ();
 649           }
 650
 651         /* Build a condition that will skip the first loop when the
 652            guard condition won't ever be true (or false).  */
 653         gimple_seq stmts2;
 654         border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
 655         if (stmts2)
 656           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
 657                                             stmts2);
 658         tree cond = fold_build2 (guard_code, boolean_type_node,
 659                                  guard_init, border);
 660         if (!initial_true)
 661           cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
 662
 663         edge true_edge, false_edge;
 664         extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
 665
 666         /* Now version the loop, placing loop2 after loop1 connecting
 667            them, and fix up SSA form for that.  */
 668         initialize_original_copy_tables ();
 669         basic_block cond_bb;
 670
 671         profile_probability loop1_prob
 672           = integer_onep (cond) ? profile_probability::always ()
 673                                 : true_edge->probability;
 674         /* TODO: It is commonly a case that we know that both loops will be
 675            entered.  very_likely below is the probability that second loop will
 676            be entered given by connect_loops.  We should work out the common
 677            case it is always true.  */
 678         class loop *loop2 = loop_version (loop1, cond, &cond_bb,
 679                                           loop1_prob,
 680                                           /* Pass always as we will later
 681                                              redirect first loop to second
 682                                              loop.  */
 683                                           profile_probability::always (),
 684                                           profile_probability::always (),
 685                                           profile_probability::very_likely (),
 686                                           true);
 687         gcc_assert (loop2);
 688         /* Correct probability of edge  cond_bb->preheader_of_loop2.  */
 689         single_pred_edge
 690                 (loop_preheader_edge (loop2)->src)->probability
 691                         = loop1_prob.invert ();
 692
 693         fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
 694         /* If conditional we split on has reliable profilea nd both
 695            preconditionals of loop1 and loop2 are constant true, we can
 696            only redistribute the iteration counts to the split loops.
 697
 698            If the conditionals we insert before loop1 or loop2 are non-trivial
 699            they increase expected loop count, so account this accordingly.
 700            If we do not know the probability of split conditional, avoid
 701            reudcing loop estimates, since we do not really know how they are
 702            split between of the two new loops.  Keep orignal estimate since
 703            it is likely better then completely dropping it.
 704
 705            TODO: If we know that one of the new loops has constant
 706            number of iterations, we can do better.  We could also update
 707            upper bounds.  */
 708         if (loop1->any_estimate
 709             && wi::fits_shwi_p (loop1->nb_iterations_estimate))
 710           {
 711             sreal scale = true_edge->probability.reliable_p ()
 712                           ? true_edge->probability.to_sreal () : (sreal)1;
 713             sreal scale2 = false_edge->probability.reliable_p ()
 714                           ? false_edge->probability.to_sreal () : (sreal)1;
 715             sreal div1 = loop1_prob.initialized_p ()
 716                          ? loop1_prob.to_sreal () : (sreal)1/(sreal)2;
 717             /* +1 to get header interations rather than latch iterations and then
 718                -1 to convert back.  */
 719             if (div1 != 0)
 720               loop1->nb_iterations_estimate
 721                 = MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
 722                        * scale / div1).to_nearest_int () - 1, 0);
 723             else
 724               loop1->any_estimate = false;
 725             loop2->nb_iterations_estimate
 726               = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1) * scale2
 727                      / profile_probability::very_likely ().to_sreal ())
 728                      .to_nearest_int () - 1, 0);
 729           }
 730         update_loop_exit_probability_scale_dom_bbs (loop1);
 731         update_loop_exit_probability_scale_dom_bbs (loop2);
 732
 733         edge new_e = connect_loops (loop1, loop2);
 734         connect_loop_phis (loop1, loop2, new_e);
 735
 736         /* The iterations of the second loop is now already
 737            exactly those that the first loop didn't do, but the
 738            iteration space of the first loop is still the original one.
 739            Compute the new bound for the guarding IV and patch the
 740            loop exit to use it instead of original IV and bound.  */
 741         gimple_seq stmts = NULL;
 742         tree newend = compute_new_first_bound (&stmts, &niter, border,
 743                                                guard_code, guard_init);
 744         if (stmts)
 745           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
 746                                             stmts);
 747         tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
 748         patch_loop_exit (loop1, guard_code, guard_next, newend, initial_true);
 749
 750         /* Finally patch out the two copies of the condition to be always
 751            true/false (or opposite).  */
 752         gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
 753         gcond *force_false = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
 754         if (!initial_true)
 755           std::swap (force_true, force_false);
 756         gimple_cond_make_true (force_true);
 757         gimple_cond_make_false (force_false);
 758         update_stmt (force_true);
 759         update_stmt (force_false);
 760
 761         free_original_copy_tables ();
 762
 763         changed = true;
 764         if (dump_file && (dump_flags & TDF_DETAILS))
 765           fprintf (dump_file, ";; Loop split.\n");
 766
 767         if (dump_enabled_p ())
 768           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");
 769
 770         /* Only deal with the first opportunity.  */
 771         break;
 772       }
 773
 774   free (bbs);
 775   return changed;
 776 }
 777
 778 /* Another transformation of loops like:
 779
 780    for (i = INIT (); CHECK (i); i = NEXT ())
 781      {
 782        if (expr (a_1, a_2, ..., a_n))  // expr is pure
 783          a_j = ...;  // change at least one a_j
 784        else
 785          S;          // not change any a_j
 786      }
 787
 788    into:
 789
 790    for (i = INIT (); CHECK (i); i = NEXT ())
 791      {
 792        if (expr (a_1, a_2, ..., a_n))
 793          a_j = ...;
 794        else
 795          {
 796            S;
 797            i = NEXT ();
 798            break;
 799          }
 800      }
 801
 802    for (; CHECK (i); i = NEXT ())
 803      {
 804        S;
 805      }
 806
 807    */
 808
 809 /* Data structure to hold temporary information during loop split upon
 810    semi-invariant conditional statement.  */
 811 class split_info {
 812 public:
 813   /* Array of all basic blocks in a loop, returned by get_loop_body().  */
 814   basic_block *bbs;
 815
 816   /* All memory store/clobber statements in a loop.  */
 817   auto_vec<gimple *> memory_stores;
 818
 819   /* Whether above memory stores vector has been filled.  */
 820   int need_init;
 821
 822   /* Control dependencies of basic blocks in a loop.  */
 823   auto_vec<hash_set<basic_block> *> control_deps;
 824
 825   split_info () : bbs (NULL),  need_init (true) { }
 826
 827   ~split_info ()
 828     {
 829       if (bbs)
 830         free (bbs);
 831
 832       for (unsigned i = 0; i < control_deps.length (); i++)
 833         delete control_deps[i];
 834     }
 835 };
 836
 837 /* Find all statements with memory-write effect in LOOP, including memory
 838    store and non-pure function call, and keep those in a vector.  This work
 839    is only done one time, for the vector should be constant during analysis
 840    stage of semi-invariant condition.  */
 841
 842 static void
 843 find_vdef_in_loop (struct loop *loop)
 844 {
 845   split_info *info = (split_info *) loop->aux;
 846   gphi *vphi = get_virtual_phi (loop->header);
 847
 848   /* Indicate memory store vector has been filled.  */
 849   info->need_init = false;
 850
 851   /* If loop contains memory operation, there must be a virtual PHI node in
 852      loop header basic block.  */
 853   if (vphi == NULL)
 854     return;
 855
 856   /* All virtual SSA names inside the loop are connected to be a cyclic
 857      graph via virtual PHI nodes.  The virtual PHI node in loop header just
 858      links the first and the last virtual SSA names, by using the last as
 859      PHI operand to define the first.  */
 860   const edge latch = loop_latch_edge (loop);
 861   const tree first = gimple_phi_result (vphi);
 862   const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
 863
 864   /* The virtual SSA cyclic graph might consist of only one SSA name, who
 865      is defined by itself.
 866
 867        .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
 868
 869      This means the loop contains only memory loads, so we can skip it.  */
 870   if (first == last)
 871     return;
 872
 873   auto_vec<gimple *> other_stores;
 874   auto_vec<tree> worklist;
 875   auto_bitmap visited;
 876
 877   bitmap_set_bit (visited, SSA_NAME_VERSION (first));
 878   bitmap_set_bit (visited, SSA_NAME_VERSION (last));
 879   worklist.safe_push (last);
 880
 881   do
 882     {
 883       tree vuse = worklist.pop ();
 884       gimple *stmt = SSA_NAME_DEF_STMT (vuse);
 885
 886       /* We mark the first and last SSA names as visited at the beginning,
 887          and reversely start the process from the last SSA name towards the
 888          first, which ensures that this do-while will not touch SSA names
 889          defined outside the loop.  */
 890       gcc_assert (gimple_bb (stmt)
 891                   && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
 892
 893       if (gimple_code (stmt) == GIMPLE_PHI)
 894         {
 895           gphi *phi = as_a <gphi *> (stmt);
 896
 897           for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
 898             {
 899               tree arg = gimple_phi_arg_def (stmt, i);
 900
 901               if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
 902                 worklist.safe_push (arg);
 903             }
 904         }
 905       else
 906         {
 907           tree prev = gimple_vuse (stmt);
 908
 909           /* Non-pure call statement is conservatively assumed to impact all
 910              memory locations.  So place call statements ahead of other memory
 911              stores in the vector with an idea of using them as shortcut
 912              terminators to memory alias analysis.  */
 913           if (gimple_code (stmt) == GIMPLE_CALL)
 914             info->memory_stores.safe_push (stmt);
 915           else
 916             other_stores.safe_push (stmt);
 917
 918           if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
 919             worklist.safe_push (prev);
 920         }
 921     } while (!worklist.is_empty ());
 922
 923     info->memory_stores.safe_splice (other_stores);
 924 }
 925
 926 /* Two basic blocks have equivalent control dependency if one dominates to
 927    the other, and it is post-dominated by the latter.  Given a basic block
 928    BB in LOOP, find farest equivalent dominating basic block.  For BB, there
 929    is a constraint that BB does not post-dominate loop header of LOOP, this
 930    means BB is control-dependent on at least one basic block in LOOP.  */
 931
 932 static basic_block
 933 get_control_equiv_head_block (struct loop *loop, basic_block bb)
 934 {
 935   while (!bb->aux)
 936     {
 937       basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
 938
 939       gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
 940
 941       if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
 942         break;
 943
 944       bb = dom_bb;
 945     }
 946   return bb;
 947 }
 948
 949 /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
 950    dependent on.  */
 951
 952 static hash_set<basic_block> *
 953 find_control_dep_blocks (struct loop *loop, basic_block bb)
 954 {
 955   /* BB has same control dependency as loop header, then it is not control-
 956      dependent on any basic block in LOOP.  */
 957   if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
 958     return NULL;
 959
 960   basic_block equiv_head = get_control_equiv_head_block (loop, bb);
 961
 962   if (equiv_head->aux)
 963     {
 964       /* There is a basic block containing control dependency equivalent
 965          to BB.  No need to recompute that, and also set this information
 966          to other equivalent basic blocks.  */
 967       for (; bb != equiv_head;
 968            bb = get_immediate_dominator (CDI_DOMINATORS, bb))
 969         bb->aux = equiv_head->aux;
 970       return (hash_set<basic_block> *) equiv_head->aux;
 971     }
 972
 973   /* A basic block X is control-dependent on another Y iff there exists
 974      a path from X to Y, in which every basic block other than X and Y
 975      is post-dominated by Y, but X is not post-dominated by Y.
 976
 977      According to this rule, traverse basic blocks in the loop backwards
 978      starting from BB, if a basic block is post-dominated by BB, extend
 979      current post-dominating path to this block, otherwise it is another
 980      one that BB is control-dependent on.  */
 981
 982   auto_vec<basic_block> pdom_worklist;
 983   hash_set<basic_block> pdom_visited;
 984   hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;
 985
 986   pdom_worklist.safe_push (equiv_head);
 987
 988   do
 989     {
 990       basic_block pdom_bb = pdom_worklist.pop ();
 991       edge_iterator ei;
 992       edge e;
 993
 994       if (pdom_visited.add (pdom_bb))
 995         continue;
 996
 997       FOR_EACH_EDGE (e, ei, pdom_bb->preds)
 998         {
 999           basic_block pred_bb = e->src;
1000
1001           if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
1002             {
1003               dep_bbs->add (pred_bb);
1004               continue;
1005             }
1006
1007           pred_bb = get_control_equiv_head_block (loop, pred_bb);
1008
1009           if (pdom_visited.contains (pred_bb))
1010             continue;
1011
1012           if (!pred_bb->aux)
1013             {
1014               pdom_worklist.safe_push (pred_bb);
1015               continue;
1016             }
1017
1018           /* If control dependency of basic block is available, fast extend
1019              post-dominating path using the information instead of advancing
1020              forward step-by-step.  */
1021           hash_set<basic_block> *pred_dep_bbs
1022                         = (hash_set<basic_block> *) pred_bb->aux;
1023
1024           for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
1025                iter != pred_dep_bbs->end (); ++iter)
1026             {
1027               basic_block pred_dep_bb = *iter;
1028
1029               /* Basic blocks can either be in control dependency of BB, or
1030                  must be post-dominated by BB, if so, extend the path from
1031                  these basic blocks.  */
1032               if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
1033                 dep_bbs->add (pred_dep_bb);
1034               else if (!pdom_visited.contains (pred_dep_bb))
1035                 pdom_worklist.safe_push (pred_dep_bb);
1036             }
1037         }
1038     } while (!pdom_worklist.is_empty ());
1039
1040   /* Record computed control dependencies in loop so that we can reach them
1041      when reclaiming resources.  */
1042   ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);
1043
1044   /* Associate control dependence with related equivalent basic blocks.  */
1045   for (equiv_head->aux = dep_bbs; bb != equiv_head;
1046        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
1047     bb->aux = dep_bbs;
1048
1049   return dep_bbs;
1050 }
1051
1052 /* Forward declaration */
1053
1054 static bool
1055 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1056                          const_basic_block skip_head,
1057                          hash_map<gimple *, bool> &stmt_stat);
1058
1059 /* Given STMT, memory load or pure call statement, check whether it is impacted
1060    by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
1061    trace is composed of SKIP_HEAD and those basic block dominated by it, always
1062    corresponds to one branch of a conditional statement).  If SKIP_HEAD is
1063    NULL, all basic blocks of LOOP are checked.  */
1064
1065 static bool
1066 vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
1067                        const_basic_block skip_head)
1068 {
1069   split_info *info = (split_info *) loop->aux;
1070   tree rhs = NULL_TREE;
1071   ao_ref ref;
1072   gimple *store;
1073   unsigned i;
1074
1075   /* Collect memory store/clobber statements if haven't done that.  */
1076   if (info->need_init)
1077     find_vdef_in_loop (loop);
1078
1079   if (is_gimple_assign (stmt))
1080     rhs = gimple_assign_rhs1 (stmt);
1081
1082   ao_ref_init (&ref, rhs);
1083
1084   FOR_EACH_VEC_ELT (info->memory_stores, i, store)
1085     {
1086       /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
1087       if (skip_head
1088           && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
1089         continue;
1090
1091       if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1092         return false;
1093     }
1094
1095   return true;
1096 }
1097
1098 /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1099    certain iteration of LOOP, check whether an SSA name (NAME) remains
1100    unchanged in next iteration.  We call this characteristic semi-
1101    invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1102    blocks and control flows in the loop will be considered.  Semi-invariant
1103    state of checked statement is cached in hash map STMT_STAT to avoid
1104    redundant computation in possible following re-check.  */
1105
1106 static inline bool
1107 ssa_semi_invariant_p (struct loop *loop, tree name,
1108                       const_basic_block skip_head,
1109                       hash_map<gimple *, bool> &stmt_stat)
1110 {
1111   gimple *def = SSA_NAME_DEF_STMT (name);
1112   const_basic_block def_bb = gimple_bb (def);
1113
1114   /* An SSA name defined outside loop is definitely semi-invariant.  */
1115   if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1116     return true;
1117
1118   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1119     return false;
1120
1121   return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1122 }
1123
1124 /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
1125    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1126    are excluded from LOOP.  */
1127
1128 static bool
1129 loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
1130                                 const_basic_block skip_head)
1131 {
1132   const_edge latch = loop_latch_edge (loop);
1133   tree name = gimple_phi_result (loop_phi);
1134   tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
1135
1136   gcc_checking_assert (from);
1137
1138   /* Loop iteration PHI node locates in loop header, and it has two source
1139      operands, one is an initial value coming from outside the loop, the other
1140      is a value through latch of the loop, which is derived in last iteration,
1141      we call the latter latch value.  From the PHI node to definition of latch
1142      value, if excluding branch trace starting from SKIP_HEAD, except copy-
1143      assignment or likewise, there is no other kind of value redefinition, SSA
1144      name defined by the PHI node is semi-invariant.
1145
1146                          loop entry
1147                               |     .--- latch ---.
1148                               |     |             |
1149                               v     v             |
1150                   x_1 = PHI <x_0,  x_3>           |
1151                            |                      |
1152                            v                      |
1153               .------- if (cond) -------.         |
1154               |                         |         |
1155               |                     [ SKIP ]      |
1156               |                         |         |
1157               |                     x_2 = ...     |
1158               |                         |         |
1159               '---- T ---->.<---- F ----'         |
1160                            |                      |
1161                            v                      |
1162                   x_3 = PHI <x_1, x_2>            |
1163                            |                      |
1164                            '----------------------'
1165
1166      Suppose in certain iteration, execution flow in above graph goes through
1167      true branch, which means that one source value to define x_3 in false
1168      branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
1169      iterations is defined by x_3, we know that x_1 will never changed if COND
1170      always chooses true branch from then on.  */
1171
1172   while (from != name)
1173     {
1174       /* A new value comes from a CONSTANT.  */
1175       if (TREE_CODE (from) != SSA_NAME)
1176         return false;
1177
1178       gimple *stmt = SSA_NAME_DEF_STMT (from);
1179       const_basic_block bb = gimple_bb (stmt);
1180
1181       /* A new value comes from outside the loop.  */
1182       if (!bb || !flow_bb_inside_loop_p (loop, bb))
1183         return false;
1184
1185       from = NULL_TREE;
1186
1187       if (gimple_code (stmt) == GIMPLE_PHI)
1188         {
1189           gphi *phi = as_a <gphi *> (stmt);
1190
1191           for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1192             {
1193               if (skip_head)
1194                 {
1195                   const_edge e = gimple_phi_arg_edge (phi, i);
1196
1197                   /* Don't consider redefinitions in excluded basic blocks.  */
1198                   if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1199                     continue;
1200                 }
1201
1202               tree arg = gimple_phi_arg_def (phi, i);
1203
1204               if (!from)
1205                 from = arg;
1206               else if (!operand_equal_p (from, arg, 0))
1207                 /* There are more than one source operands that provide
1208                    different values to the SSA name, it is variant.  */
1209                 return false;
1210             }
1211         }
1212       else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1213         {
1214           /* For simple value copy, check its rhs instead.  */
1215           if (gimple_assign_ssa_name_copy_p (stmt))
1216             from = gimple_assign_rhs1 (stmt);
1217         }
1218
1219       /* Any other kind of definition is deemed to introduce a new value
1220          to the SSA name.  */
1221       if (!from)
1222         return false;
1223     }
1224   return true;
1225 }
1226
1227 /* Check whether conditional predicates that BB is control-dependent on, are
1228    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1229    are excluded from LOOP.  Semi-invariant state of checked statement is cached
1230    in hash map STMT_STAT.  */
1231
1232 static bool
1233 control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1234                               const_basic_block skip_head,
1235                               hash_map<gimple *, bool> &stmt_stat)
1236 {
1237   hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1238
1239   if (!dep_bbs)
1240     return true;
1241
1242   for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1243        iter != dep_bbs->end (); ++iter)
1244     {
1245       gimple *last = *gsi_last_bb (*iter);
1246       if (!last)
1247         return false;
1248
1249       /* Only check condition predicates.  */
1250       if (gimple_code (last) != GIMPLE_COND
1251           && gimple_code (last) != GIMPLE_SWITCH)
1252         return false;
1253
1254       if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1255         return false;
1256     }
1257
1258   return true;
1259 }
1260
1261 /* Check whether STMT is semi-invariant in LOOP, iff all its operands are
1262    semi-invariant, consequently, all its defined values are semi-invariant.
1263    Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
1264    Semi-invariant state of checked statement is cached in hash map
1265    STMT_STAT.  */
1266
1267 static bool
1268 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1269                          const_basic_block skip_head,
1270                          hash_map<gimple *, bool> &stmt_stat)
1271 {
1272   bool existed;
1273   bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1274
1275   if (existed)
1276     return invar;
1277
1278   /* A statement might depend on itself, which is treated as variant.  So set
1279      state of statement under check to be variant to ensure that.  */
1280   invar = false;
1281
1282   if (gimple_code (stmt) == GIMPLE_PHI)
1283     {
1284       gphi *phi = as_a <gphi *> (stmt);
1285
1286       if (gimple_bb (stmt) == loop->header)
1287         {
1288           /* If the entry value is subject to abnormal coalescing
1289              avoid the transform since we're going to duplicate the
1290              loop header and thus likely introduce overlapping life-ranges
1291              between the PHI def and the entry on the path when the
1292              first loop is skipped.  */
1293           tree entry_def
1294             = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1295           if (TREE_CODE (entry_def) == SSA_NAME
1296               && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1297             return false;
1298           invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1299           return invar;
1300         }
1301
1302       /* For a loop PHI node that does not locate in loop header, it is semi-
1303          invariant only if two conditions are met.  The first is its source
1304          values are derived from CONSTANT (including loop-invariant value), or
1305          from SSA name defined by semi-invariant loop iteration PHI node.  The
1306          second is its source incoming edges are control-dependent on semi-
1307          invariant conditional predicates.  */
1308       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1309         {
1310           const_edge e = gimple_phi_arg_edge (phi, i);
1311           tree arg = gimple_phi_arg_def (phi, i);
1312
1313           if (TREE_CODE (arg) == SSA_NAME)
1314             {
1315               if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1316                 return false;
1317
1318               /* If source value is defined in location from where the source
1319                  edge comes in, no need to check control dependency again
1320                  since this has been done in above SSA name check stage.  */
1321               if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1322                 continue;
1323             }
1324
1325           if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1326                                              stmt_stat))
1327             return false;
1328         }
1329     }
1330   else
1331     {
1332       ssa_op_iter iter;
1333       tree use;
1334
1335       /* Volatile memory load or return of normal (non-const/non-pure) call
1336          should not be treated as constant in each iteration of loop.  */
1337       if (gimple_has_side_effects (stmt))
1338         return false;
1339
1340       /* Check if any memory store may kill memory load at this place.  */
1341       if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1342         return false;
1343
1344       /* Although operand of a statement might be SSA name, CONSTANT or
1345          VARDECL, here we only need to check SSA name operands.  This is
1346          because check on VARDECL operands, which involve memory loads,
1347          must have been done prior to invocation of this function in
1348          vuse_semi_invariant_p.  */
1349       FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1350         if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1351           return false;
1352     }
1353
1354   if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1355                                      stmt_stat))
1356     return false;
1357
1358   /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1359      to new insertion, and thus invar may point to invalid memory.  */
1360   stmt_stat.put (stmt, true);
1361   return true;
1362 }
1363
1364 /* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
1365    blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.  */
1366
1367 static bool
1368 stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1369                        const_basic_block skip_head)
1370 {
1371   hash_map<gimple *, bool> stmt_stat;
1372   return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1373 }
1374
1375 /* Determine when conditional statement never transfers execution to one of its
1376    branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1377    and those basic blocks dominated by BRANCH_BB.  */
1378
1379 static bool
1380 branch_removable_p (basic_block branch_bb)
1381 {
1382   edge_iterator ei;
1383   edge e;
1384
1385   if (single_pred_p (branch_bb))
1386     return true;
1387
1388   FOR_EACH_EDGE (e, ei, branch_bb->preds)
1389     {
1390       if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1391         continue;
1392
1393       if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1394         continue;
1395
1396        /* The branch can be reached from opposite branch, or from some
1397           statement not dominated by the conditional statement.  */
1398       return false;
1399     }
1400
1401   return true;
1402 }
1403
1404 /* Find out which branch of a conditional statement (COND) is invariant in the
1405    execution context of LOOP.  That is: once the branch is selected in certain
1406    iteration of the loop, any operand that contributes to computation of the
1407    conditional statement remains unchanged in all following iterations.  */
1408
1409 static edge
1410 get_cond_invariant_branch (struct loop *loop, gcond *cond)
1411 {
1412   basic_block cond_bb = gimple_bb (cond);
1413   basic_block targ_bb[2];
1414   bool invar[2];
1415   unsigned invar_checks = 0;
1416
1417   for (unsigned i = 0; i < 2; i++)
1418     {
1419       targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1420
1421       /* One branch directs to loop exit, no need to perform loop split upon
1422          this conditional statement.  Firstly, it is trivial if the exit branch
1423          is semi-invariant, for the statement is just to break loop.  Secondly,
1424          if the opposite branch is semi-invariant, it means that the statement
1425          is real loop-invariant, which is covered by loop unswitch.  */
1426       if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1427         return NULL;
1428     }
1429
1430   for (unsigned i = 0; i < 2; i++)
1431     {
1432       invar[!i] = false;
1433
1434       if (!branch_removable_p (targ_bb[i]))
1435         continue;
1436
1437       /* Given a semi-invariant branch, if its opposite branch dominates
1438          loop latch, it and its following trace will only be executed in
1439          final iteration of loop, namely it is not part of repeated body
1440          of the loop.  Similar to the above case that the branch is loop
1441          exit, no need to split loop.  */
1442       if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1443         continue;
1444
1445       invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1446       invar_checks++;
1447     }
1448
1449   /* With both branches being invariant (handled by loop unswitch) or
1450      variant is not what we want.  */
1451   if (invar[0] ^ !invar[1])
1452     return NULL;
1453
1454   /* Found a real loop-invariant condition, do nothing.  */
1455   if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1456     return NULL;
1457
1458   return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1459 }
1460
1461 /* Calculate increased code size measured by estimated insn number if applying
1462    loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
1463
1464 static int
1465 compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1466 {
1467   basic_block cond_bb = branch_edge->src;
1468   unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1469   basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1470   basic_block *bbs = ((split_info *) loop->aux)->bbs;
1471   int num = 0;
1472
1473   for (unsigned i = 0; i < loop->num_nodes; i++)
1474     {
1475       /* Do no count basic blocks only in opposite branch.  */
1476       if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1477         continue;
1478
1479       num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1480     }
1481
1482   /* It is unnecessary to evaluate expression of the conditional statement
1483      in new loop that contains only invariant branch.  This expression should
1484      be constant value (either true or false).  Exclude code size of insns
1485      that contribute to computation of the expression.  */
1486
1487   auto_vec<gimple *> worklist;
1488   hash_set<gimple *> removed;
1489   gimple *stmt = last_nondebug_stmt (cond_bb);
1490
1491   worklist.safe_push (stmt);
1492   removed.add (stmt);
1493   num -= estimate_num_insns (stmt, &eni_size_weights);
1494
1495   do
1496     {
1497       ssa_op_iter opnd_iter;
1498       use_operand_p opnd_p;
1499
1500       stmt = worklist.pop ();
1501       FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1502         {
1503           tree opnd = USE_FROM_PTR (opnd_p);
1504
1505           if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1506             continue;
1507
1508           gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1509           use_operand_p use_p;
1510           imm_use_iterator use_iter;
1511
1512           if (removed.contains (opnd_stmt)
1513               || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1514             continue;
1515
1516           FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1517             {
1518               gimple *use_stmt = USE_STMT (use_p);
1519
1520               if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1521                 {
1522                   opnd_stmt = NULL;
1523                   break;
1524                 }
1525             }
1526
1527           if (opnd_stmt)
1528             {
1529               worklist.safe_push (opnd_stmt);
1530               removed.add (opnd_stmt);
1531               num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1532             }
1533         }
1534     } while (!worklist.is_empty ());
1535
1536   gcc_assert (num >= 0);
1537   return num;
1538 }
1539
1540 /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1541    and check whether it is eligible and profitable to perform loop split upon
1542    this branch in LOOP.  */
1543
1544 static edge
1545 get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1546 {
1547   edge invar_branch = get_cond_invariant_branch (loop, cond);
1548   if (!invar_branch)
1549     return NULL;
1550
1551   /* When accurate profile information is available, and execution
1552      frequency of the branch is too low, just let it go.  */
1553   profile_probability prob = invar_branch->probability;
1554   if (prob.reliable_p ())
1555     {
1556       int thres = param_min_loop_cond_split_prob;
1557
1558       if (prob < profile_probability::always ().apply_scale (thres, 100))
1559         return NULL;
1560     }
1561
1562   /* Add a threshold for increased code size to disable loop split.  */
1563   if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1564     return NULL;
1565
1566   return invar_branch;
1567 }
1568
1569 /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1570    conditional statement, perform loop split transformation illustrated
1571    as the following graph.
1572
1573                .-------T------ if (true) ------F------.
1574                |                    .---------------. |
1575                |                    |               | |
1576                v                    |               v v
1577           pre-header                |            pre-header
1578                | .------------.     |                 | .------------.
1579                | |            |     |                 | |            |
1580                | v            |     |                 | v            |
1581              header           |     |               header           |
1582                |              |     |                 |              |
1583       .--- if (cond) ---.     |     |        .--- if (true) ---.     |
1584       |                 |     |     |        |                 |     |
1585   invariant             |     |     |    invariant             |     |
1586       |                 |     |     |        |                 |     |
1587       '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
1588                |              |    /                  |              |
1589              stmts            |   /                 stmts            |
1590                |              F  T                    |              |
1591               / \             | /                    / \             |
1592      .-------*   *      [ if (cond) ]       .-------*   *            |
1593      |           |            |             |           |            |
1594      |         latch          |             |         latch          |
1595      |           |            |             |           |            |
1596      |           '------------'             |           '------------'
1597      '------------------------. .-----------'
1598              loop1            | |                   loop2
1599                               v v
1600                              exits
1601
1602    In the graph, loop1 represents the part derived from original one, and
1603    loop2 is duplicated using loop_version (), which corresponds to the part
1604    of original one being splitted out.  In original latch edge of loop1, we
1605    insert a new conditional statement duplicated from the semi-invariant cond,
1606    and one of its branch goes back to loop1 header as a latch edge, and the
1607    other branch goes to loop2 pre-header as an entry edge.  And also in loop2,
1608    we abandon the variant branch of the conditional statement by setting a
1609    constant bool condition, based on which branch is semi-invariant.  */
1610
1611 static bool
1612 do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1613 {
1614   basic_block cond_bb = invar_branch->src;
1615   bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1616   gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
1617
1618   gcc_assert (cond_bb->loop_father == loop1);
1619
1620   if (dump_enabled_p ())
1621     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1622                      "loop split on semi-invariant condition at %s branch\n",
1623                      true_invar ? "true" : "false");
1624
1625   initialize_original_copy_tables ();
1626
1627   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1628                                      invar_branch->probability.invert (),
1629                                      invar_branch->probability,
1630                                      profile_probability::always (),
1631                                      profile_probability::always (),
1632                                      true);
1633   if (!loop2)
1634     {
1635       free_original_copy_tables ();
1636       return false;
1637     }
1638
1639   basic_block cond_bb_copy = get_bb_copy (cond_bb);
1640   gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));
1641
1642   /* Replace the condition in loop2 with a bool constant to let PassManager
1643      remove the variant branch after current pass completes.  */
1644   if (true_invar)
1645     gimple_cond_make_true (cond_copy);
1646   else
1647     gimple_cond_make_false (cond_copy);
1648
1649   update_stmt (cond_copy);
1650
1651   /* Insert a new conditional statement on latch edge of loop1, its condition
1652      is duplicated from the semi-invariant.  This statement acts as a switch
1653      to transfer execution from loop1 to loop2, when loop1 enters into
1654      invariant state.  */
1655   basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1656   basic_block break_bb = split_edge (single_pred_edge (latch_bb));
1657   gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1658                                           gimple_cond_lhs (cond),
1659                                           gimple_cond_rhs (cond),
1660                                           NULL_TREE, NULL_TREE);
1661
1662   gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1663   gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1664
1665   edge to_loop1 = single_succ_edge (break_bb);
1666   edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1667
1668   to_loop1->flags &= ~EDGE_FALLTHRU;
1669   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1670   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1671
1672   /* Due to introduction of a control flow edge from loop1 latch to loop2
1673      pre-header, we should update PHIs in loop2 to reflect this connection
1674      between loop1 and loop2.  */
1675   connect_loop_phis (loop1, loop2, to_loop2);
1676
1677   edge true_edge, false_edge, skip_edge1, skip_edge2;
1678   extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1679
1680   skip_edge1 = true_invar ? false_edge : true_edge;
1681   skip_edge2 = true_invar ? true_edge : false_edge;
1682   fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
1683
1684   /* Fix first loop's exit probability after scaling.  */
1685   to_loop1->probability = invar_branch->probability.invert ();
1686   to_loop2->probability = invar_branch->probability;
1687
1688   free_original_copy_tables ();
1689
1690   return true;
1691 }
1692
1693 /* Traverse all conditional statements in LOOP, to find out a good candidate
1694    upon which we can do loop split.  */
1695
1696 static bool
1697 split_loop_on_cond (struct loop *loop)
1698 {
1699   split_info *info = new split_info ();
1700   basic_block *bbs = info->bbs = get_loop_body (loop);
1701   bool do_split = false;
1702
1703   /* Allocate an area to keep temporary info, and associate its address
1704      with loop aux field.  */
1705   loop->aux = info;
1706
1707   for (unsigned i = 0; i < loop->num_nodes; i++)
1708     bbs[i]->aux = NULL;
1709
1710   for (unsigned i = 0; i < loop->num_nodes; i++)
1711     {
1712       basic_block bb = bbs[i];
1713
1714       /* We only consider conditional statement, which be executed at most once
1715          in each iteration of the loop.  So skip statements in inner loops.  */
1716       if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1717         continue;
1718
1719       /* Actually this check is not a must constraint.  With it, we can ensure
1720          conditional statement will always be executed in each iteration.  */
1721       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1722         continue;
1723
1724       gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1725       if (!cond)
1726         continue;
1727
1728       edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1729
1730       if (branch_edge)
1731         {
1732           do_split_loop_on_cond (loop, branch_edge);
1733           do_split = true;
1734           break;
1735         }
1736     }
1737
1738   delete info;
1739   loop->aux = NULL;
1740
1741   return do_split;
1742 }
1743
1744 /* Main entry point.  Perform loop splitting on all suitable loops.  */
1745
1746 static unsigned int
1747 tree_ssa_split_loops (void)
1748 {
1749   bool changed = false;
1750
1751   gcc_assert (scev_initialized_p ());
1752
1753   calculate_dominance_info (CDI_POST_DOMINATORS);
1754
1755   for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1756     loop->aux = NULL;
1757
1758   /* Go through all loops starting from innermost.  */
1759   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1760     {
1761       if (loop->aux)
1762         {
1763           /* If any of our inner loops was split, don't split us,
1764              and mark our containing loop as having had splits as well.
1765              This allows for delaying SSA update.  */
1766           loop_outer (loop)->aux = loop;
1767           continue;
1768         }
1769
1770       if (optimize_loop_for_size_p (loop))
1771         continue;
1772
1773       if (split_loop (loop) || split_loop_on_cond (loop))
1774         {
1775           /* Mark our containing loop as having had some split inner loops.  */
1776           loop_outer (loop)->aux = loop;
1777           changed = true;
1778         }
1779     }
1780
1781   for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1782     loop->aux = NULL;
1783
1784   clear_aux_for_blocks ();
1785
1786   free_dominance_info (CDI_POST_DOMINATORS);
1787
1788   if (changed)
1789     {
1790       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1791       return TODO_cleanup_cfg;
1792     }
1793   return 0;
1794 }
1795
1796 /* Loop splitting pass.  */
1797
1798 namespace {
1799
1800 const pass_data pass_data_loop_split =
1801 {
1802   GIMPLE_PASS, /* type */
1803   "lsplit", /* name */
1804   OPTGROUP_LOOP, /* optinfo_flags */
1805   TV_LOOP_SPLIT, /* tv_id */
1806   PROP_cfg, /* properties_required */
1807   0, /* properties_provided */
1808   0, /* properties_destroyed */
1809   0, /* todo_flags_start */
1810   0, /* todo_flags_finish */
1811 };
1812
1813 class pass_loop_split : public gimple_opt_pass
1814 {
1815 public:
1816   pass_loop_split (gcc::context *ctxt)
1817     : gimple_opt_pass (pass_data_loop_split, ctxt)
1818   {}
1819
1820   /* opt_pass methods: */
1821   bool gate (function *) final override { return flag_split_loops != 0; }
1822   unsigned int execute (function *) final override;
1823
1824 }; // class pass_loop_split
1825
1826 unsigned int
1827 pass_loop_split::execute (function *fun)
1828 {
1829   if (number_of_loops (fun) <= 1)
1830     return 0;
1831
1832   return tree_ssa_split_loops ();
1833 }
1834
1835 } // anon namespace
1836
1837 gimple_opt_pass *
1838 make_pass_loop_split (gcc::context *ctxt)
1839 {
1840   return new pass_loop_split (ctxt);
1841 }