gcc/tree-ssa-loop-split.cc

   1 /* Loop splitting.
   2    Copyright (C) 2015-2024 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "tree.h"
  25 #include "gimple.h"
  26 #include "tree-pass.h"
  27 #include "ssa.h"
  28 #include "fold-const.h"
  29 #include "tree-cfg.h"
  30 #include "tree-ssa.h"
  31 #include "tree-ssa-loop-niter.h"
  32 #include "tree-ssa-loop.h"
  33 #include "tree-ssa-loop-manip.h"
  34 #include "tree-into-ssa.h"
  35 #include "tree-inline.h"
  36 #include "tree-cfgcleanup.h"
  37 #include "cfgloop.h"
  38 #include "tree-scalar-evolution.h"
  39 #include "gimple-iterator.h"
  40 #include "gimple-pretty-print.h"
  41 #include "cfghooks.h"
  42 #include "gimple-fold.h"
  43 #include "gimplify-me.h"
  44 #include "print-tree.h"
  45 #include "value-query.h"
  46 #include "sreal.h"
  47
  48 /* This file implements two kinds of loop splitting.
  49
  50    One transformation of loops like:
  51
  52    for (i = 0; i < 100; i++)
  53      {
  54        if (i < 50)
  55          A;
  56        else
  57          B;
  58      }
  59
  60    into:
  61
  62    for (i = 0; i < 50; i++)
  63      {
  64        A;
  65      }
  66    for (; i < 100; i++)
  67      {
  68        B;
  69      }
  70
  71    */
  72
  73 /* Return true when BB inside LOOP is a potential iteration space
  74    split point, i.e. ends with a condition like "IV < comp", which
  75    is true on one side of the iteration space and false on the other,
  76    and the split point can be computed.  If so, also return the border
  77    point in *BORDER and the comparison induction variable in IV.  */
  78
  79 static tree
  80 split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
  81                enum tree_code *guard_code)
  82 {
  83   gcond *stmt;
  84   affine_iv iv2;
  85
  86   /* BB must end in a simple conditional jump.  */
  87   stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  88   if (!stmt)
  89     return NULL_TREE;
  90
  91   enum tree_code code = gimple_cond_code (stmt);
  92
  93   if (loop_exits_from_bb_p (loop, bb))
  94     return NULL_TREE;
  95
  96   tree op0 = gimple_cond_lhs (stmt);
  97   tree op1 = gimple_cond_rhs (stmt);
  98   class loop *useloop = loop_containing_stmt (stmt);
  99
 100   if (!simple_iv (loop, useloop, op0, iv, false))
 101     return NULL_TREE;
 102   if (!simple_iv (loop, useloop, op1, &iv2, false))
 103     return NULL_TREE;
 104
 105   /* Make it so that the first argument of the condition is
 106      the looping one.  */
 107   if (!integer_zerop (iv2.step))
 108     {
 109       std::swap (op0, op1);
 110       std::swap (*iv, iv2);
 111       code = swap_tree_comparison (code);
 112       gimple_cond_set_condition (stmt, code, op0, op1);
 113       update_stmt (stmt);
 114     }
 115   else if (integer_zerop (iv->step))
 116     return NULL_TREE;
 117   if (!integer_zerop (iv2.step))
 118     return NULL_TREE;
 119   if (!iv->no_overflow)
 120     return NULL_TREE;
 121
 122   /* Only handle relational comparisons, for equality and non-equality
 123      we'd have to split the loop into two loops and a middle statement.  */
 124   switch (code)
 125     {
 126       case LT_EXPR:
 127       case LE_EXPR:
 128       case GT_EXPR:
 129       case GE_EXPR:
 130         break;
 131       case NE_EXPR:
 132       case EQ_EXPR:
 133         /* If the test check for first iteration, we can handle NE/EQ
 134            with only one split loop.  */
 135         if (operand_equal_p (iv->base, iv2.base, 0))
 136           {
 137             if (code == EQ_EXPR)
 138               code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
 139             else
 140               code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
 141             break;
 142           }
 143         /* Similarly when the test checks for minimal or maximal
 144            value range.  */
 145         else
 146           {
 147             Value_Range r (TREE_TYPE (op0));
 148             get_global_range_query ()->range_of_expr (r, op0, stmt);
 149             if (!r.varying_p () && !r.undefined_p ()
 150                 && TREE_CODE (op1) == INTEGER_CST)
 151               {
 152                 wide_int val = wi::to_wide (op1);
 153                 if (known_eq (val, wi::to_wide (r.lbound ())))
 154                   {
 155                     code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
 156                     break;
 157                   }
 158                 else if (known_eq (val, wi::to_wide (r.ubound ())))
 159                   {
 160                     code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
 161                     break;
 162                   }
 163               }
 164           }
 165         /* TODO: We can compare with exit condition; it seems that testing for
 166            last iteration is common case.  */
 167         return NULL_TREE;
 168       default:
 169         return NULL_TREE;
 170     }
 171
 172   if (dump_file && (dump_flags & TDF_DETAILS))
 173     {
 174       fprintf (dump_file, "Found potential split point: ");
 175       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
 176       fprintf (dump_file, " { ");
 177       print_generic_expr (dump_file, iv->base, TDF_SLIM);
 178       fprintf (dump_file, " + I*");
 179       print_generic_expr (dump_file, iv->step, TDF_SLIM);
 180       fprintf (dump_file, " } %s ", get_tree_code_name (code));
 181       print_generic_expr (dump_file, iv2.base, TDF_SLIM);
 182       fprintf (dump_file, "\n");
 183     }
 184
 185   *border = iv2.base;
 186   *guard_code = code;
 187   return op0;
 188 }
 189
 190 /* Given a GUARD conditional stmt inside LOOP, which we want to make always
 191    true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
 192    (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
 193    exit test statement to loop back only if the GUARD statement will
 194    also be true/false in the next iteration.  */
 195
 196 static void
 197 patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval,
 198                  tree newbound, bool initial_true)
 199 {
 200   edge exit = single_exit (loop);
 201   gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
 202   gimple_cond_set_condition (stmt, guard_code, nextval, newbound);
 203   update_stmt (stmt);
 204
 205   edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
 206
 207   exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
 208   stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
 209
 210   if (initial_true)
 211     {
 212       exit->flags |= EDGE_FALSE_VALUE;
 213       stay->flags |= EDGE_TRUE_VALUE;
 214     }
 215   else
 216     {
 217       exit->flags |= EDGE_TRUE_VALUE;
 218       stay->flags |= EDGE_FALSE_VALUE;
 219     }
 220 }
 221
 222 /* Give an induction variable GUARD_IV, and its affine descriptor IV,
 223    find the loop phi node in LOOP defining it directly, or create
 224    such phi node.  Return that phi node.  */
 225
 226 static gphi *
 227 find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
 228 {
 229   gimple *def = SSA_NAME_DEF_STMT (guard_iv);
 230   gphi *phi;
 231   if ((phi = dyn_cast <gphi *> (def))
 232       && gimple_bb (phi) == loop->header)
 233     return phi;
 234
 235   /* XXX Create the PHI instead.  */
 236   return NULL;
 237 }
 238
 239 /* Returns true if the exit values of all loop phi nodes can be
 240    determined easily (i.e. that connect_loop_phis can determine them).  */
 241
 242 static bool
 243 easy_exit_values (class loop *loop)
 244 {
 245   edge exit = single_exit (loop);
 246   edge latch = loop_latch_edge (loop);
 247   gphi_iterator psi;
 248
 249   /* Currently we regard the exit values as easy if they are the same
 250      as the value over the backedge.  Which is the case if the definition
 251      of the backedge value dominates the exit edge.  */
 252   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 253     {
 254       gphi *phi = psi.phi ();
 255       tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
 256       basic_block bb;
 257       if (TREE_CODE (next) == SSA_NAME
 258           && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
 259           && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
 260         return false;
 261     }
 262
 263   return true;
 264 }
 265
 266 /* This function updates the SSA form after connect_loops made a new
 267    edge NEW_E leading from LOOP1 exit to LOOP2 (via in intermediate
 268    conditional).  I.e. the second loop can now be entered either
 269    via the original entry or via NEW_E, so the entry values of LOOP2
 270    phi nodes are either the original ones or those at the exit
 271    of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
 272    this.  The loops need to fulfill easy_exit_values().  */
 273
 274 static void
 275 connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
 276 {
 277   basic_block rest = loop_preheader_edge (loop2)->src;
 278   gcc_assert (new_e->dest == rest);
 279   edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
 280
 281   edge firste = loop_preheader_edge (loop1);
 282   edge seconde = loop_preheader_edge (loop2);
 283   edge firstn = loop_latch_edge (loop1);
 284   gphi_iterator psi_first, psi_second;
 285   for (psi_first = gsi_start_phis (loop1->header),
 286        psi_second = gsi_start_phis (loop2->header);
 287        !gsi_end_p (psi_first);
 288        gsi_next (&psi_first), gsi_next (&psi_second))
 289     {
 290       tree init, next, new_init;
 291       use_operand_p op;
 292       gphi *phi_first = psi_first.phi ();
 293       gphi *phi_second = psi_second.phi ();
 294
 295       init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
 296       next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
 297       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
 298       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
 299
 300       /* Prefer using original variable as a base for the new ssa name.
 301          This is necessary for virtual ops, and useful in order to avoid
 302          losing debug info for real ops.  */
 303       if (TREE_CODE (next) == SSA_NAME
 304           && useless_type_conversion_p (TREE_TYPE (next),
 305                                         TREE_TYPE (init)))
 306         new_init = copy_ssa_name (next);
 307       else if (TREE_CODE (init) == SSA_NAME
 308                && useless_type_conversion_p (TREE_TYPE (init),
 309                                              TREE_TYPE (next)))
 310         new_init = copy_ssa_name (init);
 311       else if (useless_type_conversion_p (TREE_TYPE (next),
 312                                           TREE_TYPE (init)))
 313         new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
 314                                        "unrinittmp");
 315       else
 316         new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
 317                                        "unrinittmp");
 318
 319       gphi * newphi = create_phi_node (new_init, rest);
 320       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
 321       add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
 322       SET_USE (op, new_init);
 323     }
 324 }
 325
 326 /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
 327    they are still equivalent and placed in two arms of a diamond, like so:
 328
 329                .------if (cond)------.
 330                v                     v
 331              pre1                   pre2
 332               |                      |
 333         .--->h1                     h2<----.
 334         |     |                      |     |
 335         |    ex1---.            .---ex2    |
 336         |    /     |            |     \    |
 337         '---l1     X            |     l2---'
 338                    |            |
 339                    |            |
 340                    '--->join<---'
 341
 342    This function transforms the program such that LOOP1 is conditionally
 343    falling through to LOOP2, or skipping it.  This is done by splitting
 344    the ex1->join edge at X in the diagram above, and inserting a condition
 345    whose one arm goes to pre2, resulting in this situation:
 346
 347                .------if (cond)------.
 348                v                     v
 349              pre1       .---------->pre2
 350               |         |            |
 351         .--->h1         |           h2<----.
 352         |     |         |            |     |
 353         |    ex1---.    |       .---ex2    |
 354         |    /     v    |       |     \    |
 355         '---l1   skip---'       |     l2---'
 356                    |            |
 357                    |            |
 358                    '--->join<---'
 359
 360
 361    The condition used is the exit condition of LOOP1, which effectively means
 362    that when the first loop exits (for whatever reason) but the real original
 363    exit expression is still false the second loop will be entered.
 364    The function returns the new edge cond->pre2.
 365
 366    This doesn't update the SSA form, see connect_loop_phis for that.  */
 367
 368 static edge
 369 connect_loops (class loop *loop1, class loop *loop2)
 370 {
 371   edge exit = single_exit (loop1);
 372   basic_block skip_bb = split_edge (exit);
 373   gcond *skip_stmt;
 374   gimple_stmt_iterator gsi;
 375   edge new_e, skip_e;
 376
 377   gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
 378   skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
 379                                  gimple_cond_lhs (stmt),
 380                                  gimple_cond_rhs (stmt),
 381                                  NULL_TREE, NULL_TREE);
 382   gsi = gsi_last_bb (skip_bb);
 383   gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
 384
 385   skip_e = EDGE_SUCC (skip_bb, 0);
 386   skip_e->flags &= ~EDGE_FALLTHRU;
 387   new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
 388   if (exit->flags & EDGE_TRUE_VALUE)
 389     {
 390       skip_e->flags |= EDGE_TRUE_VALUE;
 391       new_e->flags |= EDGE_FALSE_VALUE;
 392     }
 393   else
 394     {
 395       skip_e->flags |= EDGE_FALSE_VALUE;
 396       new_e->flags |= EDGE_TRUE_VALUE;
 397     }
 398
 399   new_e->probability = profile_probability::very_likely ();
 400   skip_e->probability = new_e->probability.invert ();
 401
 402   return new_e;
 403 }
 404
 405 /* This returns the new bound for iterations given the original iteration
 406    space in NITER, an arbitrary new bound BORDER, assumed to be some
 407    comparison value with a different IV, the initial value GUARD_INIT of
 408    that other IV, and the comparison code GUARD_CODE that compares
 409    that other IV with BORDER.  We return an SSA name, and place any
 410    necessary statements for that computation into *STMTS.
 411
 412    For example for such a loop:
 413
 414      for (i = beg, j = guard_init; i < end; i++, j++)
 415        if (j < border)  // this is supposed to be true/false
 416          ...
 417
 418    we want to return a new bound (on j) that makes the loop iterate
 419    as long as the condition j < border stays true.  We also don't want
 420    to iterate more often than the original loop, so we have to introduce
 421    some cut-off as well (via min/max), effectively resulting in:
 422
 423      newend = min (end+guard_init-beg, border)
 424      for (i = beg; j = guard_init; j < newend; i++, j++)
 425        if (j < c)
 426          ...
 427
 428    Depending on the direction of the IVs and if the exit tests
 429    are strict or non-strict we need to use MIN or MAX,
 430    and add or subtract 1.  This routine computes newend above.  */
 431
 432 static tree
 433 compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
 434                          tree border,
 435                          enum tree_code guard_code, tree guard_init)
 436 {
 437   /* The niter structure contains the after-increment IV, we need
 438      the loop-enter base, so subtract STEP once.  */
 439   tree controlbase = force_gimple_operand (niter->control.base,
 440                                            stmts, true, NULL_TREE);
 441   tree controlstep = niter->control.step;
 442   tree enddiff;
 443   if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
 444     {
 445       controlstep = gimple_build (stmts, NEGATE_EXPR,
 446                                   TREE_TYPE (controlstep), controlstep);
 447       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
 448                               TREE_TYPE (controlbase),
 449                               controlbase, controlstep);
 450     }
 451   else
 452     enddiff = gimple_build (stmts, MINUS_EXPR,
 453                             TREE_TYPE (controlbase),
 454                             controlbase, controlstep);
 455
 456   /* Compute end-beg.  */
 457   gimple_seq stmts2;
 458   tree end = force_gimple_operand (niter->bound, &stmts2,
 459                                         true, NULL_TREE);
 460   gimple_seq_add_seq_without_update (stmts, stmts2);
 461   if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
 462     {
 463       tree tem = gimple_convert (stmts, sizetype, enddiff);
 464       tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
 465       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
 466                               TREE_TYPE (enddiff),
 467                               end, tem);
 468     }
 469   else
 470     enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
 471                             end, enddiff);
 472
 473   /* Compute guard_init + (end-beg).  */
 474   tree newbound;
 475   enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
 476   if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
 477     {
 478       enddiff = gimple_convert (stmts, sizetype, enddiff);
 479       newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
 480                                TREE_TYPE (guard_init),
 481                                guard_init, enddiff);
 482     }
 483   else
 484     newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
 485                              guard_init, enddiff);
 486
 487   /* Depending on the direction of the IVs the new bound for the first
 488      loop is the minimum or maximum of old bound and border.
 489      Also, if the guard condition isn't strictly less or greater,
 490      we need to adjust the bound.  */
 491   int addbound = 0;
 492   enum tree_code minmax;
 493   if (niter->cmp == LT_EXPR)
 494     {
 495       /* GT and LE are the same, inverted.  */
 496       if (guard_code == GT_EXPR || guard_code == LE_EXPR)
 497         addbound = -1;
 498       minmax = MIN_EXPR;
 499     }
 500   else
 501     {
 502       gcc_assert (niter->cmp == GT_EXPR);
 503       if (guard_code == GE_EXPR || guard_code == LT_EXPR)
 504         addbound = 1;
 505       minmax = MAX_EXPR;
 506     }
 507
 508   if (addbound)
 509     {
 510       tree type2 = TREE_TYPE (newbound);
 511       if (POINTER_TYPE_P (type2))
 512         type2 = sizetype;
 513       newbound = gimple_build (stmts,
 514                                POINTER_TYPE_P (TREE_TYPE (newbound))
 515                                ? POINTER_PLUS_EXPR : PLUS_EXPR,
 516                                TREE_TYPE (newbound),
 517                                newbound,
 518                                build_int_cst (type2, addbound));
 519     }
 520
 521   tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
 522                               border, newbound);
 523   return newend;
 524 }
 525
 526 /* Fix the two loop's bb count after split based on the split edge probability,
 527    don't adjust the bbs dominated by true branches of that loop to avoid
 528    dropping 1s down.  */
 529 static void
 530 fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
 531                          edge false_edge)
 532 {
 533   /* Proportion first loop's bb counts except those dominated by true
 534      branch to avoid drop 1s down.  */
 535   basic_block *bbs1, *bbs2;
 536   bbs1 = get_loop_body (loop1);
 537   unsigned j;
 538   for (j = 0; j < loop1->num_nodes; j++)
 539     if (bbs1[j] == loop1->latch
 540         /* Watch for case where the true conditional is empty.  */
 541         || !single_pred_p (true_edge->dest)
 542         || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
 543       bbs1[j]->count
 544         = bbs1[j]->count.apply_probability (true_edge->probability);
 545   free (bbs1);
 546
 547   /* Proportion second loop's bb counts except those dominated by false
 548      branch to avoid drop 1s down.  */
 549   basic_block bbi_copy = get_bb_copy (false_edge->dest);
 550   bbs2 = get_loop_body (loop2);
 551   for (j = 0; j < loop2->num_nodes; j++)
 552     if (bbs2[j] == loop2->latch
 553         /* Watch for case where the flase conditional is empty.  */
 554         || !single_pred_p (bbi_copy)
 555         || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
 556       bbs2[j]->count
 557         = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
 558   free (bbs2);
 559 }
 560
 561 /* Checks if LOOP contains an conditional block whose condition
 562    depends on which side in the iteration space it is, and if so
 563    splits the iteration space into two loops.  Returns true if the
 564    loop was split.  NITER must contain the iteration descriptor for the
 565    single exit of LOOP.  */
 566
 567 static bool
 568 split_loop (class loop *loop1)
 569 {
 570   class tree_niter_desc niter;
 571   basic_block *bbs;
 572   unsigned i;
 573   bool changed = false;
 574   tree guard_iv;
 575   tree border = NULL_TREE;
 576   affine_iv iv;
 577   edge exit1;
 578
 579   if (!(exit1 = single_exit (loop1))
 580       || EDGE_COUNT (exit1->src->succs) != 2
 581       /* ??? We could handle non-empty latches when we split the latch edge
 582          (not the exit edge), and put the new exit condition in the new block.
 583          OTOH this executes some code unconditionally that might have been
 584          skipped by the original exit before.  */
 585       || !empty_block_p (loop1->latch)
 586       || !easy_exit_values (loop1)
 587       || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
 588       || niter.cmp == ERROR_MARK)
 589     return false;
 590   if (niter.cmp == NE_EXPR)
 591     {
 592       if (!niter.control.no_overflow)
 593         return false;
 594       if (tree_int_cst_sign_bit (niter.control.step))
 595         niter.cmp = GT_EXPR;
 596       else
 597         niter.cmp = LT_EXPR;
 598     }
 599
 600   bbs = get_loop_body (loop1);
 601
 602   if (!can_copy_bbs_p (bbs, loop1->num_nodes))
 603     {
 604       free (bbs);
 605       return false;
 606     }
 607
 608   /* Find a splitting opportunity.  */
 609   enum tree_code guard_code;
 610   for (i = 0; i < loop1->num_nodes; i++)
 611     if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
 612       {
 613         /* Handling opposite steps is not implemented yet.  Neither
 614            is handling different step sizes.  */
 615         if ((tree_int_cst_sign_bit (iv.step)
 616              != tree_int_cst_sign_bit (niter.control.step))
 617             || !tree_int_cst_equal (iv.step, niter.control.step))
 618           continue;
 619
 620         /* Find a loop PHI node that defines guard_iv directly,
 621            or create one doing that.  */
 622         gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
 623         if (!phi)
 624           continue;
 625         gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
 626         tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
 627                                                  loop_preheader_edge (loop1));
 628
 629         /* Loop splitting is implemented by versioning the loop, placing
 630            the new loop after the old loop, make the first loop iterate
 631            as long as the conditional stays true (or false) and let the
 632            second (new) loop handle the rest of the iterations.
 633
 634            First we need to determine if the condition will start being true
 635            or false in the first loop.  */
 636         bool initial_true;
 637         switch (guard_code)
 638           {
 639             case LT_EXPR:
 640             case LE_EXPR:
 641               initial_true = !tree_int_cst_sign_bit (iv.step);
 642               break;
 643             case GT_EXPR:
 644             case GE_EXPR:
 645               initial_true = tree_int_cst_sign_bit (iv.step);
 646               break;
 647             default:
 648               gcc_unreachable ();
 649           }
 650
 651         /* Build a condition that will skip the first loop when the
 652            guard condition won't ever be true (or false).  */
 653         gimple_seq stmts2;
 654         border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
 655         if (stmts2)
 656           {
 657             /* When the split condition is not always evaluated make sure
 658                to rewrite it to defined overflow.  */
 659             if (!dominated_by_p (CDI_DOMINATORS, exit1->src, bbs[i]))
 660               {
 661                 gimple_stmt_iterator gsi;
 662                 gsi = gsi_start (stmts2);
 663                 while (!gsi_end_p (gsi))
 664                   {
 665                     gimple *stmt = gsi_stmt (gsi);
 666                     if (is_gimple_assign (stmt)
 667                         && arith_code_with_undefined_signed_overflow
 668                                                 (gimple_assign_rhs_code (stmt)))
 669                       rewrite_to_defined_overflow (&gsi);
 670                     gsi_next (&gsi);
 671                   }
 672               }
 673             gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
 674                                               stmts2);
 675           }
 676         tree cond = fold_build2 (guard_code, boolean_type_node,
 677                                  guard_init, border);
 678         if (!initial_true)
 679           cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
 680
 681         edge true_edge, false_edge;
 682         extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
 683
 684         /* Now version the loop, placing loop2 after loop1 connecting
 685            them, and fix up SSA form for that.  */
 686         initialize_original_copy_tables ();
 687         basic_block cond_bb;
 688
 689         profile_probability loop1_prob
 690           = integer_onep (cond) ? profile_probability::always ()
 691                                 : true_edge->probability;
 692         /* TODO: It is commonly a case that we know that both loops will be
 693            entered.  very_likely below is the probability that second loop will
 694            be entered given by connect_loops.  We should work out the common
 695            case it is always true.  */
 696         class loop *loop2 = loop_version (loop1, cond, &cond_bb,
 697                                           loop1_prob,
 698                                           /* Pass always as we will later
 699                                              redirect first loop to second
 700                                              loop.  */
 701                                           profile_probability::always (),
 702                                           profile_probability::always (),
 703                                           profile_probability::very_likely (),
 704                                           true);
 705         gcc_assert (loop2);
 706         /* Correct probability of edge  cond_bb->preheader_of_loop2.  */
 707         single_pred_edge
 708                 (loop_preheader_edge (loop2)->src)->probability
 709                         = loop1_prob.invert ();
 710
 711         fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
 712         /* If conditional we split on has reliable profilea nd both
 713            preconditionals of loop1 and loop2 are constant true, we can
 714            only redistribute the iteration counts to the split loops.
 715
 716            If the conditionals we insert before loop1 or loop2 are non-trivial
 717            they increase expected loop count, so account this accordingly.
 718            If we do not know the probability of split conditional, avoid
 719            reudcing loop estimates, since we do not really know how they are
 720            split between of the two new loops.  Keep orignal estimate since
 721            it is likely better then completely dropping it.
 722
 723            TODO: If we know that one of the new loops has constant
 724            number of iterations, we can do better.  We could also update
 725            upper bounds.  */
 726         if (loop1->any_estimate
 727             && wi::fits_shwi_p (loop1->nb_iterations_estimate))
 728           {
 729             sreal scale = true_edge->probability.reliable_p ()
 730                           ? true_edge->probability.to_sreal () : (sreal)1;
 731             sreal scale2 = false_edge->probability.reliable_p ()
 732                           ? false_edge->probability.to_sreal () : (sreal)1;
 733             sreal div1 = loop1_prob.initialized_p ()
 734                          ? loop1_prob.to_sreal () : (sreal)1/(sreal)2;
 735             /* +1 to get header interations rather than latch iterations and then
 736                -1 to convert back.  */
 737             if (div1 != 0)
 738               loop1->nb_iterations_estimate
 739                 = MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
 740                        * scale / div1).to_nearest_int () - 1, 0);
 741             else
 742               loop1->any_estimate = false;
 743             loop2->nb_iterations_estimate
 744               = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1) * scale2
 745                      / profile_probability::very_likely ().to_sreal ())
 746                      .to_nearest_int () - 1, 0);
 747           }
 748         update_loop_exit_probability_scale_dom_bbs (loop1);
 749         update_loop_exit_probability_scale_dom_bbs (loop2);
 750
 751         edge new_e = connect_loops (loop1, loop2);
 752         connect_loop_phis (loop1, loop2, new_e);
 753
 754         /* The iterations of the second loop is now already
 755            exactly those that the first loop didn't do, but the
 756            iteration space of the first loop is still the original one.
 757            Compute the new bound for the guarding IV and patch the
 758            loop exit to use it instead of original IV and bound.  */
 759         gimple_seq stmts = NULL;
 760         tree newend = compute_new_first_bound (&stmts, &niter, border,
 761                                                guard_code, guard_init);
 762         if (stmts)
 763           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
 764                                             stmts);
 765         tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
 766         patch_loop_exit (loop1, guard_code, guard_next, newend, initial_true);
 767
 768         /* Finally patch out the two copies of the condition to be always
 769            true/false (or opposite).  */
 770         gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
 771         gcond *force_false = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
 772         if (!initial_true)
 773           std::swap (force_true, force_false);
 774         gimple_cond_make_true (force_true);
 775         gimple_cond_make_false (force_false);
 776         update_stmt (force_true);
 777         update_stmt (force_false);
 778
 779         free_original_copy_tables ();
 780
 781         changed = true;
 782         if (dump_file && (dump_flags & TDF_DETAILS))
 783           fprintf (dump_file, ";; Loop split.\n");
 784
 785         if (dump_enabled_p ())
 786           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");
 787
 788         /* Only deal with the first opportunity.  */
 789         break;
 790       }
 791
 792   free (bbs);
 793   return changed;
 794 }
 795
 796 /* Another transformation of loops like:
 797
 798    for (i = INIT (); CHECK (i); i = NEXT ())
 799      {
 800        if (expr (a_1, a_2, ..., a_n))  // expr is pure
 801          a_j = ...;  // change at least one a_j
 802        else
 803          S;          // not change any a_j
 804      }
 805
 806    into:
 807
 808    for (i = INIT (); CHECK (i); i = NEXT ())
 809      {
 810        if (expr (a_1, a_2, ..., a_n))
 811          a_j = ...;
 812        else
 813          {
 814            S;
 815            i = NEXT ();
 816            break;
 817          }
 818      }
 819
 820    for (; CHECK (i); i = NEXT ())
 821      {
 822        S;
 823      }
 824
 825    */
 826
 827 /* Data structure to hold temporary information during loop split upon
 828    semi-invariant conditional statement.  */
 829 class split_info {
 830 public:
 831   /* Array of all basic blocks in a loop, returned by get_loop_body().  */
 832   basic_block *bbs;
 833
 834   /* All memory store/clobber statements in a loop.  */
 835   auto_vec<gimple *> memory_stores;
 836
 837   /* Whether above memory stores vector has been filled.  */
 838   int need_init;
 839
 840   /* Control dependencies of basic blocks in a loop.  */
 841   auto_vec<hash_set<basic_block> *> control_deps;
 842
 843   split_info () : bbs (NULL),  need_init (true) { }
 844
 845   ~split_info ()
 846     {
 847       if (bbs)
 848         free (bbs);
 849
 850       for (unsigned i = 0; i < control_deps.length (); i++)
 851         delete control_deps[i];
 852     }
 853 };
 854
 855 /* Find all statements with memory-write effect in LOOP, including memory
 856    store and non-pure function call, and keep those in a vector.  This work
 857    is only done one time, for the vector should be constant during analysis
 858    stage of semi-invariant condition.  */
 859
 860 static void
 861 find_vdef_in_loop (struct loop *loop)
 862 {
 863   split_info *info = (split_info *) loop->aux;
 864   gphi *vphi = get_virtual_phi (loop->header);
 865
 866   /* Indicate memory store vector has been filled.  */
 867   info->need_init = false;
 868
 869   /* If loop contains memory operation, there must be a virtual PHI node in
 870      loop header basic block.  */
 871   if (vphi == NULL)
 872     return;
 873
 874   /* All virtual SSA names inside the loop are connected to be a cyclic
 875      graph via virtual PHI nodes.  The virtual PHI node in loop header just
 876      links the first and the last virtual SSA names, by using the last as
 877      PHI operand to define the first.  */
 878   const edge latch = loop_latch_edge (loop);
 879   const tree first = gimple_phi_result (vphi);
 880   const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
 881
 882   /* The virtual SSA cyclic graph might consist of only one SSA name, who
 883      is defined by itself.
 884
 885        .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
 886
 887      This means the loop contains only memory loads, so we can skip it.  */
 888   if (first == last)
 889     return;
 890
 891   auto_vec<gimple *> other_stores;
 892   auto_vec<tree> worklist;
 893   auto_bitmap visited;
 894
 895   bitmap_set_bit (visited, SSA_NAME_VERSION (first));
 896   bitmap_set_bit (visited, SSA_NAME_VERSION (last));
 897   worklist.safe_push (last);
 898
 899   do
 900     {
 901       tree vuse = worklist.pop ();
 902       gimple *stmt = SSA_NAME_DEF_STMT (vuse);
 903
 904       /* We mark the first and last SSA names as visited at the beginning,
 905          and reversely start the process from the last SSA name towards the
 906          first, which ensures that this do-while will not touch SSA names
 907          defined outside the loop.  */
 908       gcc_assert (gimple_bb (stmt)
 909                   && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
 910
 911       if (gimple_code (stmt) == GIMPLE_PHI)
 912         {
 913           gphi *phi = as_a <gphi *> (stmt);
 914
 915           for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
 916             {
 917               tree arg = gimple_phi_arg_def (stmt, i);
 918
 919               if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
 920                 worklist.safe_push (arg);
 921             }
 922         }
 923       else
 924         {
 925           tree prev = gimple_vuse (stmt);
 926
 927           /* Non-pure call statement is conservatively assumed to impact all
 928              memory locations.  So place call statements ahead of other memory
 929              stores in the vector with an idea of using them as shortcut
 930              terminators to memory alias analysis.  */
 931           if (gimple_code (stmt) == GIMPLE_CALL)
 932             info->memory_stores.safe_push (stmt);
 933           else
 934             other_stores.safe_push (stmt);
 935
 936           if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
 937             worklist.safe_push (prev);
 938         }
 939     } while (!worklist.is_empty ());
 940
 941     info->memory_stores.safe_splice (other_stores);
 942 }
 943
 944 /* Two basic blocks have equivalent control dependency if one dominates to
 945    the other, and it is post-dominated by the latter.  Given a basic block
 946    BB in LOOP, find farest equivalent dominating basic block.  For BB, there
 947    is a constraint that BB does not post-dominate loop header of LOOP, this
 948    means BB is control-dependent on at least one basic block in LOOP.  */
 949
 950 static basic_block
 951 get_control_equiv_head_block (struct loop *loop, basic_block bb)
 952 {
 953   while (!bb->aux)
 954     {
 955       basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
 956
 957       gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
 958
 959       if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
 960         break;
 961
 962       bb = dom_bb;
 963     }
 964   return bb;
 965 }
 966
 967 /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
 968    dependent on.  */
 969
 970 static hash_set<basic_block> *
 971 find_control_dep_blocks (struct loop *loop, basic_block bb)
 972 {
 973   /* BB has same control dependency as loop header, then it is not control-
 974      dependent on any basic block in LOOP.  */
 975   if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
 976     return NULL;
 977
 978   basic_block equiv_head = get_control_equiv_head_block (loop, bb);
 979
 980   if (equiv_head->aux)
 981     {
 982       /* There is a basic block containing control dependency equivalent
 983          to BB.  No need to recompute that, and also set this information
 984          to other equivalent basic blocks.  */
 985       for (; bb != equiv_head;
 986            bb = get_immediate_dominator (CDI_DOMINATORS, bb))
 987         bb->aux = equiv_head->aux;
 988       return (hash_set<basic_block> *) equiv_head->aux;
 989     }
 990
 991   /* A basic block X is control-dependent on another Y iff there exists
 992      a path from X to Y, in which every basic block other than X and Y
 993      is post-dominated by Y, but X is not post-dominated by Y.
 994
 995      According to this rule, traverse basic blocks in the loop backwards
 996      starting from BB, if a basic block is post-dominated by BB, extend
 997      current post-dominating path to this block, otherwise it is another
 998      one that BB is control-dependent on.  */
 999
1000   auto_vec<basic_block> pdom_worklist;
1001   hash_set<basic_block> pdom_visited;
1002   hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;
1003
1004   pdom_worklist.safe_push (equiv_head);
1005
1006   do
1007     {
1008       basic_block pdom_bb = pdom_worklist.pop ();
1009       edge_iterator ei;
1010       edge e;
1011
1012       if (pdom_visited.add (pdom_bb))
1013         continue;
1014
1015       FOR_EACH_EDGE (e, ei, pdom_bb->preds)
1016         {
1017           basic_block pred_bb = e->src;
1018
1019           if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
1020             {
1021               dep_bbs->add (pred_bb);
1022               continue;
1023             }
1024
1025           pred_bb = get_control_equiv_head_block (loop, pred_bb);
1026
1027           if (pdom_visited.contains (pred_bb))
1028             continue;
1029
1030           if (!pred_bb->aux)
1031             {
1032               pdom_worklist.safe_push (pred_bb);
1033               continue;
1034             }
1035
1036           /* If control dependency of basic block is available, fast extend
1037              post-dominating path using the information instead of advancing
1038              forward step-by-step.  */
1039           hash_set<basic_block> *pred_dep_bbs
1040                         = (hash_set<basic_block> *) pred_bb->aux;
1041
1042           for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
1043                iter != pred_dep_bbs->end (); ++iter)
1044             {
1045               basic_block pred_dep_bb = *iter;
1046
1047               /* Basic blocks can either be in control dependency of BB, or
1048                  must be post-dominated by BB, if so, extend the path from
1049                  these basic blocks.  */
1050               if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
1051                 dep_bbs->add (pred_dep_bb);
1052               else if (!pdom_visited.contains (pred_dep_bb))
1053                 pdom_worklist.safe_push (pred_dep_bb);
1054             }
1055         }
1056     } while (!pdom_worklist.is_empty ());
1057
1058   /* Record computed control dependencies in loop so that we can reach them
1059      when reclaiming resources.  */
1060   ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);
1061
1062   /* Associate control dependence with related equivalent basic blocks.  */
1063   for (equiv_head->aux = dep_bbs; bb != equiv_head;
1064        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
1065     bb->aux = dep_bbs;
1066
1067   return dep_bbs;
1068 }
1069
1070 /* Forward declaration */
1071
1072 static bool
1073 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1074                          const_basic_block skip_head,
1075                          hash_map<gimple *, bool> &stmt_stat);
1076
1077 /* Given STMT, memory load or pure call statement, check whether it is impacted
1078    by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
1079    trace is composed of SKIP_HEAD and those basic block dominated by it, always
1080    corresponds to one branch of a conditional statement).  If SKIP_HEAD is
1081    NULL, all basic blocks of LOOP are checked.  */
1082
1083 static bool
1084 vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
1085                        const_basic_block skip_head)
1086 {
1087   split_info *info = (split_info *) loop->aux;
1088   tree rhs = NULL_TREE;
1089   ao_ref ref;
1090   gimple *store;
1091   unsigned i;
1092
1093   /* Collect memory store/clobber statements if haven't done that.  */
1094   if (info->need_init)
1095     find_vdef_in_loop (loop);
1096
1097   if (is_gimple_assign (stmt))
1098     rhs = gimple_assign_rhs1 (stmt);
1099
1100   ao_ref_init (&ref, rhs);
1101
1102   FOR_EACH_VEC_ELT (info->memory_stores, i, store)
1103     {
1104       /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
1105       if (skip_head
1106           && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
1107         continue;
1108
1109       if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1110         return false;
1111     }
1112
1113   return true;
1114 }
1115
1116 /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1117    certain iteration of LOOP, check whether an SSA name (NAME) remains
1118    unchanged in next iteration.  We call this characteristic semi-
1119    invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1120    blocks and control flows in the loop will be considered.  Semi-invariant
1121    state of checked statement is cached in hash map STMT_STAT to avoid
1122    redundant computation in possible following re-check.  */
1123
1124 static inline bool
1125 ssa_semi_invariant_p (struct loop *loop, tree name,
1126                       const_basic_block skip_head,
1127                       hash_map<gimple *, bool> &stmt_stat)
1128 {
1129   gimple *def = SSA_NAME_DEF_STMT (name);
1130   const_basic_block def_bb = gimple_bb (def);
1131
1132   /* An SSA name defined outside loop is definitely semi-invariant.  */
1133   if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1134     return true;
1135
1136   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1137     return false;
1138
1139   return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1140 }
1141
1142 /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
1143    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1144    are excluded from LOOP.  */
1145
1146 static bool
1147 loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
1148                                 const_basic_block skip_head)
1149 {
1150   const_edge latch = loop_latch_edge (loop);
1151   tree name = gimple_phi_result (loop_phi);
1152   tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
1153
1154   gcc_checking_assert (from);
1155
1156   /* Loop iteration PHI node locates in loop header, and it has two source
1157      operands, one is an initial value coming from outside the loop, the other
1158      is a value through latch of the loop, which is derived in last iteration,
1159      we call the latter latch value.  From the PHI node to definition of latch
1160      value, if excluding branch trace starting from SKIP_HEAD, except copy-
1161      assignment or likewise, there is no other kind of value redefinition, SSA
1162      name defined by the PHI node is semi-invariant.
1163
1164                          loop entry
1165                               |     .--- latch ---.
1166                               |     |             |
1167                               v     v             |
1168                   x_1 = PHI <x_0,  x_3>           |
1169                            |                      |
1170                            v                      |
1171               .------- if (cond) -------.         |
1172               |                         |         |
1173               |                     [ SKIP ]      |
1174               |                         |         |
1175               |                     x_2 = ...     |
1176               |                         |         |
1177               '---- T ---->.<---- F ----'         |
1178                            |                      |
1179                            v                      |
1180                   x_3 = PHI <x_1, x_2>            |
1181                            |                      |
1182                            '----------------------'
1183
1184      Suppose in certain iteration, execution flow in above graph goes through
1185      true branch, which means that one source value to define x_3 in false
1186      branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
1187      iterations is defined by x_3, we know that x_1 will never changed if COND
1188      always chooses true branch from then on.  */
1189
1190   while (from != name)
1191     {
1192       /* A new value comes from a CONSTANT.  */
1193       if (TREE_CODE (from) != SSA_NAME)
1194         return false;
1195
1196       gimple *stmt = SSA_NAME_DEF_STMT (from);
1197       const_basic_block bb = gimple_bb (stmt);
1198
1199       /* A new value comes from outside the loop.  */
1200       if (!bb || !flow_bb_inside_loop_p (loop, bb))
1201         return false;
1202
1203       from = NULL_TREE;
1204
1205       if (gimple_code (stmt) == GIMPLE_PHI)
1206         {
1207           gphi *phi = as_a <gphi *> (stmt);
1208
1209           for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1210             {
1211               if (skip_head)
1212                 {
1213                   const_edge e = gimple_phi_arg_edge (phi, i);
1214
1215                   /* Don't consider redefinitions in excluded basic blocks.  */
1216                   if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1217                     continue;
1218                 }
1219
1220               tree arg = gimple_phi_arg_def (phi, i);
1221
1222               if (!from)
1223                 from = arg;
1224               else if (!operand_equal_p (from, arg, 0))
1225                 /* There are more than one source operands that provide
1226                    different values to the SSA name, it is variant.  */
1227                 return false;
1228             }
1229         }
1230       else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1231         {
1232           /* For simple value copy, check its rhs instead.  */
1233           if (gimple_assign_ssa_name_copy_p (stmt))
1234             from = gimple_assign_rhs1 (stmt);
1235         }
1236
1237       /* Any other kind of definition is deemed to introduce a new value
1238          to the SSA name.  */
1239       if (!from)
1240         return false;
1241     }
1242   return true;
1243 }
1244
1245 /* Check whether conditional predicates that BB is control-dependent on, are
1246    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1247    are excluded from LOOP.  Semi-invariant state of checked statement is cached
1248    in hash map STMT_STAT.  */
1249
1250 static bool
1251 control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1252                               const_basic_block skip_head,
1253                               hash_map<gimple *, bool> &stmt_stat)
1254 {
1255   hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1256
1257   if (!dep_bbs)
1258     return true;
1259
1260   for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1261        iter != dep_bbs->end (); ++iter)
1262     {
1263       gimple *last = *gsi_last_bb (*iter);
1264       if (!last)
1265         return false;
1266
1267       /* Only check condition predicates.  */
1268       if (gimple_code (last) != GIMPLE_COND
1269           && gimple_code (last) != GIMPLE_SWITCH)
1270         return false;
1271
1272       if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1273         return false;
1274     }
1275
1276   return true;
1277 }
1278
1279 /* Check whether STMT is semi-invariant in LOOP, iff all its operands are
1280    semi-invariant, consequently, all its defined values are semi-invariant.
1281    Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
1282    Semi-invariant state of checked statement is cached in hash map
1283    STMT_STAT.  */
1284
1285 static bool
1286 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1287                          const_basic_block skip_head,
1288                          hash_map<gimple *, bool> &stmt_stat)
1289 {
1290   bool existed;
1291   bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1292
1293   if (existed)
1294     return invar;
1295
1296   /* A statement might depend on itself, which is treated as variant.  So set
1297      state of statement under check to be variant to ensure that.  */
1298   invar = false;
1299
1300   if (gimple_code (stmt) == GIMPLE_PHI)
1301     {
1302       gphi *phi = as_a <gphi *> (stmt);
1303
1304       if (gimple_bb (stmt) == loop->header)
1305         {
1306           /* If the entry value is subject to abnormal coalescing
1307              avoid the transform since we're going to duplicate the
1308              loop header and thus likely introduce overlapping life-ranges
1309              between the PHI def and the entry on the path when the
1310              first loop is skipped.  */
1311           tree entry_def
1312             = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1313           if (TREE_CODE (entry_def) == SSA_NAME
1314               && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1315             return false;
1316           invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1317           return invar;
1318         }
1319
1320       /* For a loop PHI node that does not locate in loop header, it is semi-
1321          invariant only if two conditions are met.  The first is its source
1322          values are derived from CONSTANT (including loop-invariant value), or
1323          from SSA name defined by semi-invariant loop iteration PHI node.  The
1324          second is its source incoming edges are control-dependent on semi-
1325          invariant conditional predicates.  */
1326       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1327         {
1328           const_edge e = gimple_phi_arg_edge (phi, i);
1329           tree arg = gimple_phi_arg_def (phi, i);
1330
1331           if (TREE_CODE (arg) == SSA_NAME)
1332             {
1333               if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1334                 return false;
1335
1336               /* If source value is defined in location from where the source
1337                  edge comes in, no need to check control dependency again
1338                  since this has been done in above SSA name check stage.  */
1339               if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1340                 continue;
1341             }
1342
1343           if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1344                                              stmt_stat))
1345             return false;
1346         }
1347     }
1348   else
1349     {
1350       ssa_op_iter iter;
1351       tree use;
1352
1353       /* Volatile memory load or return of normal (non-const/non-pure) call
1354          should not be treated as constant in each iteration of loop.  */
1355       if (gimple_has_side_effects (stmt))
1356         return false;
1357
1358       /* Check if any memory store may kill memory load at this place.  */
1359       if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1360         return false;
1361
1362       /* Although operand of a statement might be SSA name, CONSTANT or
1363          VARDECL, here we only need to check SSA name operands.  This is
1364          because check on VARDECL operands, which involve memory loads,
1365          must have been done prior to invocation of this function in
1366          vuse_semi_invariant_p.  */
1367       FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1368         if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1369           return false;
1370     }
1371
1372   if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1373                                      stmt_stat))
1374     return false;
1375
1376   /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1377      to new insertion, and thus invar may point to invalid memory.  */
1378   stmt_stat.put (stmt, true);
1379   return true;
1380 }
1381
1382 /* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
1383    blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.  */
1384
1385 static bool
1386 stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1387                        const_basic_block skip_head)
1388 {
1389   hash_map<gimple *, bool> stmt_stat;
1390   return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1391 }
1392
1393 /* Determine when conditional statement never transfers execution to one of its
1394    branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1395    and those basic blocks dominated by BRANCH_BB.  */
1396
1397 static bool
1398 branch_removable_p (basic_block branch_bb)
1399 {
1400   edge_iterator ei;
1401   edge e;
1402
1403   if (single_pred_p (branch_bb))
1404     return true;
1405
1406   FOR_EACH_EDGE (e, ei, branch_bb->preds)
1407     {
1408       if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1409         continue;
1410
1411       if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1412         continue;
1413
1414        /* The branch can be reached from opposite branch, or from some
1415           statement not dominated by the conditional statement.  */
1416       return false;
1417     }
1418
1419   return true;
1420 }
1421
1422 /* Find out which branch of a conditional statement (COND) is invariant in the
1423    execution context of LOOP.  That is: once the branch is selected in certain
1424    iteration of the loop, any operand that contributes to computation of the
1425    conditional statement remains unchanged in all following iterations.  */
1426
1427 static edge
1428 get_cond_invariant_branch (struct loop *loop, gcond *cond)
1429 {
1430   basic_block cond_bb = gimple_bb (cond);
1431   basic_block targ_bb[2];
1432   bool invar[2];
1433   unsigned invar_checks = 0;
1434
1435   for (unsigned i = 0; i < 2; i++)
1436     {
1437       targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1438
1439       /* One branch directs to loop exit, no need to perform loop split upon
1440          this conditional statement.  Firstly, it is trivial if the exit branch
1441          is semi-invariant, for the statement is just to break loop.  Secondly,
1442          if the opposite branch is semi-invariant, it means that the statement
1443          is real loop-invariant, which is covered by loop unswitch.  */
1444       if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1445         return NULL;
1446     }
1447
1448   for (unsigned i = 0; i < 2; i++)
1449     {
1450       invar[!i] = false;
1451
1452       if (!branch_removable_p (targ_bb[i]))
1453         continue;
1454
1455       /* Given a semi-invariant branch, if its opposite branch dominates
1456          loop latch, it and its following trace will only be executed in
1457          final iteration of loop, namely it is not part of repeated body
1458          of the loop.  Similar to the above case that the branch is loop
1459          exit, no need to split loop.  */
1460       if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1461         continue;
1462
1463       invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1464       invar_checks++;
1465     }
1466
1467   /* With both branches being invariant (handled by loop unswitch) or
1468      variant is not what we want.  */
1469   if (invar[0] ^ !invar[1])
1470     return NULL;
1471
1472   /* Found a real loop-invariant condition, do nothing.  */
1473   if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1474     return NULL;
1475
1476   return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1477 }
1478
1479 /* Calculate increased code size measured by estimated insn number if applying
1480    loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
1481
1482 static int
1483 compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1484 {
1485   basic_block cond_bb = branch_edge->src;
1486   unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1487   basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1488   basic_block *bbs = ((split_info *) loop->aux)->bbs;
1489   int num = 0;
1490
1491   for (unsigned i = 0; i < loop->num_nodes; i++)
1492     {
1493       /* Do no count basic blocks only in opposite branch.  */
1494       if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1495         continue;
1496
1497       num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1498     }
1499
1500   /* It is unnecessary to evaluate expression of the conditional statement
1501      in new loop that contains only invariant branch.  This expression should
1502      be constant value (either true or false).  Exclude code size of insns
1503      that contribute to computation of the expression.  */
1504
1505   auto_vec<gimple *> worklist;
1506   hash_set<gimple *> removed;
1507   gimple *stmt = last_nondebug_stmt (cond_bb);
1508
1509   worklist.safe_push (stmt);
1510   removed.add (stmt);
1511   num -= estimate_num_insns (stmt, &eni_size_weights);
1512
1513   do
1514     {
1515       ssa_op_iter opnd_iter;
1516       use_operand_p opnd_p;
1517
1518       stmt = worklist.pop ();
1519       FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1520         {
1521           tree opnd = USE_FROM_PTR (opnd_p);
1522
1523           if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1524             continue;
1525
1526           gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1527           use_operand_p use_p;
1528           imm_use_iterator use_iter;
1529
1530           if (removed.contains (opnd_stmt)
1531               || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1532             continue;
1533
1534           FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1535             {
1536               gimple *use_stmt = USE_STMT (use_p);
1537
1538               if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1539                 {
1540                   opnd_stmt = NULL;
1541                   break;
1542                 }
1543             }
1544
1545           if (opnd_stmt)
1546             {
1547               worklist.safe_push (opnd_stmt);
1548               removed.add (opnd_stmt);
1549               num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1550             }
1551         }
1552     } while (!worklist.is_empty ());
1553
1554   gcc_assert (num >= 0);
1555   return num;
1556 }
1557
1558 /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1559    and check whether it is eligible and profitable to perform loop split upon
1560    this branch in LOOP.  */
1561
1562 static edge
1563 get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1564 {
1565   edge invar_branch = get_cond_invariant_branch (loop, cond);
1566   if (!invar_branch)
1567     return NULL;
1568
1569   /* When accurate profile information is available, and execution
1570      frequency of the branch is too low, just let it go.  */
1571   profile_probability prob = invar_branch->probability;
1572   if (prob.reliable_p ())
1573     {
1574       int thres = param_min_loop_cond_split_prob;
1575
1576       if (prob < profile_probability::always ().apply_scale (thres, 100))
1577         return NULL;
1578     }
1579
1580   /* Add a threshold for increased code size to disable loop split.  */
1581   if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1582     return NULL;
1583
1584   return invar_branch;
1585 }
1586
1587 /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1588    conditional statement, perform loop split transformation illustrated
1589    as the following graph.
1590
1591                .-------T------ if (true) ------F------.
1592                |                    .---------------. |
1593                |                    |               | |
1594                v                    |               v v
1595           pre-header                |            pre-header
1596                | .------------.     |                 | .------------.
1597                | |            |     |                 | |            |
1598                | v            |     |                 | v            |
1599              header           |     |               header           |
1600                |              |     |                 |              |
1601       .--- if (cond) ---.     |     |        .--- if (true) ---.     |
1602       |                 |     |     |        |                 |     |
1603   invariant             |     |     |    invariant             |     |
1604       |                 |     |     |        |                 |     |
1605       '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
1606                |              |    /                  |              |
1607              stmts            |   /                 stmts            |
1608                |              F  T                    |              |
1609               / \             | /                    / \             |
1610      .-------*   *      [ if (cond) ]       .-------*   *            |
1611      |           |            |             |           |            |
1612      |         latch          |             |         latch          |
1613      |           |            |             |           |            |
1614      |           '------------'             |           '------------'
1615      '------------------------. .-----------'
1616              loop1            | |                   loop2
1617                               v v
1618                              exits
1619
1620    In the graph, loop1 represents the part derived from original one, and
1621    loop2 is duplicated using loop_version (), which corresponds to the part
1622    of original one being splitted out.  In original latch edge of loop1, we
1623    insert a new conditional statement duplicated from the semi-invariant cond,
1624    and one of its branch goes back to loop1 header as a latch edge, and the
1625    other branch goes to loop2 pre-header as an entry edge.  And also in loop2,
1626    we abandon the variant branch of the conditional statement by setting a
1627    constant bool condition, based on which branch is semi-invariant.  */
1628
1629 static bool
1630 do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1631 {
1632   basic_block cond_bb = invar_branch->src;
1633   bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1634   gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
1635
1636   gcc_assert (cond_bb->loop_father == loop1);
1637
1638   if (dump_enabled_p ())
1639     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1640                      "loop split on semi-invariant condition at %s branch\n",
1641                      true_invar ? "true" : "false");
1642
1643   initialize_original_copy_tables ();
1644
1645   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1646                                      invar_branch->probability.invert (),
1647                                      invar_branch->probability,
1648                                      profile_probability::always (),
1649                                      profile_probability::always (),
1650                                      true);
1651   if (!loop2)
1652     {
1653       free_original_copy_tables ();
1654       return false;
1655     }
1656
1657   basic_block cond_bb_copy = get_bb_copy (cond_bb);
1658   gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));
1659
1660   /* Replace the condition in loop2 with a bool constant to let PassManager
1661      remove the variant branch after current pass completes.  */
1662   if (true_invar)
1663     gimple_cond_make_true (cond_copy);
1664   else
1665     gimple_cond_make_false (cond_copy);
1666
1667   update_stmt (cond_copy);
1668
1669   /* Insert a new conditional statement on latch edge of loop1, its condition
1670      is duplicated from the semi-invariant.  This statement acts as a switch
1671      to transfer execution from loop1 to loop2, when loop1 enters into
1672      invariant state.  */
1673   basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1674   basic_block break_bb = split_edge (single_pred_edge (latch_bb));
1675   gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1676                                           gimple_cond_lhs (cond),
1677                                           gimple_cond_rhs (cond),
1678                                           NULL_TREE, NULL_TREE);
1679
1680   gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1681   gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1682
1683   edge to_loop1 = single_succ_edge (break_bb);
1684   edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1685
1686   to_loop1->flags &= ~EDGE_FALLTHRU;
1687   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1688   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1689
1690   /* Due to introduction of a control flow edge from loop1 latch to loop2
1691      pre-header, we should update PHIs in loop2 to reflect this connection
1692      between loop1 and loop2.  */
1693   connect_loop_phis (loop1, loop2, to_loop2);
1694
1695   edge true_edge, false_edge, skip_edge1, skip_edge2;
1696   extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1697
1698   skip_edge1 = true_invar ? false_edge : true_edge;
1699   skip_edge2 = true_invar ? true_edge : false_edge;
1700   fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
1701
1702   /* Fix first loop's exit probability after scaling.  */
1703   to_loop1->probability = invar_branch->probability.invert ();
1704   to_loop2->probability = invar_branch->probability;
1705
1706   free_original_copy_tables ();
1707
1708   return true;
1709 }
1710
1711 /* Traverse all conditional statements in LOOP, to find out a good candidate
1712    upon which we can do loop split.  */
1713
1714 static bool
1715 split_loop_on_cond (struct loop *loop)
1716 {
1717   split_info *info = new split_info ();
1718   basic_block *bbs = info->bbs = get_loop_body (loop);
1719   bool do_split = false;
1720
1721   /* Allocate an area to keep temporary info, and associate its address
1722      with loop aux field.  */
1723   loop->aux = info;
1724
1725   for (unsigned i = 0; i < loop->num_nodes; i++)
1726     bbs[i]->aux = NULL;
1727
1728   for (unsigned i = 0; i < loop->num_nodes; i++)
1729     {
1730       basic_block bb = bbs[i];
1731
1732       /* We only consider conditional statement, which be executed at most once
1733          in each iteration of the loop.  So skip statements in inner loops.  */
1734       if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1735         continue;
1736
1737       /* Actually this check is not a must constraint.  With it, we can ensure
1738          conditional statement will always be executed in each iteration.  */
1739       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1740         continue;
1741
1742       gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1743       if (!cond)
1744         continue;
1745
1746       edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1747
1748       if (branch_edge)
1749         {
1750           do_split_loop_on_cond (loop, branch_edge);
1751           do_split = true;
1752           break;
1753         }
1754     }
1755
1756   delete info;
1757   loop->aux = NULL;
1758
1759   return do_split;
1760 }
1761
1762 /* Main entry point.  Perform loop splitting on all suitable loops.  */
1763
1764 static unsigned int
1765 tree_ssa_split_loops (void)
1766 {
1767   bool changed = false;
1768
1769   gcc_assert (scev_initialized_p ());
1770
1771   calculate_dominance_info (CDI_POST_DOMINATORS);
1772
1773   for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1774     loop->aux = NULL;
1775
1776   /* Go through all loops starting from innermost.  */
1777   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1778     {
1779       if (loop->aux)
1780         {
1781           /* If any of our inner loops was split, don't split us,
1782              and mark our containing loop as having had splits as well.
1783              This allows for delaying SSA update.  */
1784           loop_outer (loop)->aux = loop;
1785           continue;
1786         }
1787
1788       if (optimize_loop_for_size_p (loop))
1789         continue;
1790
1791       if (split_loop (loop) || split_loop_on_cond (loop))
1792         {
1793           /* Mark our containing loop as having had some split inner loops.  */
1794           loop_outer (loop)->aux = loop;
1795           changed = true;
1796         }
1797     }
1798
1799   for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1800     loop->aux = NULL;
1801
1802   clear_aux_for_blocks ();
1803
1804   free_dominance_info (CDI_POST_DOMINATORS);
1805
1806   if (changed)
1807     {
1808       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1809       return TODO_cleanup_cfg;
1810     }
1811   return 0;
1812 }
1813
1814 /* Loop splitting pass.  */
1815
1816 namespace {
1817
1818 const pass_data pass_data_loop_split =
1819 {
1820   GIMPLE_PASS, /* type */
1821   "lsplit", /* name */
1822   OPTGROUP_LOOP, /* optinfo_flags */
1823   TV_LOOP_SPLIT, /* tv_id */
1824   PROP_cfg, /* properties_required */
1825   0, /* properties_provided */
1826   0, /* properties_destroyed */
1827   0, /* todo_flags_start */
1828   0, /* todo_flags_finish */
1829 };
1830
1831 class pass_loop_split : public gimple_opt_pass
1832 {
1833 public:
1834   pass_loop_split (gcc::context *ctxt)
1835     : gimple_opt_pass (pass_data_loop_split, ctxt)
1836   {}
1837
1838   /* opt_pass methods: */
1839   bool gate (function *) final override { return flag_split_loops != 0; }
1840   unsigned int execute (function *) final override;
1841
1842 }; // class pass_loop_split
1843
1844 unsigned int
1845 pass_loop_split::execute (function *fun)
1846 {
1847   if (number_of_loops (fun) <= 1)
1848     return 0;
1849
1850   return tree_ssa_split_loops ();
1851 }
1852
1853 } // anon namespace
1854
1855 gimple_opt_pass *
1856 make_pass_loop_split (gcc::context *ctxt)
1857 {
1858   return new pass_loop_split (ctxt);
1859 }