gcc/tree-ssa-loop-split.cc

   1 /* Loop splitting.
   2    Copyright (C) 2015-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "tree.h"
  25 #include "gimple.h"
  26 #include "tree-pass.h"
  27 #include "ssa.h"
  28 #include "fold-const.h"
  29 #include "tree-cfg.h"
  30 #include "tree-ssa.h"
  31 #include "tree-ssa-loop-niter.h"
  32 #include "tree-ssa-loop.h"
  33 #include "tree-ssa-loop-manip.h"
  34 #include "tree-into-ssa.h"
  35 #include "tree-inline.h"
  36 #include "tree-cfgcleanup.h"
  37 #include "cfgloop.h"
  38 #include "tree-scalar-evolution.h"
  39 #include "gimple-iterator.h"
  40 #include "gimple-pretty-print.h"
  41 #include "cfghooks.h"
  42 #include "gimple-fold.h"
  43 #include "gimplify-me.h"
  44 #include "print-tree.h"
  45 #include "value-query.h"
  46 #include "sreal.h"
  47
  48 /* This file implements two kinds of loop splitting.
  49
  50    One transformation of loops like:
  51
  52    for (i = 0; i < 100; i++)
  53      {
  54        if (i < 50)
  55          A;
  56        else
  57          B;
  58      }
  59
  60    into:
  61
  62    for (i = 0; i < 50; i++)
  63      {
  64        A;
  65      }
  66    for (; i < 100; i++)
  67      {
  68        B;
  69      }
  70
  71    */
  72
  73 /* Return the IV operand of the condition when BB inside LOOP is a
  74    potential iteration space split point, i.e. ends with a condition
  75    like "IV < comp", which is true on one side of the iteration space
  76    and false on the other, and the split point can be computed;
  77    return NULL_TREE otherwise.  On success also return the border
        point in *BORDER, the affine description of the compared induction
        variable in *IV and the comparison code to guard on in *GUARD_CODE
        (EQ/NE tests that can be handled are turned into relational codes).
        As a side effect the condition in BB may be rewritten in place so
        that the looping operand comes first; this happens before the
        remaining checks, so it can persist even when NULL_TREE is
        returned.  */
  78
  79 static tree
  80 split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
  81                enum tree_code *guard_code)
  82 {
  83   gcond *stmt;
  84   affine_iv iv2;
  85
  86   /* BB must end in a simple conditional jump.  */
  87   stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  88   if (!stmt)
  89     return NULL_TREE;
  90
  91   enum tree_code code = gimple_cond_code (stmt);
  92
       /* Conditions in blocks that LOOP exits from are not candidates.  */
  93   if (loop_exits_from_bb_p (loop, bb))
  94     return NULL_TREE;
  95
  96   tree op0 = gimple_cond_lhs (stmt);
  97   tree op1 = gimple_cond_rhs (stmt);
  98   class loop *useloop = loop_containing_stmt (stmt);
  99
       /* Both operands must be analyzable by simple_iv; the non-looping
          one is identified below by its zero step.  */
 100   if (!simple_iv (loop, useloop, op0, iv, false))
 101     return NULL_TREE;
 102   if (!simple_iv (loop, useloop, op1, &iv2, false))
 103     return NULL_TREE;
 104
 105   /* Make it so that the first argument of the condition is
 106      the looping one.  */
 107   if (!integer_zerop (iv2.step))
 108     {
 109       std::swap (op0, op1);
 110       std::swap (*iv, iv2);
 111       code = swap_tree_comparison (code);
 112       gimple_cond_set_condition (stmt, code, op0, op1);
 113       update_stmt (stmt);
 114     }
       /* Neither operand has a non-zero step: nothing to split on.  */
 115   else if (integer_zerop (iv->step))
 116     return NULL_TREE;
       /* After the swap above the second operand must have a zero step,
          i.e. a comparison of two looping IVs is rejected.  */
 117   if (!integer_zerop (iv2.step))
 118     return NULL_TREE;
       /* Give up unless the IV is flagged as not overflowing.  */
 119   if (!iv->no_overflow)
 120     return NULL_TREE;
 121
 122   /* Only handle relational comparisons; for equality and non-equality
 123      we'd in general have to split the loop into two loops and a middle
          statement, so only the special cases below are accepted.  */
 124   switch (code)
 125     {
 126       case LT_EXPR:
 127       case LE_EXPR:
 128       case GT_EXPR:
 129       case GE_EXPR:
 130         break;
 131       case NE_EXPR:
 132       case EQ_EXPR:
 133         /* If the test checks for the first iteration (the compared value
 134            equals the IV base), we can handle NE/EQ with only one split
                loop.  The replacement code depends on the sign of the step.  */
 135         if (operand_equal_p (iv->base, iv2.base, 0))
 136           {
 137             if (code == EQ_EXPR)
 138               code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
 139             else
 140               code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
 141             break;
 142           }
 143         /* Similarly when the test compares against the lower or upper
 144            bound of the value range known for the IV operand.  */
 145         else
 146           {
 147             int_range<2> r;
 148             get_global_range_query ()->range_of_expr (r, op0, stmt);
 149             if (!r.varying_p () && !r.undefined_p ()
 150                 && TREE_CODE (op1) == INTEGER_CST)
 151               {
 152                 wide_int val = wi::to_wide (op1);
                     /* op0 == min is the same as op0 <= min and
                        op0 != min the same as op0 > min.  */
 153                 if (known_eq (val, r.lower_bound ()))
 154                   {
 155                     code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
 156                     break;
 157                   }
                     /* Likewise op0 == max is op0 >= max and
                        op0 != max is op0 < max.  */
 158                 else if (known_eq (val, r.upper_bound ()))
 159                   {
 160                     code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
 161                     break;
 162                   }
 163               }
 164           }
 165         /* TODO: We can compare with exit condition; it seems that testing for
 166            last iteration is common case.  */
 167         return NULL_TREE;
 168       default:
 169         return NULL_TREE;
 170     }
 171
 172   if (dump_file && (dump_flags & TDF_DETAILS))
 173     {
 174       fprintf (dump_file, "Found potential split point: ");
 175       print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
 176       fprintf (dump_file, " { ");
 177       print_generic_expr (dump_file, iv->base, TDF_SLIM);
 178       fprintf (dump_file, " + I*");
 179       print_generic_expr (dump_file, iv->step, TDF_SLIM);
 180       fprintf (dump_file, " } %s ", get_tree_code_name (code));
 181       print_generic_expr (dump_file, iv2.base, TDF_SLIM);
 182       fprintf (dump_file, "\n");
 183     }
 184
       /* The border is the base of the zero-step operand; CODE may differ
          from the code in STMT when an EQ/NE test was accepted above.  */
 185   *border = iv2.base;
 186   *guard_code = code;
 187   return op0;
 188 }
 189
 190 /* Given the comparison code GUARD_CODE of a guard condition inside LOOP,
 191    which we want to make always true or false depending on INITIAL_TRUE,
 192    and adjusted values NEXTVAL (a post-increment IV) and NEWBOUND (the
 193    comparator), adjust the loop exit test statement to loop back only
 194    if the guard condition will also be true/false in the next
        iteration.  The condition ending the source block of LOOP's single
        exit is replaced by "NEXTVAL GUARD_CODE NEWBOUND" and the true/false
        flags of the exit and stay edges are reassigned according to
        INITIAL_TRUE.  */
 195
 196 static void
 197 patch_loop_exit (class loop *loop, tree_code guard_code, tree nextval,
 198                  tree newbound, bool initial_true)
 199 {
 200   edge exit = single_exit (loop);
 201   gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
 202   gimple_cond_set_condition (stmt, guard_code, nextval, newbound);
 203   update_stmt (stmt);
 204
       /* The index is 1 when successor 0 is the exit edge and 0 otherwise,
          so this picks the successor that is not the exit.  */
 205   edge stay = EDGE_SUCC (exit->src, EDGE_SUCC (exit->src, 0) == exit);
 206
 207   exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
 208   stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
 209
       /* Stay in the loop while the new condition has the value the guard
          is supposed to keep, leave on the opposite value.  */
 210   if (initial_true)
 211     {
 212       exit->flags |= EDGE_FALSE_VALUE;
 213       stay->flags |= EDGE_TRUE_VALUE;
 214     }
 215   else
 216     {
 217       exit->flags |= EDGE_TRUE_VALUE;
 218       stay->flags |= EDGE_FALSE_VALUE;
 219     }
 220 }
 221
 222 /* Given an induction variable GUARD_IV, and its affine descriptor IV,
 223    find the loop phi node in LOOP defining it directly, or create
 224    such phi node.  Return that phi node.  Creation is not implemented
        yet (IV is therefore unused): NULL is returned when GUARD_IV is not
        defined by a phi node in the header of LOOP.  */
 225
 226 static gphi *
 227 find_or_create_guard_phi (class loop *loop, tree guard_iv, affine_iv * /*iv*/)
 228 {
 229   gimple *def = SSA_NAME_DEF_STMT (guard_iv);
 230   gphi *phi;
       /* Accept only a defining phi node that sits in the loop header.  */
 231   if ((phi = dyn_cast <gphi *> (def))
 232       && gimple_bb (phi) == loop->header)
 233     return phi;
 234
 235   /* XXX Create the PHI instead.  */
 236   return NULL;
 237 }
 238
 239 /* Returns true if the exit values of all loop phi nodes of LOOP can be
 240    determined easily (i.e. that connect_loop_phis can determine them).
        The single exit of LOOP is used for the check.  */
 241
 242 static bool
 243 easy_exit_values (class loop *loop)
 244 {
 245   edge exit = single_exit (loop);
 246   edge latch = loop_latch_edge (loop);
 247   gphi_iterator psi;
 248
 249   /* Currently we regard the exit values as easy if they are the same
 250      as the value over the backedge.  Which is the case if the definition
 251      of the backedge value dominates the exit edge.  */
 252   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 253     {
 254       gphi *phi = psi.phi ();
 255       tree next = PHI_ARG_DEF_FROM_EDGE (phi, latch);
 256       basic_block bb;
           /* Latch values that are not SSA names, or whose defining
              statement has no basic block, never cause a failure here.  */
 257       if (TREE_CODE (next) == SSA_NAME
 258           && (bb = gimple_bb (SSA_NAME_DEF_STMT (next)))
 259           && !dominated_by_p (CDI_DOMINATORS, exit->src, bb))
 260         return false;
 261     }
 262
 263   return true;
 264 }
 265
 266 /* This function updates the SSA form after connect_loops made a new
 267    edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
 268    conditional).  I.e. the second loop can now be entered either
 269    via the original entry or via NEW_E, so the entry values of LOOP2
 270    phi nodes are either the original ones or those at the exit
 271    of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
 272    this.  The loops need to fulfill easy_exit_values().  NEW_E must
        end in the source block of LOOP2's preheader edge.  */
 273
 274 static void
 275 connect_loop_phis (class loop *loop1, class loop *loop2, edge new_e)
 276 {
 277   basic_block rest = loop_preheader_edge (loop2)->src;
 278   gcc_assert (new_e->dest == rest);
       /* Select whichever of the first two predecessor edges of REST is
          not NEW_E, i.e. the entry that bypasses LOOP1.  */
 279   edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
 280
 281   edge firste = loop_preheader_edge (loop1);
 282   edge seconde = loop_preheader_edge (loop2);
 283   edge firstn = loop_latch_edge (loop1);
 284   gphi_iterator psi_first, psi_second;
       /* Walk the header phis of both loops in lock-step; the assert below
          checks that corresponding phis have the same initial value.  */
 285   for (psi_first = gsi_start_phis (loop1->header),
 286        psi_second = gsi_start_phis (loop2->header);
 287        !gsi_end_p (psi_first);
 288        gsi_next (&psi_first), gsi_next (&psi_second))
 289     {
 290       tree init, next, new_init;
 291       use_operand_p op;
 292       gphi *phi_first = psi_first.phi ();
 293       gphi *phi_second = psi_second.phi ();
 294
 295       init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
 296       next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
 297       op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
 298       gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
 299
 300       /* Prefer using original variable as a base for the new ssa name.
 301          This is necessary for virtual ops, and useful in order to avoid
 302          losing debug info for real ops.  */
 303       if (TREE_CODE (next) == SSA_NAME
 304           && useless_type_conversion_p (TREE_TYPE (next),
 305                                         TREE_TYPE (init)))
 306         new_init = copy_ssa_name (next);
 307       else if (TREE_CODE (init) == SSA_NAME
 308                && useless_type_conversion_p (TREE_TYPE (init),
 309                                              TREE_TYPE (next)))
 310         new_init = copy_ssa_name (init);
 311       else if (useless_type_conversion_p (TREE_TYPE (next),
 312                                           TREE_TYPE (init)))
 313         new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
 314                                        "unrinittmp");
 315       else
 316         new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
 317                                        "unrinittmp");
 318
           /* The new phi in REST merges the original initial value (when
              LOOP1 was bypassed) with the latch value of LOOP1 (when coming
              over NEW_E); LOOP2's entry argument is redirected to it.  */
 319       gphi * newphi = create_phi_node (new_init, rest);
 320       add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
 321       add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
 322       SET_USE (op, new_init);
 323     }
 324 }
 325
 326 /* The two loops LOOP1 and LOOP2 were just created by loop versioning,
 327    they are still equivalent and placed in two arms of a diamond, like so:
 328
 329                .------if (cond)------.
 330                v                     v
 331              pre1                   pre2
 332               |                      |
 333         .--->h1                     h2<----.
 334         |     |                      |     |
 335         |    ex1---.            .---ex2    |
 336         |    /     |            |     \    |
 337         '---l1     X            |     l2---'
 338                    |            |
 339                    |            |
 340                    '--->join<---'
 341
 342    This function transforms the program such that LOOP1 is conditionally
 343    falling through to LOOP2, or skipping it.  This is done by splitting
 344    the ex1->join edge at X in the diagram above, and inserting a condition
 345    whose one arm goes to pre2, resulting in this situation:
 346
 347                .------if (cond)------.
 348                v                     v
 349              pre1       .---------->pre2
 350               |         |            |
 351         .--->h1         |           h2<----.
 352         |     |         |            |     |
 353         |    ex1---.    |       .---ex2    |
 354         |    /     v    |       |     \    |
 355         '---l1   skip---'       |     l2---'
 356                    |            |
 357                    |            |
 358                    '--->join<---'
 359
 360
 361    The condition used is the exit condition of LOOP1, which effectively means
 362    that when the first loop exits (for whatever reason) but the real original
 363    exit expression is still false the second loop will be entered.
 364    The function returns the new edge cond->pre2.
 365
 366    This doesn't update the SSA form, see connect_loop_phis for that.  */
 367
 368 static edge
 369 connect_loops (class loop *loop1, class loop *loop2)
 370 {
 371   edge exit = single_exit (loop1);
 372   basic_block skip_bb = split_edge (exit);
 373   gcond *skip_stmt;
 374   gimple_stmt_iterator gsi;
 375   edge new_e, skip_e;
 376
       /* Duplicate the condition that ends the exit block of LOOP1 and put
          the copy at the end of the new block on the exit edge.  */
 377   gcond *stmt = as_a <gcond *> (*gsi_last_bb (exit->src));
 378   skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
 379                                  gimple_cond_lhs (stmt),
 380                                  gimple_cond_rhs (stmt),
 381                                  NULL_TREE, NULL_TREE);
 382   gsi = gsi_last_bb (skip_bb);
 383   gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
 384
       /* The existing successor of SKIP_BB becomes one arm of the new
          condition, so it is no longer a fallthru edge; the other arm is a
          new edge into the block LOOP2's preheader edge starts from.  */
 385   skip_e = EDGE_SUCC (skip_bb, 0);
 386   skip_e->flags &= ~EDGE_FALLTHRU;
 387   new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
       /* SKIP_E gets the same truth value as the exit edge of LOOP1, NEW_E
          the opposite one.  */
 388   if (exit->flags & EDGE_TRUE_VALUE)
 389     {
 390       skip_e->flags |= EDGE_TRUE_VALUE;
 391       new_e->flags |= EDGE_FALSE_VALUE;
 392     }
 393   else
 394     {
 395       skip_e->flags |= EDGE_FALSE_VALUE;
 396       new_e->flags |= EDGE_TRUE_VALUE;
 397     }
 398
       /* Entering the second loop is predicted as very likely.  */
 399   new_e->probability = profile_probability::very_likely ();
 400   skip_e->probability = new_e->probability.invert ();
 401
 402   return new_e;
 403 }
 404
 405 /* This returns the new bound for iterations given the original iteration
 406    space in NITER, an arbitrary new bound BORDER, assumed to be some
 407    comparison value with a different IV, the initial value GUARD_INIT of
 408    that other IV, and the comparison code GUARD_CODE that compares
 409    that other IV with BORDER.  We return an SSA name, and place any
 410    necessary statements for that computation into *STMTS.
 411
 412    For example for such a loop:
 413
 414      for (i = beg, j = guard_init; i < end; i++, j++)
 415        if (j < border)  // this is supposed to be true/false
 416          ...
 417
 418    we want to return a new bound (on j) that makes the loop iterate
 419    as long as the condition j < border stays true.  We also don't want
 420    to iterate more often than the original loop, so we have to introduce
 421    some cut-off as well (via min/max), effectively resulting in:
 422
 423      newend = min (end+guard_init-beg, border)
 424      for (i = beg, j = guard_init; j < newend; i++, j++)
 425        if (j < border)
 426          ...
 427
 428    Depending on the direction of the IVs and if the exit tests
 429    are strict or non-strict we need to use MIN or MAX,
 430    and add or subtract 1.  This routine computes newend above.
        NITER->cmp is expected to be LT_EXPR or GT_EXPR here.  */
 431
 432 static tree
 433 compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter,
 434                          tree border,
 435                          enum tree_code guard_code, tree guard_init)
 436 {
 437   /* The niter structure contains the after-increment IV, we need
 438      the loop-enter base, so subtract STEP once.  */
 439   tree controlbase = force_gimple_operand (niter->control.base,
 440                                            stmts, true, NULL_TREE);
 441   tree controlstep = niter->control.step;
 442   tree enddiff;
       /* For pointers the subtraction is expressed as adding the negated
          step with POINTER_PLUS_EXPR.  After this ENDDIFF holds "beg".  */
 443   if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
 444     {
 445       controlstep = gimple_build (stmts, NEGATE_EXPR,
 446                                   TREE_TYPE (controlstep), controlstep);
 447       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
 448                               TREE_TYPE (controlbase),
 449                               controlbase, controlstep);
 450     }
 451   else
 452     enddiff = gimple_build (stmts, MINUS_EXPR,
 453                             TREE_TYPE (controlbase),
 454                             controlbase, controlstep);
 455
 456   /* Compute end-beg.  For pointers this is built as
          end + (-(sizetype) beg).  */
 457   gimple_seq stmts2;
 458   tree end = force_gimple_operand (niter->bound, &stmts2,
 459                                         true, NULL_TREE);
 460   gimple_seq_add_seq_without_update (stmts, stmts2);
 461   if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
 462     {
 463       tree tem = gimple_convert (stmts, sizetype, enddiff);
 464       tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
 465       enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
 466                               TREE_TYPE (enddiff),
 467                               end, tem);
 468     }
 469   else
 470     enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
 471                             end, enddiff);
 472
 473   /* Compute guard_init + (end-beg).  The difference is first converted
          to the type of GUARD_INIT, and to sizetype when that is a pointer.  */
 474   tree newbound;
 475   enddiff = gimple_convert (stmts, TREE_TYPE (guard_init), enddiff);
 476   if (POINTER_TYPE_P (TREE_TYPE (guard_init)))
 477     {
 478       enddiff = gimple_convert (stmts, sizetype, enddiff);
 479       newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
 480                                TREE_TYPE (guard_init),
 481                                guard_init, enddiff);
 482     }
 483   else
 484     newbound = gimple_build (stmts, PLUS_EXPR, TREE_TYPE (guard_init),
 485                              guard_init, enddiff);
 486
 487   /* Depending on the direction of the IVs the new bound for the first
 488      loop is the minimum or maximum of old bound and border.
 489      Also, if the guard condition isn't strictly less or greater,
 490      we need to adjust the bound.  */
 491   int addbound = 0;
 492   enum tree_code minmax;
 493   if (niter->cmp == LT_EXPR)
 494     {
 495       /* GT and LE are the same, inverted.  */
 496       if (guard_code == GT_EXPR || guard_code == LE_EXPR)
 497         addbound = -1;
 498       minmax = MIN_EXPR;
 499     }
 500   else
 501     {
 502       gcc_assert (niter->cmp == GT_EXPR);
           /* Likewise GE and LT for the downward direction.  */
 503       if (guard_code == GE_EXPR || guard_code == LT_EXPR)
 504         addbound = 1;
 505       minmax = MAX_EXPR;
 506     }
 507
       /* Apply the +1/-1 adjustment; for pointer bounds the constant is
          built in sizetype and added with POINTER_PLUS_EXPR.  */
 508   if (addbound)
 509     {
 510       tree type2 = TREE_TYPE (newbound);
 511       if (POINTER_TYPE_P (type2))
 512         type2 = sizetype;
 513       newbound = gimple_build (stmts,
 514                                POINTER_TYPE_P (TREE_TYPE (newbound))
 515                                ? POINTER_PLUS_EXPR : PLUS_EXPR,
 516                                TREE_TYPE (newbound),
 517                                newbound,
 518                                build_int_cst (type2, addbound));
 519     }
 520
       /* The result is MIN/MAX (BORDER, NEWBOUND) in the type of BORDER.  */
 521   tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
 522                               border, newbound);
 523   return newend;
 524 }
 525
 526 /* Fix the bb counts of the two loops LOOP1 and LOOP2 after the split,
 527    based on the probability of the split condition's TRUE_EDGE.  Don't
 528    adjust the bbs dominated by the true branch in LOOP1 (resp. by the
        copy of the destination of FALSE_EDGE in LOOP2) to avoid dropping
        1s down.  The copy is looked up with get_bb_copy, so this relies on
        the original/copy tables set up by the caller.  */
 529 static void
 530 fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge true_edge,
 531                          edge false_edge)
 532 {
 533   /* Proportion first loop's bb counts except those dominated by true
 534      branch to avoid drop 1s down.  */
 535   basic_block *bbs1, *bbs2;
 536   bbs1 = get_loop_body (loop1);
 537   unsigned j;
 538   for (j = 0; j < loop1->num_nodes; j++)
 539     if (bbs1[j] == loop1->latch
 540         /* Watch for case where the true conditional is empty.  */
 541         || !single_pred_p (true_edge->dest)
 542         || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
 543       bbs1[j]->count
 544         = bbs1[j]->count.apply_probability (true_edge->probability);
 545   free (bbs1);
 546
 547   /* Proportion second loop's bb counts except those dominated by false
 548      branch to avoid drop 1s down.  The scale is the inverse of the
          probability of TRUE_EDGE.  */
 549   basic_block bbi_copy = get_bb_copy (false_edge->dest);
 550   bbs2 = get_loop_body (loop2);
 551   for (j = 0; j < loop2->num_nodes; j++)
 552     if (bbs2[j] == loop2->latch
 553         /* Watch for case where the false conditional is empty.  */
 554         || !single_pred_p (bbi_copy)
 555         || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
 556       bbs2[j]->count
 557         = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
 558   free (bbs2);
 559 }
 560
 561 /* Checks if LOOP1 contains a conditional block whose condition
 562    depends on which side in the iteration space it is, and if so
 563    splits the iteration space into two loops.  Returns true if the
 564    loop was split.  The iteration descriptor for the single exit of
 565    LOOP1 is computed here; only the first suitable condition is used.  */
 566
 567 static bool
 568 split_loop (class loop *loop1)
 569 {
 570   class tree_niter_desc niter;
 571   basic_block *bbs;
 572   unsigned i;
 573   bool changed = false;
 574   tree guard_iv;
 575   tree border = NULL_TREE;
 576   affine_iv iv;
 577   edge exit1;
 578
       /* Give up unless the loop has a single exit leaving a two-successor
          block, an empty latch, easy exit values and an exit whose number
          of iterations can be analyzed.  */
 579   if (!(exit1 = single_exit (loop1))
 580       || EDGE_COUNT (exit1->src->succs) != 2
 581       /* ??? We could handle non-empty latches when we split the latch edge
 582          (not the exit edge), and put the new exit condition in the new block.
 583          OTOH this executes some code unconditionally that might have been
 584          skipped by the original exit before.  */
 585       || !empty_block_p (loop1->latch)
 586       || !easy_exit_values (loop1)
 587       || !number_of_iterations_exit (loop1, exit1, &niter, false, true)
 588       || niter.cmp == ERROR_MARK)
 589     return false;
       /* Treat a != exit test as < or > depending on the sign of the
          control IV's step, but only when that IV does not overflow.  */
 590   if (niter.cmp == NE_EXPR)
 591     {
 592       if (!niter.control.no_overflow)
 593         return false;
 594       if (tree_int_cst_sign_bit (niter.control.step))
 595         niter.cmp = GT_EXPR;
 596       else
 597         niter.cmp = LT_EXPR;
 598     }
 599
 600   bbs = get_loop_body (loop1);
 601
 602   if (!can_copy_bbs_p (bbs, loop1->num_nodes))
 603     {
 604       free (bbs);
 605       return false;
 606     }
 607
 608   /* Find a splitting opportunity.  */
 609   enum tree_code guard_code;
 610   for (i = 0; i < loop1->num_nodes; i++)
 611     if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
 612       {
 613         /* Handling opposite steps is not implemented yet.  Neither
 614            is handling different step sizes.  */
 615         if ((tree_int_cst_sign_bit (iv.step)
 616              != tree_int_cst_sign_bit (niter.control.step))
 617             || !tree_int_cst_equal (iv.step, niter.control.step))
 618           continue;
 619
 620         /* Find a loop PHI node that defines guard_iv directly,
 621            or create one doing that.  */
 622         gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
 623         if (!phi)
 624           continue;
 625         gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
 626         tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
 627                                                  loop_preheader_edge (loop1));
 628
 629         /* Loop splitting is implemented by versioning the loop, placing
 630            the new loop after the old loop, make the first loop iterate
 631            as long as the conditional stays true (or false) and let the
 632            second (new) loop handle the rest of the iterations.
 633
 634            First we need to determine if the condition will start being true
 635            or false in the first loop.  */
 636         bool initial_true;
 637         switch (guard_code)
 638           {
 639             case LT_EXPR:
 640             case LE_EXPR:
 641               initial_true = !tree_int_cst_sign_bit (iv.step);
 642               break;
 643             case GT_EXPR:
 644             case GE_EXPR:
 645               initial_true = tree_int_cst_sign_bit (iv.step);
 646               break;
 647             default:
 648               gcc_unreachable ();
 649           }
 650
 651         /* Build a condition that will skip the first loop when the
 652            guard condition won't ever be true (or false).  */
 653         gimple_seq stmts2;
 654         border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
 655         if (stmts2)
 656           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
 657                                             stmts2);
 658         tree cond = fold_build2 (guard_code, boolean_type_node,
 659                                  guard_init, border);
 660         if (!initial_true)
 661           cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
 662
 663         edge true_edge, false_edge;
 664         extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
 665
 666         /* Now version the loop, placing loop2 after loop1 connecting
 667            them, and fix up SSA form for that.  */
 668         initialize_original_copy_tables ();
 669         basic_block cond_bb;
 670
             /* When the versioning condition folded to constant true the
                first loop is always entered; otherwise use the probability
                of the guard's true edge.  */
 671         profile_probability loop1_prob
 672           = integer_onep (cond) ? profile_probability::always ()
 673                                 : true_edge->probability;
 674         /* TODO: It is commonly a case that we know that both loops will be
 675            entered.  very_likely below is the probability that second loop will
 676            be entered given by connect_loops.  We should work out the common
 677            case it is always true.  */
 678         class loop *loop2 = loop_version (loop1, cond, &cond_bb,
 679                                           loop1_prob,
 680                                           /* Pass always as we will later
 681                                              redirect first loop to second
 682                                              loop.  */
 683                                           profile_probability::always (),
 684                                           profile_probability::always (),
 685                                           profile_probability::very_likely (),
 686                                           true);
 687         gcc_assert (loop2);
 688         /* Correct probability of edge  cond_bb->preheader_of_loop2.  */
 689         single_pred_edge
 690                 (loop_preheader_edge (loop2)->src)->probability
 691                         = loop1_prob.invert ();
 692
 693         fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
 694         /* If the conditional we split on has a reliable profile and both
 695            preconditionals of loop1 and loop2 are constant true, we can
 696            only redistribute the iteration counts to the split loops.
 697
 698            If the conditionals we insert before loop1 or loop2 are non-trivial
 699            they increase expected loop count, so account this accordingly.
 700            If we do not know the probability of split conditional, avoid
 701            reducing loop estimates, since we do not really know how they are
 702            split between the two new loops.  Keep the original estimate since
 703            it is likely better than completely dropping it.
 704
 705            TODO: If we know that one of the new loops has constant
 706            number of iterations, we can do better.  We could also update
 707            upper bounds.  */
 708         if (loop1->any_estimate
 709             && wi::fits_shwi_p (loop1->nb_iterations_estimate))
 710           {
 711             sreal scale = true_edge->probability.reliable_p ()
 712                           ? true_edge->probability.to_sreal () : (sreal)1;
 713             sreal scale2 = false_edge->probability.reliable_p ()
 714                           ? false_edge->probability.to_sreal () : (sreal)1;
 715             sreal div1 = loop1_prob.to_sreal ();
 716             /* +1 to get header iterations rather than latch iterations and then
 717                -1 to convert back.  */
 718             if (div1 != 0)
 719               loop1->nb_iterations_estimate
 720                 = MAX ((((sreal)loop1->nb_iterations_estimate.to_shwi () + 1)
 721                        * scale / div1).to_nearest_int () - 1, 0);
 722             else
 723               loop1->any_estimate = false;
                 /* The second loop's estimate is scaled by the false edge
                    probability and divided by the very_likely entry
                    probability.  */
 724             loop2->nb_iterations_estimate
 725               = MAX ((((sreal)loop2->nb_iterations_estimate.to_shwi () + 1) * scale2
 726                      / profile_probability::very_likely ().to_sreal ())
 727                      .to_nearest_int () - 1, 0);
 728           }
 729         update_loop_exit_probability_scale_dom_bbs (loop1);
 730         update_loop_exit_probability_scale_dom_bbs (loop2);
 731
 732         edge new_e = connect_loops (loop1, loop2);
 733         connect_loop_phis (loop1, loop2, new_e);
 734
 735         /* The iterations of the second loop is now already
 736            exactly those that the first loop didn't do, but the
 737            iteration space of the first loop is still the original one.
 738            Compute the new bound for the guarding IV and patch the
 739            loop exit to use it instead of original IV and bound.  */
 740         gimple_seq stmts = NULL;
 741         tree newend = compute_new_first_bound (&stmts, &niter, border,
 742                                                guard_code, guard_init);
 743         if (stmts)
 744           gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
 745                                             stmts);
 746         tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
 747         patch_loop_exit (loop1, guard_code, guard_next, newend, initial_true);
 748
 749         /* Finally patch out the two copies of the condition to be always
 750            true/false (or opposite).  */
 751         gcond *force_true = as_a<gcond *> (*gsi_last_bb (bbs[i]));
 752         gcond *force_false = as_a<gcond *> (*gsi_last_bb (get_bb_copy (bbs[i])));
 753         if (!initial_true)
 754           std::swap (force_true, force_false);
 755         gimple_cond_make_true (force_true);
 756         gimple_cond_make_false (force_false);
 757         update_stmt (force_true);
 758         update_stmt (force_false);
 759
 760         free_original_copy_tables ();
 761
 762         changed = true;
 763         if (dump_file && (dump_flags & TDF_DETAILS))
 764           fprintf (dump_file, ";; Loop split.\n");
 765
 766         if (dump_enabled_p ())
 767           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, guard_stmt, "loop split\n");
 768
 769         /* Only deal with the first opportunity.  */
 770         break;
 771       }
 772
 773   free (bbs);
 774   return changed;
 775 }
 776
 777 /* Another transformation of loops like:
 778
 779    for (i = INIT (); CHECK (i); i = NEXT ())
 780      {
 781        if (expr (a_1, a_2, ..., a_n))  // expr is pure
 782          a_j = ...;  // change at least one a_j
 783        else
 784          S;          // not change any a_j
 785      }
 786
 787    into:
 788
 789    for (i = INIT (); CHECK (i); i = NEXT ())
 790      {
 791        if (expr (a_1, a_2, ..., a_n))
 792          a_j = ...;
 793        else
 794          {
 795            S;
 796            i = NEXT ();
 797            break;
 798          }
 799      }
 800
 801    for (; CHECK (i); i = NEXT ())
 802      {
 803        S;
 804      }
 805
 806    */
 807
 808 /* Data structure to hold temporary information during loop split upon
 809    semi-invariant conditional statement.  */
 810 class split_info {
 811 public:
 812   /* Array of all basic blocks in a loop, returned by get_loop_body().  */
 813   basic_block *bbs;
 814
 815   /* All memory store/clobber statements in a loop.  */
 816   auto_vec<gimple *> memory_stores;
 817
 818   /* Whether above memory stores vector has been filled.  */
 819   int need_init;
 820
 821   /* Control dependencies of basic blocks in a loop.  */
 822   auto_vec<hash_set<basic_block> *> control_deps;
 823
 824   split_info () : bbs (NULL),  need_init (true) { }
 825
 826   ~split_info ()
 827     {
 828       if (bbs)
 829         free (bbs);
 830
 831       for (unsigned i = 0; i < control_deps.length (); i++)
 832         delete control_deps[i];
 833     }
 834 };
 835
 836 /* Find all statements with memory-write effect in LOOP, including memory
 837    store and non-pure function call, and keep those in a vector.  This work
 838    is only done one time, for the vector should be constant during analysis
 839    stage of semi-invariant condition.  */
 840
 841 static void
 842 find_vdef_in_loop (struct loop *loop)
 843 {
 844   split_info *info = (split_info *) loop->aux;
 845   gphi *vphi = get_virtual_phi (loop->header);
 846
 847   /* Indicate memory store vector has been filled.  */
 848   info->need_init = false;
 849
 850   /* If loop contains memory operation, there must be a virtual PHI node in
 851      loop header basic block.  */
 852   if (vphi == NULL)
 853     return;
 854
 855   /* All virtual SSA names inside the loop are connected to be a cyclic
 856      graph via virtual PHI nodes.  The virtual PHI node in loop header just
 857      links the first and the last virtual SSA names, by using the last as
 858      PHI operand to define the first.  */
 859   const edge latch = loop_latch_edge (loop);
 860   const tree first = gimple_phi_result (vphi);
 861   const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch);
 862
 863   /* The virtual SSA cyclic graph might consist of only one SSA name, who
 864      is defined by itself.
 865
 866        .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)>
 867
 868      This means the loop contains only memory loads, so we can skip it.  */
 869   if (first == last)
 870     return;
 871
 872   auto_vec<gimple *> other_stores;
 873   auto_vec<tree> worklist;
 874   auto_bitmap visited;
 875
 876   bitmap_set_bit (visited, SSA_NAME_VERSION (first));
 877   bitmap_set_bit (visited, SSA_NAME_VERSION (last));
 878   worklist.safe_push (last);
 879
 880   do
 881     {
 882       tree vuse = worklist.pop ();
 883       gimple *stmt = SSA_NAME_DEF_STMT (vuse);
 884
 885       /* We mark the first and last SSA names as visited at the beginning,
 886          and reversely start the process from the last SSA name towards the
 887          first, which ensures that this do-while will not touch SSA names
 888          defined outside the loop.  */
 889       gcc_assert (gimple_bb (stmt)
 890                   && flow_bb_inside_loop_p (loop, gimple_bb (stmt)));
 891
 892       if (gimple_code (stmt) == GIMPLE_PHI)
 893         {
 894           gphi *phi = as_a <gphi *> (stmt);
 895
 896           for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
 897             {
 898               tree arg = gimple_phi_arg_def (stmt, i);
 899
 900               if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg)))
 901                 worklist.safe_push (arg);
 902             }
 903         }
 904       else
 905         {
 906           tree prev = gimple_vuse (stmt);
 907
 908           /* Non-pure call statement is conservatively assumed to impact all
 909              memory locations.  So place call statements ahead of other memory
 910              stores in the vector with an idea of using them as shortcut
 911              terminators to memory alias analysis.  */
 912           if (gimple_code (stmt) == GIMPLE_CALL)
 913             info->memory_stores.safe_push (stmt);
 914           else
 915             other_stores.safe_push (stmt);
 916
 917           if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev)))
 918             worklist.safe_push (prev);
 919         }
 920     } while (!worklist.is_empty ());
 921
 922     info->memory_stores.safe_splice (other_stores);
 923 }
 924
 925 /* Two basic blocks have equivalent control dependency if one dominates to
 926    the other, and it is post-dominated by the latter.  Given a basic block
 927    BB in LOOP, find farest equivalent dominating basic block.  For BB, there
 928    is a constraint that BB does not post-dominate loop header of LOOP, this
 929    means BB is control-dependent on at least one basic block in LOOP.  */
 930
 931 static basic_block
 932 get_control_equiv_head_block (struct loop *loop, basic_block bb)
 933 {
 934   while (!bb->aux)
 935     {
 936       basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
 937
 938       gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb));
 939
 940       if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb))
 941         break;
 942
 943       bb = dom_bb;
 944     }
 945   return bb;
 946 }
 947
 948 /* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control-
 949    dependent on.  */
 950
 951 static hash_set<basic_block> *
 952 find_control_dep_blocks (struct loop *loop, basic_block bb)
 953 {
 954   /* BB has same control dependency as loop header, then it is not control-
 955      dependent on any basic block in LOOP.  */
 956   if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb))
 957     return NULL;
 958
 959   basic_block equiv_head = get_control_equiv_head_block (loop, bb);
 960
 961   if (equiv_head->aux)
 962     {
 963       /* There is a basic block containing control dependency equivalent
 964          to BB.  No need to recompute that, and also set this information
 965          to other equivalent basic blocks.  */
 966       for (; bb != equiv_head;
 967            bb = get_immediate_dominator (CDI_DOMINATORS, bb))
 968         bb->aux = equiv_head->aux;
 969       return (hash_set<basic_block> *) equiv_head->aux;
 970     }
 971
 972   /* A basic block X is control-dependent on another Y iff there exists
 973      a path from X to Y, in which every basic block other than X and Y
 974      is post-dominated by Y, but X is not post-dominated by Y.
 975
 976      According to this rule, traverse basic blocks in the loop backwards
 977      starting from BB, if a basic block is post-dominated by BB, extend
 978      current post-dominating path to this block, otherwise it is another
 979      one that BB is control-dependent on.  */
 980
 981   auto_vec<basic_block> pdom_worklist;
 982   hash_set<basic_block> pdom_visited;
 983   hash_set<basic_block> *dep_bbs = new hash_set<basic_block>;
 984
 985   pdom_worklist.safe_push (equiv_head);
 986
 987   do
 988     {
 989       basic_block pdom_bb = pdom_worklist.pop ();
 990       edge_iterator ei;
 991       edge e;
 992
 993       if (pdom_visited.add (pdom_bb))
 994         continue;
 995
 996       FOR_EACH_EDGE (e, ei, pdom_bb->preds)
 997         {
 998           basic_block pred_bb = e->src;
 999
1000           if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb))
1001             {
1002               dep_bbs->add (pred_bb);
1003               continue;
1004             }
1005
1006           pred_bb = get_control_equiv_head_block (loop, pred_bb);
1007
1008           if (pdom_visited.contains (pred_bb))
1009             continue;
1010
1011           if (!pred_bb->aux)
1012             {
1013               pdom_worklist.safe_push (pred_bb);
1014               continue;
1015             }
1016
1017           /* If control dependency of basic block is available, fast extend
1018              post-dominating path using the information instead of advancing
1019              forward step-by-step.  */
1020           hash_set<basic_block> *pred_dep_bbs
1021                         = (hash_set<basic_block> *) pred_bb->aux;
1022
1023           for (hash_set<basic_block>::iterator iter = pred_dep_bbs->begin ();
1024                iter != pred_dep_bbs->end (); ++iter)
1025             {
1026               basic_block pred_dep_bb = *iter;
1027
1028               /* Basic blocks can either be in control dependency of BB, or
1029                  must be post-dominated by BB, if so, extend the path from
1030                  these basic blocks.  */
1031               if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb))
1032                 dep_bbs->add (pred_dep_bb);
1033               else if (!pdom_visited.contains (pred_dep_bb))
1034                 pdom_worklist.safe_push (pred_dep_bb);
1035             }
1036         }
1037     } while (!pdom_worklist.is_empty ());
1038
1039   /* Record computed control dependencies in loop so that we can reach them
1040      when reclaiming resources.  */
1041   ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs);
1042
1043   /* Associate control dependence with related equivalent basic blocks.  */
1044   for (equiv_head->aux = dep_bbs; bb != equiv_head;
1045        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
1046     bb->aux = dep_bbs;
1047
1048   return dep_bbs;
1049 }
1050
/* Forward declaration.  stmt_semi_invariant_p_1 is defined further below,
   but ssa_semi_invariant_p and control_dep_semi_invariant_p, which precede
   that definition, already call it.  */

static bool
stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
                         const_basic_block skip_head,
                         hash_map<gimple *, bool> &stmt_stat);
1057
1058 /* Given STMT, memory load or pure call statement, check whether it is impacted
1059    by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the
1060    trace is composed of SKIP_HEAD and those basic block dominated by it, always
1061    corresponds to one branch of a conditional statement).  If SKIP_HEAD is
1062    NULL, all basic blocks of LOOP are checked.  */
1063
1064 static bool
1065 vuse_semi_invariant_p (struct loop *loop, gimple *stmt,
1066                        const_basic_block skip_head)
1067 {
1068   split_info *info = (split_info *) loop->aux;
1069   tree rhs = NULL_TREE;
1070   ao_ref ref;
1071   gimple *store;
1072   unsigned i;
1073
1074   /* Collect memory store/clobber statements if haven't done that.  */
1075   if (info->need_init)
1076     find_vdef_in_loop (loop);
1077
1078   if (is_gimple_assign (stmt))
1079     rhs = gimple_assign_rhs1 (stmt);
1080
1081   ao_ref_init (&ref, rhs);
1082
1083   FOR_EACH_VEC_ELT (info->memory_stores, i, store)
1084     {
1085       /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL.  */
1086       if (skip_head
1087           && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head))
1088         continue;
1089
1090       if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref))
1091         return false;
1092     }
1093
1094   return true;
1095 }
1096
1097 /* Suppose one condition branch, led by SKIP_HEAD, is not executed since
1098    certain iteration of LOOP, check whether an SSA name (NAME) remains
1099    unchanged in next iteration.  We call this characteristic semi-
1100    invariantness.  SKIP_HEAD might be NULL, if so, nothing excluded, all basic
1101    blocks and control flows in the loop will be considered.  Semi-invariant
1102    state of checked statement is cached in hash map STMT_STAT to avoid
1103    redundant computation in possible following re-check.  */
1104
1105 static inline bool
1106 ssa_semi_invariant_p (struct loop *loop, tree name,
1107                       const_basic_block skip_head,
1108                       hash_map<gimple *, bool> &stmt_stat)
1109 {
1110   gimple *def = SSA_NAME_DEF_STMT (name);
1111   const_basic_block def_bb = gimple_bb (def);
1112
1113   /* An SSA name defined outside loop is definitely semi-invariant.  */
1114   if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
1115     return true;
1116
1117   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1118     return false;
1119
1120   return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
1121 }
1122
1123 /* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is
1124    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1125    are excluded from LOOP.  */
1126
1127 static bool
1128 loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi,
1129                                 const_basic_block skip_head)
1130 {
1131   const_edge latch = loop_latch_edge (loop);
1132   tree name = gimple_phi_result (loop_phi);
1133   tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch);
1134
1135   gcc_checking_assert (from);
1136
1137   /* Loop iteration PHI node locates in loop header, and it has two source
1138      operands, one is an initial value coming from outside the loop, the other
1139      is a value through latch of the loop, which is derived in last iteration,
1140      we call the latter latch value.  From the PHI node to definition of latch
1141      value, if excluding branch trace starting from SKIP_HEAD, except copy-
1142      assignment or likewise, there is no other kind of value redefinition, SSA
1143      name defined by the PHI node is semi-invariant.
1144
1145                          loop entry
1146                               |     .--- latch ---.
1147                               |     |             |
1148                               v     v             |
1149                   x_1 = PHI <x_0,  x_3>           |
1150                            |                      |
1151                            v                      |
1152               .------- if (cond) -------.         |
1153               |                         |         |
1154               |                     [ SKIP ]      |
1155               |                         |         |
1156               |                     x_2 = ...     |
1157               |                         |         |
1158               '---- T ---->.<---- F ----'         |
1159                            |                      |
1160                            v                      |
1161                   x_3 = PHI <x_1, x_2>            |
1162                            |                      |
1163                            '----------------------'
1164
1165      Suppose in certain iteration, execution flow in above graph goes through
1166      true branch, which means that one source value to define x_3 in false
1167      branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next
1168      iterations is defined by x_3, we know that x_1 will never changed if COND
1169      always chooses true branch from then on.  */
1170
1171   while (from != name)
1172     {
1173       /* A new value comes from a CONSTANT.  */
1174       if (TREE_CODE (from) != SSA_NAME)
1175         return false;
1176
1177       gimple *stmt = SSA_NAME_DEF_STMT (from);
1178       const_basic_block bb = gimple_bb (stmt);
1179
1180       /* A new value comes from outside the loop.  */
1181       if (!bb || !flow_bb_inside_loop_p (loop, bb))
1182         return false;
1183
1184       from = NULL_TREE;
1185
1186       if (gimple_code (stmt) == GIMPLE_PHI)
1187         {
1188           gphi *phi = as_a <gphi *> (stmt);
1189
1190           for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1191             {
1192               if (skip_head)
1193                 {
1194                   const_edge e = gimple_phi_arg_edge (phi, i);
1195
1196                   /* Don't consider redefinitions in excluded basic blocks.  */
1197                   if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head))
1198                     continue;
1199                 }
1200
1201               tree arg = gimple_phi_arg_def (phi, i);
1202
1203               if (!from)
1204                 from = arg;
1205               else if (!operand_equal_p (from, arg, 0))
1206                 /* There are more than one source operands that provide
1207                    different values to the SSA name, it is variant.  */
1208                 return false;
1209             }
1210         }
1211       else if (gimple_code (stmt) == GIMPLE_ASSIGN)
1212         {
1213           /* For simple value copy, check its rhs instead.  */
1214           if (gimple_assign_ssa_name_copy_p (stmt))
1215             from = gimple_assign_rhs1 (stmt);
1216         }
1217
1218       /* Any other kind of definition is deemed to introduce a new value
1219          to the SSA name.  */
1220       if (!from)
1221         return false;
1222     }
1223   return true;
1224 }
1225
1226 /* Check whether conditional predicates that BB is control-dependent on, are
1227    semi-invariant in LOOP.  Basic blocks dominated by SKIP_HEAD (if non-NULL),
1228    are excluded from LOOP.  Semi-invariant state of checked statement is cached
1229    in hash map STMT_STAT.  */
1230
1231 static bool
1232 control_dep_semi_invariant_p (struct loop *loop, basic_block bb,
1233                               const_basic_block skip_head,
1234                               hash_map<gimple *, bool> &stmt_stat)
1235 {
1236   hash_set<basic_block> *dep_bbs = find_control_dep_blocks (loop, bb);
1237
1238   if (!dep_bbs)
1239     return true;
1240
1241   for (hash_set<basic_block>::iterator iter = dep_bbs->begin ();
1242        iter != dep_bbs->end (); ++iter)
1243     {
1244       gimple *last = *gsi_last_bb (*iter);
1245       if (!last)
1246         return false;
1247
1248       /* Only check condition predicates.  */
1249       if (gimple_code (last) != GIMPLE_COND
1250           && gimple_code (last) != GIMPLE_SWITCH)
1251         return false;
1252
1253       if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat))
1254         return false;
1255     }
1256
1257   return true;
1258 }
1259
1260 /* Check whether STMT is semi-invariant in LOOP, iff all its operands are
1261    semi-invariant, consequently, all its defined values are semi-invariant.
1262    Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.
1263    Semi-invariant state of checked statement is cached in hash map
1264    STMT_STAT.  */
1265
1266 static bool
1267 stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt,
1268                          const_basic_block skip_head,
1269                          hash_map<gimple *, bool> &stmt_stat)
1270 {
1271   bool existed;
1272   bool &invar = stmt_stat.get_or_insert (stmt, &existed);
1273
1274   if (existed)
1275     return invar;
1276
1277   /* A statement might depend on itself, which is treated as variant.  So set
1278      state of statement under check to be variant to ensure that.  */
1279   invar = false;
1280
1281   if (gimple_code (stmt) == GIMPLE_PHI)
1282     {
1283       gphi *phi = as_a <gphi *> (stmt);
1284
1285       if (gimple_bb (stmt) == loop->header)
1286         {
1287           /* If the entry value is subject to abnormal coalescing
1288              avoid the transform since we're going to duplicate the
1289              loop header and thus likely introduce overlapping life-ranges
1290              between the PHI def and the entry on the path when the
1291              first loop is skipped.  */
1292           tree entry_def
1293             = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1294           if (TREE_CODE (entry_def) == SSA_NAME
1295               && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (entry_def))
1296             return false;
1297           invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head);
1298           return invar;
1299         }
1300
1301       /* For a loop PHI node that does not locate in loop header, it is semi-
1302          invariant only if two conditions are met.  The first is its source
1303          values are derived from CONSTANT (including loop-invariant value), or
1304          from SSA name defined by semi-invariant loop iteration PHI node.  The
1305          second is its source incoming edges are control-dependent on semi-
1306          invariant conditional predicates.  */
1307       for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
1308         {
1309           const_edge e = gimple_phi_arg_edge (phi, i);
1310           tree arg = gimple_phi_arg_def (phi, i);
1311
1312           if (TREE_CODE (arg) == SSA_NAME)
1313             {
1314               if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat))
1315                 return false;
1316
1317               /* If source value is defined in location from where the source
1318                  edge comes in, no need to check control dependency again
1319                  since this has been done in above SSA name check stage.  */
1320               if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg)))
1321                 continue;
1322             }
1323
1324           if (!control_dep_semi_invariant_p (loop, e->src, skip_head,
1325                                              stmt_stat))
1326             return false;
1327         }
1328     }
1329   else
1330     {
1331       ssa_op_iter iter;
1332       tree use;
1333
1334       /* Volatile memory load or return of normal (non-const/non-pure) call
1335          should not be treated as constant in each iteration of loop.  */
1336       if (gimple_has_side_effects (stmt))
1337         return false;
1338
1339       /* Check if any memory store may kill memory load at this place.  */
1340       if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head))
1341         return false;
1342
1343       /* Although operand of a statement might be SSA name, CONSTANT or
1344          VARDECL, here we only need to check SSA name operands.  This is
1345          because check on VARDECL operands, which involve memory loads,
1346          must have been done prior to invocation of this function in
1347          vuse_semi_invariant_p.  */
1348       FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1349         if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat))
1350           return false;
1351     }
1352
1353   if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head,
1354                                      stmt_stat))
1355     return false;
1356
1357   /* Here we SHOULD NOT use invar = true, since hash map might be changed due
1358      to new insertion, and thus invar may point to invalid memory.  */
1359   stmt_stat.put (stmt, true);
1360   return true;
1361 }
1362
1363 /* A helper function to check whether STMT is semi-invariant in LOOP.  Basic
1364    blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP.  */
1365
1366 static bool
1367 stmt_semi_invariant_p (struct loop *loop, gimple *stmt,
1368                        const_basic_block skip_head)
1369 {
1370   hash_map<gimple *, bool> stmt_stat;
1371   return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat);
1372 }
1373
1374 /* Determine when conditional statement never transfers execution to one of its
1375    branch, whether we can remove the branch's leading basic block (BRANCH_BB)
1376    and those basic blocks dominated by BRANCH_BB.  */
1377
1378 static bool
1379 branch_removable_p (basic_block branch_bb)
1380 {
1381   edge_iterator ei;
1382   edge e;
1383
1384   if (single_pred_p (branch_bb))
1385     return true;
1386
1387   FOR_EACH_EDGE (e, ei, branch_bb->preds)
1388     {
1389       if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
1390         continue;
1391
1392       if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
1393         continue;
1394
1395        /* The branch can be reached from opposite branch, or from some
1396           statement not dominated by the conditional statement.  */
1397       return false;
1398     }
1399
1400   return true;
1401 }
1402
1403 /* Find out which branch of a conditional statement (COND) is invariant in the
1404    execution context of LOOP.  That is: once the branch is selected in certain
1405    iteration of the loop, any operand that contributes to computation of the
1406    conditional statement remains unchanged in all following iterations.  */
1407
1408 static edge
1409 get_cond_invariant_branch (struct loop *loop, gcond *cond)
1410 {
1411   basic_block cond_bb = gimple_bb (cond);
1412   basic_block targ_bb[2];
1413   bool invar[2];
1414   unsigned invar_checks = 0;
1415
1416   for (unsigned i = 0; i < 2; i++)
1417     {
1418       targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest;
1419
1420       /* One branch directs to loop exit, no need to perform loop split upon
1421          this conditional statement.  Firstly, it is trivial if the exit branch
1422          is semi-invariant, for the statement is just to break loop.  Secondly,
1423          if the opposite branch is semi-invariant, it means that the statement
1424          is real loop-invariant, which is covered by loop unswitch.  */
1425       if (!flow_bb_inside_loop_p (loop, targ_bb[i]))
1426         return NULL;
1427     }
1428
1429   for (unsigned i = 0; i < 2; i++)
1430     {
1431       invar[!i] = false;
1432
1433       if (!branch_removable_p (targ_bb[i]))
1434         continue;
1435
1436       /* Given a semi-invariant branch, if its opposite branch dominates
1437          loop latch, it and its following trace will only be executed in
1438          final iteration of loop, namely it is not part of repeated body
1439          of the loop.  Similar to the above case that the branch is loop
1440          exit, no need to split loop.  */
1441       if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i]))
1442         continue;
1443
1444       invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]);
1445       invar_checks++;
1446     }
1447
1448   /* With both branches being invariant (handled by loop unswitch) or
1449      variant is not what we want.  */
1450   if (invar[0] ^ !invar[1])
1451     return NULL;
1452
1453   /* Found a real loop-invariant condition, do nothing.  */
1454   if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL))
1455     return NULL;
1456
1457   return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1);
1458 }
1459
1460 /* Calculate increased code size measured by estimated insn number if applying
1461    loop split upon certain branch (BRANCH_EDGE) of a conditional statement.  */
1462
1463 static int
1464 compute_added_num_insns (struct loop *loop, const_edge branch_edge)
1465 {
1466   basic_block cond_bb = branch_edge->src;
1467   unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge;
1468   basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest;
1469   basic_block *bbs = ((split_info *) loop->aux)->bbs;
1470   int num = 0;
1471
1472   for (unsigned i = 0; i < loop->num_nodes; i++)
1473     {
1474       /* Do no count basic blocks only in opposite branch.  */
1475       if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb))
1476         continue;
1477
1478       num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights);
1479     }
1480
1481   /* It is unnecessary to evaluate expression of the conditional statement
1482      in new loop that contains only invariant branch.  This expression should
1483      be constant value (either true or false).  Exclude code size of insns
1484      that contribute to computation of the expression.  */
1485
1486   auto_vec<gimple *> worklist;
1487   hash_set<gimple *> removed;
1488   gimple *stmt = last_nondebug_stmt (cond_bb);
1489
1490   worklist.safe_push (stmt);
1491   removed.add (stmt);
1492   num -= estimate_num_insns (stmt, &eni_size_weights);
1493
1494   do
1495     {
1496       ssa_op_iter opnd_iter;
1497       use_operand_p opnd_p;
1498
1499       stmt = worklist.pop ();
1500       FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE)
1501         {
1502           tree opnd = USE_FROM_PTR (opnd_p);
1503
1504           if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd))
1505             continue;
1506
1507           gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd);
1508           use_operand_p use_p;
1509           imm_use_iterator use_iter;
1510
1511           if (removed.contains (opnd_stmt)
1512               || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt)))
1513             continue;
1514
1515           FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd)
1516             {
1517               gimple *use_stmt = USE_STMT (use_p);
1518
1519               if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt))
1520                 {
1521                   opnd_stmt = NULL;
1522                   break;
1523                 }
1524             }
1525
1526           if (opnd_stmt)
1527             {
1528               worklist.safe_push (opnd_stmt);
1529               removed.add (opnd_stmt);
1530               num -= estimate_num_insns (opnd_stmt, &eni_size_weights);
1531             }
1532         }
1533     } while (!worklist.is_empty ());
1534
1535   gcc_assert (num >= 0);
1536   return num;
1537 }
1538
1539 /* Find out loop-invariant branch of a conditional statement (COND) if it has,
1540    and check whether it is eligible and profitable to perform loop split upon
1541    this branch in LOOP.  */
1542
1543 static edge
1544 get_cond_branch_to_split_loop (struct loop *loop, gcond *cond)
1545 {
1546   edge invar_branch = get_cond_invariant_branch (loop, cond);
1547   if (!invar_branch)
1548     return NULL;
1549
1550   /* When accurate profile information is available, and execution
1551      frequency of the branch is too low, just let it go.  */
1552   profile_probability prob = invar_branch->probability;
1553   if (prob.reliable_p ())
1554     {
1555       int thres = param_min_loop_cond_split_prob;
1556
1557       if (prob < profile_probability::always ().apply_scale (thres, 100))
1558         return NULL;
1559     }
1560
1561   /* Add a threshold for increased code size to disable loop split.  */
1562   if (compute_added_num_insns (loop, invar_branch) > param_max_peeled_insns)
1563     return NULL;
1564
1565   return invar_branch;
1566 }
1567
1568 /* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some
1569    conditional statement, perform loop split transformation illustrated
1570    as the following graph.
1571
1572                .-------T------ if (true) ------F------.
1573                |                    .---------------. |
1574                |                    |               | |
1575                v                    |               v v
1576           pre-header                |            pre-header
1577                | .------------.     |                 | .------------.
1578                | |            |     |                 | |            |
1579                | v            |     |                 | v            |
1580              header           |     |               header           |
1581                |              |     |                 |              |
1582       .--- if (cond) ---.     |     |        .--- if (true) ---.     |
1583       |                 |     |     |        |                 |     |
1584   invariant             |     |     |    invariant             |     |
1585       |                 |     |     |        |                 |     |
1586       '---T--->.<---F---'     |     |        '---T--->.<---F---'     |
1587                |              |    /                  |              |
1588              stmts            |   /                 stmts            |
1589                |              F  T                    |              |
1590               / \             | /                    / \             |
1591      .-------*   *      [ if (cond) ]       .-------*   *            |
1592      |           |            |             |           |            |
1593      |         latch          |             |         latch          |
1594      |           |            |             |           |            |
1595      |           '------------'             |           '------------'
1596      '------------------------. .-----------'
1597              loop1            | |                   loop2
1598                               v v
1599                              exits
1600
1601    In the graph, loop1 represents the part derived from original one, and
1602    loop2 is duplicated using loop_version (), which corresponds to the part
1603    of original one being splitted out.  In original latch edge of loop1, we
1604    insert a new conditional statement duplicated from the semi-invariant cond,
1605    and one of its branch goes back to loop1 header as a latch edge, and the
1606    other branch goes to loop2 pre-header as an entry edge.  And also in loop2,
1607    we abandon the variant branch of the conditional statement by setting a
1608    constant bool condition, based on which branch is semi-invariant.  */
1609
1610 static bool
1611 do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
1612 {
1613   basic_block cond_bb = invar_branch->src;
1614   bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE);
1615   gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
1616
1617   gcc_assert (cond_bb->loop_father == loop1);
1618
1619   if (dump_enabled_p ())
1620     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond,
1621                      "loop split on semi-invariant condition at %s branch\n",
1622                      true_invar ? "true" : "false");
1623
1624   initialize_original_copy_tables ();
1625
1626   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
1627                                      invar_branch->probability.invert (),
1628                                      invar_branch->probability,
1629                                      profile_probability::always (),
1630                                      profile_probability::always (),
1631                                      true);
1632   if (!loop2)
1633     {
1634       free_original_copy_tables ();
1635       return false;
1636     }
1637
1638   basic_block cond_bb_copy = get_bb_copy (cond_bb);
1639   gcond *cond_copy = as_a<gcond *> (*gsi_last_bb (cond_bb_copy));
1640
1641   /* Replace the condition in loop2 with a bool constant to let PassManager
1642      remove the variant branch after current pass completes.  */
1643   if (true_invar)
1644     gimple_cond_make_true (cond_copy);
1645   else
1646     gimple_cond_make_false (cond_copy);
1647
1648   update_stmt (cond_copy);
1649
1650   /* Insert a new conditional statement on latch edge of loop1, its condition
1651      is duplicated from the semi-invariant.  This statement acts as a switch
1652      to transfer execution from loop1 to loop2, when loop1 enters into
1653      invariant state.  */
1654   basic_block latch_bb = split_edge (loop_latch_edge (loop1));
1655   basic_block break_bb = split_edge (single_pred_edge (latch_bb));
1656   gimple *break_cond = gimple_build_cond (gimple_cond_code(cond),
1657                                           gimple_cond_lhs (cond),
1658                                           gimple_cond_rhs (cond),
1659                                           NULL_TREE, NULL_TREE);
1660
1661   gimple_stmt_iterator gsi = gsi_last_bb (break_bb);
1662   gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT);
1663
1664   edge to_loop1 = single_succ_edge (break_bb);
1665   edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0);
1666
1667   to_loop1->flags &= ~EDGE_FALLTHRU;
1668   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
1669   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
1670
1671   /* Due to introduction of a control flow edge from loop1 latch to loop2
1672      pre-header, we should update PHIs in loop2 to reflect this connection
1673      between loop1 and loop2.  */
1674   connect_loop_phis (loop1, loop2, to_loop2);
1675
1676   edge true_edge, false_edge, skip_edge1, skip_edge2;
1677   extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
1678
1679   skip_edge1 = true_invar ? false_edge : true_edge;
1680   skip_edge2 = true_invar ? true_edge : false_edge;
1681   fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
1682
1683   /* Fix first loop's exit probability after scaling.  */
1684   to_loop1->probability = invar_branch->probability.invert ();
1685   to_loop2->probability = invar_branch->probability;
1686
1687   free_original_copy_tables ();
1688
1689   return true;
1690 }
1691
1692 /* Traverse all conditional statements in LOOP, to find out a good candidate
1693    upon which we can do loop split.  */
1694
1695 static bool
1696 split_loop_on_cond (struct loop *loop)
1697 {
1698   split_info *info = new split_info ();
1699   basic_block *bbs = info->bbs = get_loop_body (loop);
1700   bool do_split = false;
1701
1702   /* Allocate an area to keep temporary info, and associate its address
1703      with loop aux field.  */
1704   loop->aux = info;
1705
1706   for (unsigned i = 0; i < loop->num_nodes; i++)
1707     bbs[i]->aux = NULL;
1708
1709   for (unsigned i = 0; i < loop->num_nodes; i++)
1710     {
1711       basic_block bb = bbs[i];
1712
1713       /* We only consider conditional statement, which be executed at most once
1714          in each iteration of the loop.  So skip statements in inner loops.  */
1715       if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP))
1716         continue;
1717
1718       /* Actually this check is not a must constraint.  With it, we can ensure
1719          conditional statement will always be executed in each iteration.  */
1720       if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
1721         continue;
1722
1723       gcond *cond = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1724       if (!cond)
1725         continue;
1726
1727       edge branch_edge = get_cond_branch_to_split_loop (loop, cond);
1728
1729       if (branch_edge)
1730         {
1731           do_split_loop_on_cond (loop, branch_edge);
1732           do_split = true;
1733           break;
1734         }
1735     }
1736
1737   delete info;
1738   loop->aux = NULL;
1739
1740   return do_split;
1741 }
1742
1743 /* Main entry point.  Perform loop splitting on all suitable loops.  */
1744
1745 static unsigned int
1746 tree_ssa_split_loops (void)
1747 {
1748   bool changed = false;
1749
1750   gcc_assert (scev_initialized_p ());
1751
1752   calculate_dominance_info (CDI_POST_DOMINATORS);
1753
1754   for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1755     loop->aux = NULL;
1756
1757   /* Go through all loops starting from innermost.  */
1758   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
1759     {
1760       if (loop->aux)
1761         {
1762           /* If any of our inner loops was split, don't split us,
1763              and mark our containing loop as having had splits as well.
1764              This allows for delaying SSA update.  */
1765           loop_outer (loop)->aux = loop;
1766           continue;
1767         }
1768
1769       if (optimize_loop_for_size_p (loop))
1770         continue;
1771
1772       if (split_loop (loop) || split_loop_on_cond (loop))
1773         {
1774           /* Mark our containing loop as having had some split inner loops.  */
1775           loop_outer (loop)->aux = loop;
1776           changed = true;
1777         }
1778     }
1779
1780   for (auto loop : loops_list (cfun, LI_INCLUDE_ROOT))
1781     loop->aux = NULL;
1782
1783   clear_aux_for_blocks ();
1784
1785   free_dominance_info (CDI_POST_DOMINATORS);
1786
1787   if (changed)
1788     {
1789       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1790       return TODO_cleanup_cfg;
1791     }
1792   return 0;
1793 }
1794
1795 /* Loop splitting pass.  */
1796
1797 namespace {
1798
1799 const pass_data pass_data_loop_split =
1800 {
1801   GIMPLE_PASS, /* type */
1802   "lsplit", /* name */
1803   OPTGROUP_LOOP, /* optinfo_flags */
1804   TV_LOOP_SPLIT, /* tv_id */
1805   PROP_cfg, /* properties_required */
1806   0, /* properties_provided */
1807   0, /* properties_destroyed */
1808   0, /* todo_flags_start */
1809   0, /* todo_flags_finish */
1810 };
1811
1812 class pass_loop_split : public gimple_opt_pass
1813 {
1814 public:
1815   pass_loop_split (gcc::context *ctxt)
1816     : gimple_opt_pass (pass_data_loop_split, ctxt)
1817   {}
1818
1819   /* opt_pass methods: */
1820   bool gate (function *) final override { return flag_split_loops != 0; }
1821   unsigned int execute (function *) final override;
1822
1823 }; // class pass_loop_split
1824
1825 unsigned int
1826 pass_loop_split::execute (function *fun)
1827 {
1828   if (number_of_loops (fun) <= 1)
1829     return 0;
1830
1831   return tree_ssa_split_loops ();
1832 }
1833
1834 } // anon namespace
1835
1836 gimple_opt_pass *
1837 make_pass_loop_split (gcc::context *ctxt)
1838 {
1839   return new pass_loop_split (ctxt);
1840 }