/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "predict.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "cfgloop.h"
#include "explow.h"
#include "expr.h"
#include "graphds.h"
#include "sreal.h"
#include "regs.h"
#include "function-abi.h"

struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif

/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}
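
/* An illustrative (hypothetical) case for the checks above: if LOOP
   contains an inner loop and BB is that inner loop's header, BB can
   dominate LOOP's latch and yet execute several times per iteration of
   LOOP; the bb->loop_father != loop test rejects exactly that case, and
   the BB_IRREDUCIBLE_LOOP test likewise rejects blocks that sit on an
   unrecognized cycle and may re-execute within one iteration.  */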

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to
   do this for parts of cycles that only "pass" through some loop -- i.e.
   for each cycle, we want to mark blocks that belong directly to the
   innermost loop containing the whole cycle.

   LOOPS is the loop tree.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)
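
/* A hypothetical example of the numbering above: with
   last_basic_block_for_fn (cfun) == 10, the basic block with index 3 is
   vertex 4 (BB_REPR), while the loop with num == 2 is vertex 12
   (LOOP_REPR); loop representatives are thus placed after the range used
   by block representatives in the graph allocated below.  */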

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  class loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
                  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
        e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
                  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
        /* Ignore edges to exit.  */
        if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
          continue;

        src = BB_REPR (act);
        dest = BB_REPR (e->dest);

        /* Ignore latch edges.  */
        if (e->dest->loop_father->header == e->dest
            && dominated_by_p (CDI_DOMINATORS, act, e->dest))
          continue;

        /* Edges inside a single loop should be left where they are.  Edges
           to subloop headers should lead to the representative of the
           subloop, but from the same place.

           Edges exiting loops should lead from the representative of the
           son of the nearest common ancestor of the loops in which ACT
           lies.  */

        if (e->dest->loop_father->header == e->dest)
          dest = LOOP_REPR (e->dest->loop_father);

        if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
          {
            depth = 1 + loop_depth (find_common_loop (act->loop_father,
                                                      e->dest->loop_father));
            if (depth == loop_depth (act->loop_father))
              cloop = act->loop_father;
            else
              cloop = (*act->loop_father->superloops)[depth];

            src = LOOP_REPR (cloop);
          }

        add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
        edge real = (edge) ge->data;
        /* Edge E in graph G is irreducible if it connects two vertices in
           the same scc.  */

        /* All edges should lead from a component with higher number to the
           one with lower one.  */
        gcc_assert (g->vertices[ge->src].component
                    >= g->vertices[ge->dest].component);

        if (g->vertices[ge->src].component
            != g->vertices[ge->dest].component)
          continue;

        real->flags |= EDGE_IRREDUCIBLE_LOOP;
        irred_loop_found = true;
        if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
          real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}
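
/* A classic irreducible region, for illustration: two blocks B1 and B2
   forming a cycle that can be entered both at B1 and at B2.  Neither
   block dominates the other, so the cycle is not a natural loop and its
   edges survive the latch-edge removal above; graphds_scc then puts B1
   and B2 into one component, and the edges between them are marked
   EDGE_IRREDUCIBLE_LOOP.  */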

/* Counts number of insns inside LOOP.  */

int
num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

/* Counts number of insns executed on average per iteration of LOOP.  */

int
average_num_loop_insns (const class loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns;
  sreal ninsns;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn))
          binsns++;

      ninsns += (sreal) binsns * bb->count.to_sreal_scale (loop->header->count);
      /* Avoid overflows.  */
      if (ninsns > 1000000)
        {
          free (bbs);
          return 1000000;
        }
    }
  free (bbs);

  int64_t ret = ninsns.to_int ();
  if (!ret)
    ret = 1; /* To avoid division by zero.  */

  return ret;
}
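
/* For instance, with a hypothetical profile: a block containing 4
   non-debug insns whose count is 300, inside a loop whose header count
   is 100, scales by 3 and contributes 12 to NINSNS -- each block is
   weighted by how often it runs relative to the loop header.  */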

/* Returns the expected number of iterations of LOOP, according to the
   measured or guessed profile.

   This function attempts to return a "sane" value even if the profile
   information is not good enough to derive one.
   If BY_PROFILE_ONLY is set, this logic is bypassed and the function
   returns -1 in those scenarios.  */

gcov_type
expected_loop_iterations_unbounded (const class loop *loop,
                                    bool *read_profile_p,
                                    bool by_profile_only)
{
  edge e;
  edge_iterator ei;
  gcov_type expected = -1;

  if (read_profile_p)
    *read_profile_p = false;

  /* If we have no profile at all, use AVG_LOOP_NITER.  */
  if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
    {
      if (by_profile_only)
        return -1;
      expected = param_avg_loop_niter;
    }
  else if (loop->latch && (loop->latch->count.initialized_p ()
                           || loop->header->count.initialized_p ()))
    {
      profile_count count_in = profile_count::zero (),
                    count_latch = profile_count::zero ();

      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src == loop->latch)
          count_latch = e->count ();
        else
          count_in += e->count ();

      if (!count_latch.initialized_p ())
        {
          if (by_profile_only)
            return -1;
          expected = param_avg_loop_niter;
        }
      else if (!count_in.nonzero_p ())
        {
          if (by_profile_only)
            return -1;
          expected = count_latch.to_gcov_type () * 2;
        }
      else
        {
          expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
                      - 1) / count_in.to_gcov_type ();
          if (read_profile_p
              && count_latch.reliable_p () && count_in.reliable_p ())
            *read_profile_p = true;
        }
    }
  else
    {
      if (by_profile_only)
        return -1;
      expected = param_avg_loop_niter;
    }

  if (!by_profile_only)
    {
      HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
      if (max != -1 && max < expected)
        return max;
    }

  return expected;
}
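
/* A worked example with hypothetical counts: if the header is entered
   100 times from outside the loop (COUNT_IN) and 900 times from the
   latch (COUNT_LATCH), the ceiling division above gives
   (900 + 100 - 1) / 100 == 9, i.e. roughly nine latch traversals (ten
   executions of the body) per entry into the loop.  */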

/* Returns the expected number of LOOP iterations.  The returned value is
   bounded by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (class loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const class loop *loop)
{
  const class loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
        mx = l + 1;
    }
  return mx;
}
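
/* For example: a LOOP without subloops has level 0; if its only subloop
   itself contains a further loop, get_loop_level returns 2.  */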

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
        && !fixed_regs[i])
      {
        target_avail_regs++;
        /* ??? This is only a rough heuristic.  It doesn't cope well
           with alternative ABIs, but that's an optimization rather than
           correctness issue.  */
        if (default_function_abi.clobbers_full_reg_p (i))
          target_clobbered_regs++;
      }

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

         1) If not many free registers remain, we should prefer having an
            additional move to decreasing the number of available registers
            (TARGET_REG_COST).

         2) If no registers are available, we need to spill, which may
            require storing the old value to memory and loading it back
            (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost[speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost[speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}
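
/* Note that TARGET_SPILL_COST is derived from a store-plus-load sequence
   while TARGET_REG_COST is a single register-to-register move, so on
   typical targets the spill cost comes out considerably higher;
   estimate_reg_pressure_cost below depends on that asymmetry.  */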

/* Estimates the cost of increased register pressure caused by making N_NEW
   new registers live around the loop.  N_OLD is the number of registers
   live around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
                            bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost[speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost[speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
                   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation more
       accurate for IRA, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}
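
/* A worked example with hypothetical numbers: with target_avail_regs == 8,
   target_res_regs == 3, N_OLD == 4 and N_NEW == 2, REGS_NEEDED is 6;
   since 6 + 3 > 8 but 6 <= 8, the result is target_reg_cost[SPEED] * 2.
   With N_NEW == 6, REGS_NEEDED would exceed AVAILABLE_REGS and the much
   larger target_spill_cost would be charged instead.  */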

/* Sets the EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (loop_outer (bb->loop_father)
              && loop_exit_edge_p (bb->loop_father, e))
            e->flags |= EDGE_LOOP_EXIT;
          else
            e->flags &= ~EDGE_LOOP_EXIT;
        }
    }
}

/* Returns the exit edge if LOOP has only one exit that is likely to be
   executed at runtime (i.e. it is not an EH edge or an edge leading to a
   noreturn call).  */

edge
single_likely_exit (class loop *loop, const vec<edge> &exits)
{
  edge found = single_exit (loop);
  unsigned i;
  edge ex;

  if (found)
    return found;
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (probably_never_executed_edge_p (cfun, ex)
          /* We want to rule out paths to noreturns but not low probabilities
             resulting from adjustments or combining.
             FIXME: once we have better quality tracking, make this more
             meaningful.  */
          || ex->probability <= profile_probability::very_unlikely ())
        continue;
      if (!found)
        found = ex;
      else
        return NULL;
    }
  return found;
}

/* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
   order against direction of edges from latch.  Specially, if
   header != latch, latch is the 1-st block.  */

auto_vec<basic_block>
get_loop_hot_path (const class loop *loop)
{
  basic_block bb = loop->header;
  auto_vec<basic_block> path;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
      FOR_EACH_EDGE (e, ei, bb->succs)
        if ((!best || e->probability > best->probability)
            && !loop_exit_edge_p (loop, e)
            && !bitmap_bit_p (visited, e->dest->index))
          best = e;
      if (!best || best->dest == loop->header)
        break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}
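
/* An illustration with hypothetical probabilities: if the header branches
   to B1 with probability 90% and to an exit with probability 10%, and B1
   branches only back to the header, the walk above records [header, B1]
   -- exit edges and already-visited blocks are never chosen, so the
   traversal stops once no eligible successor remains.  */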