gcc/cfgloopanal.c

   1 /* Natural loop analysis code for GNU compiler.
   2    Copyright (C) 2002-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it under
   7 the terms of the GNU General Public License as published by the Free
   8 Software Foundation; either version 3, or (at your option) any later
   9 version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "rtl.h"
  25 #include "tree.h"
  26 #include "predict.h"
  27 #include "emit-rtl.h"
  28 #include "cfgloop.h"
  29 #include "explow.h"
  30 #include "expr.h"
  31 #include "graphds.h"
  32 #include "params.h"
  33
  34 struct target_cfgloop default_target_cfgloop;
  35 #if SWITCHABLE_TARGET
  36 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
  37 #endif
  38
  39 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
  40
  41 bool
  42 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
  43 {
  44   /* It must be executed at least once each iteration.  */
  45   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
  46     return false;
  47
  48   /* And just once.  */
  49   if (bb->loop_father != loop)
  50     return false;
  51
  52   /* But this was not enough.  We might have some irreducible loop here.  */
  53   if (bb->flags & BB_IRREDUCIBLE_LOOP)
  54     return false;
  55
  56   return true;
  57 }
  58
  59 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
  60    throw away all latch edges and mark blocks inside any remaining cycle.
  61    Everything is a bit complicated due to fact we do not want to do this
  62    for parts of cycles that only "pass" through some loop -- i.e. for
  63    each cycle, we want to mark blocks that belong directly to innermost
  64    loop containing the whole cycle.
  65
  66    LOOPS is the loop tree.  */
  67
  68 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
  69 #define BB_REPR(BB) ((BB)->index + 1)
  70
  71 bool
  72 mark_irreducible_loops (void)
  73 {
  74   basic_block act;
  75   struct graph_edge *ge;
  76   edge e;
  77   edge_iterator ei;
  78   int src, dest;
  79   unsigned depth;
  80   struct graph *g;
  81   int num = number_of_loops (cfun);
  82   struct loop *cloop;
  83   bool irred_loop_found = false;
  84   int i;
  85
  86   gcc_assert (current_loops != NULL);
  87
  88   /* Reset the flags.  */
  89   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
  90                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
  91     {
  92       act->flags &= ~BB_IRREDUCIBLE_LOOP;
  93       FOR_EACH_EDGE (e, ei, act->succs)
  94         e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
  95     }
  96
  97   /* Create the edge lists.  */
  98   g = new_graph (last_basic_block_for_fn (cfun) + num);
  99
 100   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
 101                   EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
 102     FOR_EACH_EDGE (e, ei, act->succs)
 103       {
 104         /* Ignore edges to exit.  */
 105         if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
 106           continue;
 107
 108         src = BB_REPR (act);
 109         dest = BB_REPR (e->dest);
 110
 111         /* Ignore latch edges.  */
 112         if (e->dest->loop_father->header == e->dest
 113             && e->dest->loop_father->latch == act)
 114           continue;
 115
 116         /* Edges inside a single loop should be left where they are.  Edges
 117            to subloop headers should lead to representative of the subloop,
 118            but from the same place.
 119
 120            Edges exiting loops should lead from representative
 121            of the son of nearest common ancestor of the loops in that
 122            act lays.  */
 123
 124         if (e->dest->loop_father->header == e->dest)
 125           dest = LOOP_REPR (e->dest->loop_father);
 126
 127         if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
 128           {
 129             depth = 1 + loop_depth (find_common_loop (act->loop_father,
 130                                                       e->dest->loop_father));
 131             if (depth == loop_depth (act->loop_father))
 132               cloop = act->loop_father;
 133             else
 134               cloop = (*act->loop_father->superloops)[depth];
 135
 136             src = LOOP_REPR (cloop);
 137           }
 138
 139         add_edge (g, src, dest)->data = e;
 140       }
 141
 142   /* Find the strongly connected components.  */
 143   graphds_scc (g, NULL);
 144
 145   /* Mark the irreducible loops.  */
 146   for (i = 0; i < g->n_vertices; i++)
 147     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
 148       {
 149         edge real = (edge) ge->data;
 150         /* edge E in graph G is irreducible if it connects two vertices in the
 151            same scc.  */
 152
 153         /* All edges should lead from a component with higher number to the
 154            one with lower one.  */
 155         gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
 156
 157         if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
 158           continue;
 159
 160         real->flags |= EDGE_IRREDUCIBLE_LOOP;
 161         irred_loop_found = true;
 162         if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
 163           real->src->flags |= BB_IRREDUCIBLE_LOOP;
 164       }
 165
 166   free_graph (g);
 167
 168   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
 169   return irred_loop_found;
 170 }
 171
 172 /* Counts number of insns inside LOOP.  */
 173 int
 174 num_loop_insns (const struct loop *loop)
 175 {
 176   basic_block *bbs, bb;
 177   unsigned i, ninsns = 0;
 178   rtx_insn *insn;
 179
 180   bbs = get_loop_body (loop);
 181   for (i = 0; i < loop->num_nodes; i++)
 182     {
 183       bb = bbs[i];
 184       FOR_BB_INSNS (bb, insn)
 185         if (NONDEBUG_INSN_P (insn))
 186           ninsns++;
 187     }
 188   free (bbs);
 189
 190   if (!ninsns)
 191     ninsns = 1; /* To avoid division by zero.  */
 192
 193   return ninsns;
 194 }
 195
 196 /* Counts number of insns executed on average per iteration LOOP.  */
 197 int
 198 average_num_loop_insns (const struct loop *loop)
 199 {
 200   basic_block *bbs, bb;
 201   unsigned i, binsns, ninsns, ratio;
 202   rtx_insn *insn;
 203
 204   ninsns = 0;
 205   bbs = get_loop_body (loop);
 206   for (i = 0; i < loop->num_nodes; i++)
 207     {
 208       bb = bbs[i];
 209
 210       binsns = 0;
 211       FOR_BB_INSNS (bb, insn)
 212         if (NONDEBUG_INSN_P (insn))
 213           binsns++;
 214
 215       ratio = loop->header->frequency == 0
 216               ? BB_FREQ_MAX
 217               : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
 218       ninsns += binsns * ratio;
 219     }
 220   free (bbs);
 221
 222   ninsns /= BB_FREQ_MAX;
 223   if (!ninsns)
 224     ninsns = 1; /* To avoid division by zero.  */
 225
 226   return ninsns;
 227 }
 228
 229 /* Returns expected number of iterations of LOOP, according to
 230    measured or guessed profile.  No bounding is done on the
 231    value.  */
 232
 233 gcov_type
 234 expected_loop_iterations_unbounded (struct loop *loop)
 235 {
 236   edge e;
 237   edge_iterator ei;
 238   gcov_type expected;
 239
 240
 241   /* Average loop rolls about 3 times. If we have no profile at all, it is
 242      best we can do.  */
 243   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
 244     expected = 3;
 245   else if (loop->latch->count || loop->header->count)
 246     {
 247       gcov_type count_in, count_latch;
 248
 249       count_in = 0;
 250       count_latch = 0;
 251
 252       FOR_EACH_EDGE (e, ei, loop->header->preds)
 253         if (e->src == loop->latch)
 254           count_latch = e->count;
 255         else
 256           count_in += e->count;
 257
 258       if (count_in == 0)
 259         expected = count_latch * 2;
 260       else
 261         expected = (count_latch + count_in - 1) / count_in;
 262     }
 263   else
 264     {
 265       int freq_in, freq_latch;
 266
 267       freq_in = 0;
 268       freq_latch = 0;
 269
 270       FOR_EACH_EDGE (e, ei, loop->header->preds)
 271         if (e->src == loop->latch)
 272           freq_latch = EDGE_FREQUENCY (e);
 273         else
 274           freq_in += EDGE_FREQUENCY (e);
 275
 276       if (freq_in == 0)
 277         {
 278           /* If we have no profile at all, expect 3 iterations.  */
 279           if (!freq_latch)
 280             expected = 3;
 281           else
 282             expected = freq_latch * 2;
 283         }
 284       else
 285         expected = (freq_latch + freq_in - 1) / freq_in;
 286     }
 287
 288   HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
 289   if (max != -1 && max < expected)
 290     return max;
 291   return expected;
 292 }
 293
 294 /* Returns expected number of LOOP iterations.  The returned value is bounded
 295    by REG_BR_PROB_BASE.  */
 296
 297 unsigned
 298 expected_loop_iterations (struct loop *loop)
 299 {
 300   gcov_type expected = expected_loop_iterations_unbounded (loop);
 301   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
 302 }
 303
 304 /* Returns the maximum level of nesting of subloops of LOOP.  */
 305
 306 unsigned
 307 get_loop_level (const struct loop *loop)
 308 {
 309   const struct loop *ploop;
 310   unsigned mx = 0, l;
 311
 312   for (ploop = loop->inner; ploop; ploop = ploop->next)
 313     {
 314       l = get_loop_level (ploop);
 315       if (l >= mx)
 316         mx = l + 1;
 317     }
 318   return mx;
 319 }
 320
 321 /* Initialize the constants for computing set costs.  */
 322
 323 void
 324 init_set_costs (void)
 325 {
 326   int speed;
 327   rtx_insn *seq;
 328   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
 329   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
 330   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
 331   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
 332   unsigned i;
 333
 334   target_avail_regs = 0;
 335   target_clobbered_regs = 0;
 336   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
 337     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
 338         && !fixed_regs[i])
 339       {
 340         target_avail_regs++;
 341         if (call_used_regs[i])
 342           target_clobbered_regs++;
 343       }
 344
 345   target_res_regs = 3;
 346
 347   for (speed = 0; speed < 2; speed++)
 348      {
 349       crtl->maybe_hot_insn_p = speed;
 350       /* Set up the costs for using extra registers:
 351
 352          1) If not many free registers remain, we should prefer having an
 353             additional move to decreasing the number of available registers.
 354             (TARGET_REG_COST).
 355          2) If no registers are available, we need to spill, which may require
 356             storing the old value to memory and loading it back
 357             (TARGET_SPILL_COST).  */
 358
 359       start_sequence ();
 360       emit_move_insn (reg1, reg2);
 361       seq = get_insns ();
 362       end_sequence ();
 363       target_reg_cost [speed] = seq_cost (seq, speed);
 364
 365       start_sequence ();
 366       emit_move_insn (mem, reg1);
 367       emit_move_insn (reg2, mem);
 368       seq = get_insns ();
 369       end_sequence ();
 370       target_spill_cost [speed] = seq_cost (seq, speed);
 371     }
 372   default_rtl_profile ();
 373 }
 374
 375 /* Estimates cost of increased register pressure caused by making N_NEW new
 376    registers live around the loop.  N_OLD is the number of registers live
 377    around the loop.  If CALL_P is true, also take into account that
 378    call-used registers may be clobbered in the loop body, reducing the
 379    number of available registers before we spill.  */
 380
 381 unsigned
 382 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
 383                             bool call_p)
 384 {
 385   unsigned cost;
 386   unsigned regs_needed = n_new + n_old;
 387   unsigned available_regs = target_avail_regs;
 388
 389   /* If there is a call in the loop body, the call-clobbered registers
 390      are not available for loop invariants.  */
 391   if (call_p)
 392     available_regs = available_regs - target_clobbered_regs;
 393
 394   /* If we have enough registers, we should use them and not restrict
 395      the transformations unnecessarily.  */
 396   if (regs_needed + target_res_regs <= available_regs)
 397     return 0;
 398
 399   if (regs_needed <= available_regs)
 400     /* If we are close to running out of registers, try to preserve
 401        them.  */
 402     cost = target_reg_cost [speed] * n_new;
 403   else
 404     /* If we run out of registers, it is very expensive to add another
 405        one.  */
 406     cost = target_spill_cost [speed] * n_new;
 407
 408   if (optimize && (flag_ira_region == IRA_REGION_ALL
 409                    || flag_ira_region == IRA_REGION_MIXED)
 410       && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
 411     /* IRA regional allocation deals with high register pressure
 412        better.  So decrease the cost (to do more accurate the cost
 413        calculation for IRA, we need to know how many registers lives
 414        through the loop transparently).  */
 415     cost /= 2;
 416
 417   return cost;
 418 }
 419
 420 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
 421
 422 void
 423 mark_loop_exit_edges (void)
 424 {
 425   basic_block bb;
 426   edge e;
 427
 428   if (number_of_loops (cfun) <= 1)
 429     return;
 430
 431   FOR_EACH_BB_FN (bb, cfun)
 432     {
 433       edge_iterator ei;
 434
 435       FOR_EACH_EDGE (e, ei, bb->succs)
 436         {
 437           if (loop_outer (bb->loop_father)
 438               && loop_exit_edge_p (bb->loop_father, e))
 439             e->flags |= EDGE_LOOP_EXIT;
 440           else
 441             e->flags &= ~EDGE_LOOP_EXIT;
 442         }
 443     }
 444 }
 445
 446 /* Return exit edge if loop has only one exit that is likely
 447    to be executed on runtime (i.e. it is not EH or leading
 448    to noreturn call.  */
 449
 450 edge
 451 single_likely_exit (struct loop *loop)
 452 {
 453   edge found = single_exit (loop);
 454   vec<edge> exits;
 455   unsigned i;
 456   edge ex;
 457
 458   if (found)
 459     return found;
 460   exits = get_loop_exit_edges (loop);
 461   FOR_EACH_VEC_ELT (exits, i, ex)
 462     {
 463       if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
 464         continue;
 465       /* The constant of 5 is set in a way so noreturn calls are
 466          ruled out by this test.  The static branch prediction algorithm
 467          will not assign such a low probability to conditionals for usual
 468          reasons.  */
 469       if (profile_status_for_fn (cfun) != PROFILE_ABSENT
 470           && ex->probability < 5 && !ex->count)
 471         continue;
 472       if (!found)
 473         found = ex;
 474       else
 475         {
 476           exits.release ();
 477           return NULL;
 478         }
 479     }
 480   exits.release ();
 481   return found;
 482 }
 483
 484
 485 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
 486    order against direction of edges from latch.  Specially, if
 487    header != latch, latch is the 1-st block.  */
 488
 489 vec<basic_block>
 490 get_loop_hot_path (const struct loop *loop)
 491 {
 492   basic_block bb = loop->header;
 493   vec<basic_block> path = vNULL;
 494   bitmap visited = BITMAP_ALLOC (NULL);
 495
 496   while (true)
 497     {
 498       edge_iterator ei;
 499       edge e;
 500       edge best = NULL;
 501
 502       path.safe_push (bb);
 503       bitmap_set_bit (visited, bb->index);
 504       FOR_EACH_EDGE (e, ei, bb->succs)
 505         if ((!best || e->probability > best->probability)
 506             && !loop_exit_edge_p (loop, e)
 507             && !bitmap_bit_p (visited, e->dest->index))
 508           best = e;
 509       if (!best || best->dest == loop->header)
 510         break;
 511       bb = best->dest;
 512     }
 513   BITMAP_FREE (visited);
 514   return path;
 515 }