/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "symtab.h"
#include "expr.h"
#include "graphds.h"
#include "params.h"
struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif
/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}
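
/* For example, a block in just one arm of a conditional inside LOOP fails
   the dominance test above, while a block inside a subloop of LOOP (which
   may run several times per iteration of LOOP) fails the loop_father
   test.  */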

/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to do
   this for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark the blocks that belong directly to the
   innermost loop containing the whole cycle.

   LOOPS is the loop tree.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)
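
/* Illustration of the numbering (hypothetical sizes): with
   last_basic_block_for_fn (cfun) == 10, BB_REPR maps the block with
   index 4 to vertex 5, while LOOP_REPR maps the loop with num == 2 to
   vertex 12, so every block and every loop gets a vertex in the graph of
   last_basic_block_for_fn (cfun) + number_of_loops (cfun) vertices
   allocated below.  */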

bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }

  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but from the same place.

	   Edges exiting a loop should lead from the representative of the
	   child of the nearest common ancestor of the loops in which ACT
	   lies.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest)->data = e;
      }

  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* Edge E in graph G is irreducible if it connects two vertices in
	   the same SCC.  */

	/* All edges should lead from a component with a higher number to
	   one with a lower number.  */
	gcc_assert (g->vertices[ge->src].component
		    >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}

/* Counts the number of insns inside LOOP.  */
int
num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}

/* Counts the number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns, ninsns, ratio;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

      ratio = loop->header->frequency == 0
	      ? BB_FREQ_MAX
	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
      ninsns += binsns * ratio;
    }
  free (bbs);

  ninsns /= BB_FREQ_MAX;
  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}
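
/* For instance (hypothetical frequencies): a block with 4 insns and
   frequency 500 inside a loop whose header has frequency 1000 contributes
   4 * ((500 * BB_FREQ_MAX) / 1000) == 2 * BB_FREQ_MAX to the sum above,
   i.e. 2 insns per average iteration once the final division by
   BB_FREQ_MAX is done.  */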

/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.  No bounding is done on the
   value.  */

gcov_type
expected_loop_iterations_unbounded (const struct loop *loop)
{
  edge e;
  edge_iterator ei;

  if (loop->latch->count || loop->header->count)
    {
      gcov_type count_in, count_latch, expected;

      count_in = 0;
      count_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count;
	else
	  count_in += e->count;

      if (count_in == 0)
	expected = count_latch * 2;
      else
	expected = (count_latch + count_in - 1) / count_in;

      return expected;
    }
  else
    {
      int freq_in, freq_latch;

      freq_in = 0;
      freq_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  freq_latch = EDGE_FREQUENCY (e);
	else
	  freq_in += EDGE_FREQUENCY (e);

      if (freq_in == 0)
	return freq_latch * 2;

      return (freq_latch + freq_in - 1) / freq_in;
    }
}
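
/* Worked example (hypothetical profile): if the header is entered 10 times
   from outside the loop (count_in == 10) and 90 times via the latch
   (count_latch == 90), the first branch above returns
   (90 + 10 - 1) / 10 == 9, i.e. about nine latch traversals per entry
   into the loop.  */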

/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (const struct loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}

/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}
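
/* E.g. get_loop_level returns 0 for a loop with no subloops, and 2 for a
   loop whose only subloop itself contains a further (innermost) loop.  */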

/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER);
  rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1);
  rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	if (call_used_regs[i])
	  target_clobbered_regs++;
      }

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}

/* Estimates the cost of increased register pressure caused by making N_NEW
   new registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation for
       IRA more accurate, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}
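
/* Worked example (hypothetical target values): with target_avail_regs == 10,
   target_res_regs == 3 and CALL_P false, a request with n_old == 6 and
   n_new == 2 gives regs_needed == 8; since 8 + 3 > 10 but 8 <= 10, the
   function returns target_reg_cost[speed] * 2 rather than the much larger
   spill-based cost.  */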

/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (loop_outer (bb->loop_father)
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}

/* Return the exit edge if LOOP has only one exit that is likely to be
   executed at runtime (i.e. it is not an EH edge or an edge leading to a
   noreturn call).  */

edge
single_likely_exit (struct loop *loop)
{
  edge found = single_exit (loop);
  vec<edge> exits;
  unsigned i;
  edge ex;

  if (found)
    return found;
  exits = get_loop_exit_edges (loop);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
	continue;
      /* The constant of 5 is chosen so that noreturn calls are
	 ruled out by this test.  The static branch prediction algorithm
	 will not assign such a low probability to conditionals for usual
	 reasons.  */
      if (profile_status_for_fn (cfun) != PROFILE_ABSENT
	  && ex->probability < 5 && !ex->count)
	continue;
      if (!found)
	found = ex;
      else
	{
	  exits.release ();
	  return NULL;
	}
    }

  exits.release ();
  return found;
}
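
/* Usage sketch: a transformation that needs a unique exit likely to be
   taken at runtime can simply do

     edge exit = single_likely_exit (loop);
     if (exit)
       ... transform using EXIT ...

   instead of walking get_loop_exit_edges () by hand.  */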

/* Returns the list of basic blocks on the hottest path through LOOP:
   starting from the header, repeatedly follow the successor edge with the
   highest probability that stays inside the loop and leads to a block not
   visited yet.  */

vec<basic_block>
get_loop_hot_path (const struct loop *loop)
{
  basic_block bb = loop->header;
  vec<basic_block> path = vNULL;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
      FOR_EACH_EDGE (e, ei, bb->succs)
	if ((!best || e->probability > best->probability)
	    && !loop_exit_edge_p (loop, e)
	    && !bitmap_bit_p (visited, e->dest->index))
	  best = e;
      if (!best || best->dest == loop->header)
	break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}