/* Natural loop analysis code for GNU compiler.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "rtl.h"
#include "cfgloop.h"
#include "tree.h"
#include "flags.h"
#include "alias.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "graphds.h"
#include "params.h"
struct target_cfgloop default_target_cfgloop;
#if SWITCHABLE_TARGET
struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
#endif
/* Checks whether BB is executed exactly once in each LOOP iteration.  */

bool
just_once_each_iteration_p (const struct loop *loop, const_basic_block bb)
{
  /* It must be executed at least once each iteration.  */
  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return false;

  /* And just once.  */
  if (bb->loop_father != loop)
    return false;

  /* But this was not enough.  We might have some irreducible loop here.  */
  if (bb->flags & BB_IRREDUCIBLE_LOOP)
    return false;

  return true;
}
/* Marks blocks and edges that are part of non-recognized loops; i.e. we
   throw away all latch edges and mark blocks inside any remaining cycle.
   Everything is a bit complicated due to the fact that we do not want to do
   this for parts of cycles that only "pass" through some loop -- i.e. for
   each cycle, we want to mark blocks that belong directly to the innermost
   loop containing the whole cycle.

   LOOPS is the loop tree.  */

#define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
#define BB_REPR(BB) ((BB)->index + 1)
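/* For illustration (numbers are hypothetical): the auxiliary graph built
   below has one vertex per basic block (BB_REPR, the block index shifted
   by one) and one extra vertex per loop (LOOP_REPR, numbered after all
   basic-block slots).  E.g. a block with index 7 maps to vertex 8, while
   the loop with number 2 maps to vertex
   last_basic_block_for_fn (cfun) + 2.  */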
bool
mark_irreducible_loops (void)
{
  basic_block act;
  struct graph_edge *ge;
  edge e;
  edge_iterator ei;
  int src, dest;
  unsigned depth;
  struct graph *g;
  int num = number_of_loops (cfun);
  struct loop *cloop;
  bool irred_loop_found = false;
  int i;

  gcc_assert (current_loops != NULL);

  /* Reset the flags.  */
  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    {
      act->flags &= ~BB_IRREDUCIBLE_LOOP;
      FOR_EACH_EDGE (e, ei, act->succs)
	e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
    }
  /* Create the edge lists.  */
  g = new_graph (last_basic_block_for_fn (cfun) + num);

  FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
		  EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
    FOR_EACH_EDGE (e, ei, act->succs)
      {
	/* Ignore edges to exit.  */
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  continue;

	src = BB_REPR (act);
	dest = BB_REPR (e->dest);

	/* Ignore latch edges.  */
	if (e->dest->loop_father->header == e->dest
	    && e->dest->loop_father->latch == act)
	  continue;

	/* Edges inside a single loop should be left where they are.  Edges
	   to subloop headers should lead to the representative of the
	   subloop, but from the same place.

	   Edges exiting loops should lead from the representative of the
	   son of the nearest common ancestor of the loops in which ACT
	   lies.  */

	if (e->dest->loop_father->header == e->dest)
	  dest = LOOP_REPR (e->dest->loop_father);

	if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
	  {
	    depth = 1 + loop_depth (find_common_loop (act->loop_father,
						      e->dest->loop_father));
	    if (depth == loop_depth (act->loop_father))
	      cloop = act->loop_father;
	    else
	      cloop = (*act->loop_father->superloops)[depth];

	    src = LOOP_REPR (cloop);
	  }

	add_edge (g, src, dest)->data = e;
      }
  /* Find the strongly connected components.  */
  graphds_scc (g, NULL);

  /* Mark the irreducible loops.  */
  for (i = 0; i < g->n_vertices; i++)
    for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
      {
	edge real = (edge) ge->data;
	/* An edge in graph G is irreducible if it connects two vertices
	   in the same SCC.  */

	/* All edges should lead from a component with a higher number to
	   one with a lower number.  */
	gcc_assert (g->vertices[ge->src].component
		    >= g->vertices[ge->dest].component);

	if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
	  continue;

	real->flags |= EDGE_IRREDUCIBLE_LOOP;
	irred_loop_found = true;
	if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
	  real->src->flags |= BB_IRREDUCIBLE_LOOP;
      }

  free_graph (g);

  loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
  return irred_loop_found;
}
/* Counts number of insns inside LOOP.  */
int
num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, ninsns = 0;
  rtx_insn *insn;

  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  ninsns++;
    }
  free (bbs);

  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}
/* Counts number of insns executed on average per iteration of LOOP.  */
int
average_num_loop_insns (const struct loop *loop)
{
  basic_block *bbs, bb;
  unsigned i, binsns, ninsns, ratio;
  rtx_insn *insn;

  ninsns = 0;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      binsns = 0;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  binsns++;

      ratio = loop->header->frequency == 0
	      ? BB_FREQ_MAX
	      : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency;
      ninsns += binsns * ratio;
    }
  free (bbs);

  ninsns /= BB_FREQ_MAX;
  if (!ninsns)
    ninsns = 1; /* To avoid division by zero.  */

  return ninsns;
}
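/* Worked example for the weighting above (hypothetical frequencies): a
   block holding 4 insns whose frequency is half of the header's gets
   ratio BB_FREQ_MAX / 2 and thus contributes 4 * BB_FREQ_MAX / 2 to the
   sum, i.e. 2 insns after the final division by BB_FREQ_MAX -- it is only
   executed on about half of the iterations.  */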
/* Returns expected number of iterations of LOOP, according to
   measured or guessed profile.  No bounding is done on the
   value.  */

gcov_type
expected_loop_iterations_unbounded (const struct loop *loop)
{
  edge e;
  edge_iterator ei;

  if (loop->latch->count || loop->header->count)
    {
      gcov_type count_in, count_latch, expected;

      count_in = 0;
      count_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  count_latch = e->count;
	else
	  count_in += e->count;

      if (count_in == 0)
	expected = count_latch * 2;
      else
	expected = (count_latch + count_in - 1) / count_in;

      return expected;
    }
  else
    {
      int freq_in, freq_latch;

      freq_in = 0;
      freq_latch = 0;

      FOR_EACH_EDGE (e, ei, loop->header->preds)
	if (e->src == loop->latch)
	  freq_latch = EDGE_FREQUENCY (e);
	else
	  freq_in += EDGE_FREQUENCY (e);

      if (freq_in == 0)
	return freq_latch * 2;

      return (freq_latch + freq_in - 1) / freq_in;
    }
}
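/* Worked example for the formula above (hypothetical counts, not from any
   particular profile): if the latch edge was taken 90 times and the edges
   entering the loop were taken 10 times in total, the expected iteration
   count is (90 + 10 - 1) / 10 = 9, i.e. roughly nine latch traversals per
   entry into the loop.  */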
/* Returns expected number of LOOP iterations.  The returned value is bounded
   by REG_BR_PROB_BASE.  */

unsigned
expected_loop_iterations (const struct loop *loop)
{
  gcov_type expected = expected_loop_iterations_unbounded (loop);
  return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
}
/* Returns the maximum level of nesting of subloops of LOOP.  */

unsigned
get_loop_level (const struct loop *loop)
{
  const struct loop *ploop;
  unsigned mx = 0, l;

  for (ploop = loop->inner; ploop; ploop = ploop->next)
    {
      l = get_loop_level (ploop);
      if (l >= mx)
	mx = l + 1;
    }
  return mx;
}
/* Initialize the constants for computing set costs.  */

void
init_set_costs (void)
{
  int speed;
  rtx_insn *seq;
  rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
  rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
  rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
  rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
  unsigned i;

  target_avail_regs = 0;
  target_clobbered_regs = 0;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
	&& !fixed_regs[i])
      {
	target_avail_regs++;
	if (call_used_regs[i])
	  target_clobbered_regs++;
      }

  target_res_regs = 3;

  for (speed = 0; speed < 2; speed++)
    {
      crtl->maybe_hot_insn_p = speed;
      /* Set up the costs for using extra registers:

	 1) If not many free registers remain, we should prefer having an
	    additional move to decreasing the number of available registers.
	    (TARGET_REG_COST).
	 2) If no registers are available, we need to spill, which may require
	    storing the old value to memory and loading it back
	    (TARGET_SPILL_COST).  */

      start_sequence ();
      emit_move_insn (reg1, reg2);
      seq = get_insns ();
      end_sequence ();
      target_reg_cost [speed] = seq_cost (seq, speed);

      start_sequence ();
      emit_move_insn (mem, reg1);
      emit_move_insn (reg2, mem);
      seq = get_insns ();
      end_sequence ();
      target_spill_cost [speed] = seq_cost (seq, speed);
    }
  default_rtl_profile ();
}
/* Estimates cost of increased register pressure caused by making N_NEW new
   registers live around the loop.  N_OLD is the number of registers live
   around the loop.  If CALL_P is true, also take into account that
   call-used registers may be clobbered in the loop body, reducing the
   number of available registers before we spill.  */

unsigned
estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
			    bool call_p)
{
  unsigned cost;
  unsigned regs_needed = n_new + n_old;
  unsigned available_regs = target_avail_regs;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (call_p)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers, we should use them and not restrict
     the transformations unnecessarily.  */
  if (regs_needed + target_res_regs <= available_regs)
    return 0;

  if (regs_needed <= available_regs)
    /* If we are close to running out of registers, try to preserve
       them.  */
    cost = target_reg_cost [speed] * n_new;
  else
    /* If we run out of registers, it is very expensive to add another
       one.  */
    cost = target_spill_cost [speed] * n_new;

  if (optimize && (flag_ira_region == IRA_REGION_ALL
		   || flag_ira_region == IRA_REGION_MIXED)
      && number_of_loops (cfun) <= (unsigned) IRA_MAX_LOOPS_NUM)
    /* IRA regional allocation deals with high register pressure
       better.  So decrease the cost (to make the cost calculation for
       IRA more accurate, we would need to know how many registers live
       through the loop transparently).  */
    cost /= 2;

  return cost;
}
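/* Illustrative reading of the cost model above (hypothetical numbers; the
   real values are target-dependent and set by init_set_costs): with
   target_avail_regs == 16, target_res_regs == 3 and no call in the body,
   making 2 new registers live around a loop with 10 registers already
   live needs 12 + 3 <= 16 registers, so the cost is 0; with 14 already
   live, 16 registers still suffice and the cost is
   2 * target_reg_cost[speed]; with 16 already live we would have to
   spill, so the cost becomes 2 * target_spill_cost[speed].  */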
/* Sets EDGE_LOOP_EXIT flag for all loop exits.  */

void
mark_loop_exit_edges (void)
{
  basic_block bb;
  edge e;

  if (number_of_loops (cfun) <= 1)
    return;

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;

      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (loop_outer (bb->loop_father)
	      && loop_exit_edge_p (bb->loop_father, e))
	    e->flags |= EDGE_LOOP_EXIT;
	  else
	    e->flags &= ~EDGE_LOOP_EXIT;
	}
    }
}
/* Return the exit edge if LOOP has only one exit that is likely to be
   executed at runtime (i.e. it is not an EH edge or an edge leading to a
   noreturn call).  */

edge
single_likely_exit (struct loop *loop)
{
  edge found = single_exit (loop);
  vec<edge> exits;
  unsigned i;
  edge ex;

  if (found)
    return found;
  exits = get_loop_exit_edges (loop);
  FOR_EACH_VEC_ELT (exits, i, ex)
    {
      if (ex->flags & (EDGE_EH | EDGE_ABNORMAL_CALL))
	continue;
      /* The constant of 5 is set so that noreturn calls are ruled out by
	 this test.  The static branch prediction algorithm will not assign
	 such a low probability to ordinary conditionals.  */
      if (profile_status_for_fn (cfun) != PROFILE_ABSENT
	  && ex->probability < 5 && !ex->count)
	continue;
      if (!found)
	found = ex;
      else
	{
	  exits.release ();
	  return NULL;
	}
    }
  exits.release ();
  return found;
}
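/* Note on the constant of 5 used above: assuming the usual scale of
   REG_BR_PROB_BASE (10000), an edge probability below 5 corresponds to
   less than 0.05%, a value the guessed profile is expected to produce
   only for paths such as calls to noreturn functions.  */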
/* Gets the basic blocks of LOOP.  The header is the 0-th block; the rest
   are in DFS order against the direction of edges from the latch.  In
   particular, if header != latch, the latch is the 1-st block.  */

vec<basic_block>
get_loop_hot_path (const struct loop *loop)
{
  basic_block bb = loop->header;
  vec<basic_block> path = vNULL;
  bitmap visited = BITMAP_ALLOC (NULL);

  while (true)
    {
      edge_iterator ei;
      edge e;
      edge best = NULL;

      path.safe_push (bb);
      bitmap_set_bit (visited, bb->index);
      FOR_EACH_EDGE (e, ei, bb->succs)
	if ((!best || e->probability > best->probability)
	    && !loop_exit_edge_p (loop, e)
	    && !bitmap_bit_p (visited, e->dest->index))
	  best = e;
      if (!best || best->dest == loop->header)
	break;
      bb = best->dest;
    }
  BITMAP_FREE (visited);
  return path;
}