gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003, 2004 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /*  Inlining decision heuristics
  23
  24     We separate inlining decisions from the inliner itself and store it
  25     inside callgraph as so called inline plan.  Refer to cgraph.c
  26     documentation about particular representation of inline plans in the
  27     callgraph.
  28
  29     There are three major parts of this file:
  30
  31     cgraph_mark_inline implementation
  32
  33       This function allows to mark given call inline and performs necessary
  34       modifications of cgraph (production of the clones and updating overall
  35       statistics)
  36
  37     inlining heuristics limits
  38
  39       These functions allow to check that particular inlining is allowed
  40       by the limits specified by user (allowed function growth, overall unit
  41       growth and so on).
  42
  43     inlining heuristics
  44
  45       This is implementation of IPA pass aiming to get as much of benefit
  46       from inlining obeying the limits checked above.
  47
  48       The implementation of particular heuristics is separated from
  49       the rest of code to make it easier to replace it with more complicated
  50       implementation in the future.  The rest of inlining code acts as a
  51       library aimed to modify the callgraph and verify that the parameters
  52       on code size growth fits.
  53
  54       To mark given call inline, use cgraph_mark_inline function, the
  55       verification is performed by cgraph_default_inline_p and
  56       cgraph_check_inline_limits.
  57
  58       The heuristics implements simple knapsack style algorithm ordering
  59       all functions by their "profitability" (estimated by code size growth)
  60       and inlining them in priority order.
  61
  62       cgraph_decide_inlining implements heuristics taking whole callgraph
  63       into account, while cgraph_decide_inlining_incrementally considers
  64       only one function at a time and is used in non-unit-at-a-time mode.  */
  65
  66 #include "config.h"
  67 #include "system.h"
  68 #include "coretypes.h"
  69 #include "tm.h"
  70 #include "tree.h"
  71 #include "tree-inline.h"
  72 #include "langhooks.h"
  73 #include "flags.h"
  74 #include "cgraph.h"
  75 #include "diagnostic.h"
  76 #include "timevar.h"
  77 #include "params.h"
  78 #include "fibheap.h"
  79 #include "intl.h"
  80 #include "tree-pass.h"
  81 #include "hashtab.h"
  82 #include "coverage.h"
  83 #include "ggc.h"
  84
  85 /* Statistics we collect about inlining algorithm.  */
  86 static int ncalls_inlined;
  87 static int nfunctions_inlined;
  88 static int initial_insns;
  89 static int overall_insns;
  90 static int max_insns;
  91 static gcov_type max_count;
  92
  93 /* Estimate size of the function after inlining WHAT into TO.  */
  94
  95 static int
  96 cgraph_estimate_size_after_inlining (int times, struct cgraph_node *to,
  97                                      struct cgraph_node *what)
  98 {
  99   int size;
 100   tree fndecl = what->decl, arg;
 101   int call_insns = PARAM_VALUE (PARAM_INLINE_CALL_COST);
 102
 103   for (arg = DECL_ARGUMENTS (fndecl); arg; arg = TREE_CHAIN (arg))
 104     call_insns += estimate_move_cost (TREE_TYPE (arg));
 105   size = (what->global.insns - call_insns) * times + to->global.insns;
 106   gcc_assert (size >= 0);
 107   return size;
 108 }
 109
 110 /* E is expected to be an edge being inlined.  Clone destination node of
 111    the edge and redirect it to the new clone.
 112    DUPLICATE is used for bookkeeping on whether we are actually creating new
 113    clones or re-using node originally representing out-of-line function call.
 114    */
 115 void
 116 cgraph_clone_inlined_nodes (struct cgraph_edge *e, bool duplicate)
 117 {
 118   struct cgraph_node *n;
 119
 120   /* We may eliminate the need for out-of-line copy to be output.  In that
 121      case just go ahead and re-use it.  */
 122   if (!e->callee->callers->next_caller
 123       && (!e->callee->needed || DECL_EXTERNAL (e->callee->decl))
 124       && duplicate
 125       && flag_unit_at_a_time)
 126     {
 127       gcc_assert (!e->callee->global.inlined_to);
 128       if (!DECL_EXTERNAL (e->callee->decl))
 129         overall_insns -= e->callee->global.insns, nfunctions_inlined++;
 130       duplicate = 0;
 131     }
 132    else if (duplicate)
 133     {
 134       n = cgraph_clone_node (e->callee, e->count, e->loop_nest, true);
 135       cgraph_redirect_edge_callee (e, n);
 136     }
 137
 138   if (e->caller->global.inlined_to)
 139     e->callee->global.inlined_to = e->caller->global.inlined_to;
 140   else
 141     e->callee->global.inlined_to = e->caller;
 142
 143   /* Recursively clone all bodies.  */
 144   for (e = e->callee->callees; e; e = e->next_callee)
 145     if (!e->inline_failed)
 146       cgraph_clone_inlined_nodes (e, duplicate);
 147 }
 148
 149 /* Mark edge E as inlined and update callgraph accordingly.  */
 150
 151 void
 152 cgraph_mark_inline_edge (struct cgraph_edge *e)
 153 {
 154   int old_insns = 0, new_insns = 0;
 155   struct cgraph_node *to = NULL, *what;
 156
 157   if (e->callee->inline_decl)
 158     cgraph_redirect_edge_callee (e, cgraph_node (e->callee->inline_decl));
 159
 160   gcc_assert (e->inline_failed);
 161   e->inline_failed = NULL;
 162
 163   if (!e->callee->global.inlined && flag_unit_at_a_time)
 164     DECL_POSSIBLY_INLINED (e->callee->decl) = true;
 165   e->callee->global.inlined = true;
 166
 167   cgraph_clone_inlined_nodes (e, true);
 168
 169   what = e->callee;
 170
 171   /* Now update size of caller and all functions caller is inlined into.  */
 172   for (;e && !e->inline_failed; e = e->caller->callers)
 173     {
 174       old_insns = e->caller->global.insns;
 175       new_insns = cgraph_estimate_size_after_inlining (1, e->caller,
 176                                                        what);
 177       gcc_assert (new_insns >= 0);
 178       to = e->caller;
 179       to->global.insns = new_insns;
 180     }
 181   gcc_assert (what->global.inlined_to == to);
 182   if (new_insns > old_insns)
 183     overall_insns += new_insns - old_insns;
 184   ncalls_inlined++;
 185 }
 186
 187 /* Mark all calls of EDGE->CALLEE inlined into EDGE->CALLER.
 188    Return following unredirected edge in the list of callers
 189    of EDGE->CALLEE  */
 190
 191 static struct cgraph_edge *
 192 cgraph_mark_inline (struct cgraph_edge *edge)
 193 {
 194   struct cgraph_node *to = edge->caller;
 195   struct cgraph_node *what = edge->callee;
 196   struct cgraph_edge *e, *next;
 197   int times = 0;
 198
 199   /* Look for all calls, mark them inline and clone recursively
 200      all inlined functions.  */
 201   for (e = what->callers; e; e = next)
 202     {
 203       next = e->next_caller;
 204       if (e->caller == to && e->inline_failed)
 205         {
 206           cgraph_mark_inline_edge (e);
 207           if (e == edge)
 208             edge = next;
 209           times++;
 210         }
 211     }
 212   gcc_assert (times);
 213   return edge;
 214 }
 215
 216 /* Estimate the growth caused by inlining NODE into all callees.  */
 217
 218 static int
 219 cgraph_estimate_growth (struct cgraph_node *node)
 220 {
 221   int growth = 0;
 222   struct cgraph_edge *e;
 223   if (node->global.estimated_growth != INT_MIN)
 224     return node->global.estimated_growth;
 225
 226   for (e = node->callers; e; e = e->next_caller)
 227     if (e->inline_failed)
 228       growth += (cgraph_estimate_size_after_inlining (1, e->caller, node)
 229                  - e->caller->global.insns);
 230
 231   /* ??? Wrong for self recursive functions or cases where we decide to not
 232      inline for different reasons, but it is not big deal as in that case
 233      we will keep the body around, but we will also avoid some inlining.  */
 234   if (!node->needed && !DECL_EXTERNAL (node->decl))
 235     growth -= node->global.insns;
 236
 237   node->global.estimated_growth = growth;
 238   return growth;
 239 }
 240
 241 /* Return false when inlining WHAT into TO is not good idea
 242    as it would cause too large growth of function bodies.  */
 243
 244 static bool
 245 cgraph_check_inline_limits (struct cgraph_node *to, struct cgraph_node *what,
 246                             const char **reason)
 247 {
 248   int times = 0;
 249   struct cgraph_edge *e;
 250   int newsize;
 251   int limit;
 252
 253   if (to->global.inlined_to)
 254     to = to->global.inlined_to;
 255
 256   for (e = to->callees; e; e = e->next_callee)
 257     if (e->callee == what)
 258       times++;
 259
 260   /* When inlining large function body called once into small function,
 261      take the inlined function as base for limiting the growth.  */
 262   if (to->local.self_insns > what->local.self_insns)
 263     limit = to->local.self_insns;
 264   else
 265     limit = what->local.self_insns;
 266
 267   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 268
 269   newsize = cgraph_estimate_size_after_inlining (times, to, what);
 270   if (newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 271       && newsize > limit)
 272     {
 273       if (reason)
 274         *reason = N_("--param large-function-growth limit reached");
 275       return false;
 276     }
 277   return true;
 278 }
 279
 280 /* Return true when function N is small enough to be inlined.  */
 281
 282 bool
 283 cgraph_default_inline_p (struct cgraph_node *n, const char **reason)
 284 {
 285   tree decl = n->decl;
 286
 287   if (n->inline_decl)
 288     decl = n->inline_decl;
 289   if (!DECL_INLINE (decl))
 290     {
 291       if (reason)
 292         *reason = N_("function not inlinable");
 293       return false;
 294     }
 295
 296   if (!DECL_STRUCT_FUNCTION (decl)->cfg)
 297     {
 298       if (reason)
 299         *reason = N_("function body not available");
 300       return false;
 301     }
 302
 303   if (DECL_DECLARED_INLINE_P (decl))
 304     {
 305       if (n->global.insns >= MAX_INLINE_INSNS_SINGLE)
 306         {
 307           if (reason)
 308             *reason = N_("--param max-inline-insns-single limit reached");
 309           return false;
 310         }
 311     }
 312   else
 313     {
 314       if (n->global.insns >= MAX_INLINE_INSNS_AUTO)
 315         {
 316           if (reason)
 317             *reason = N_("--param max-inline-insns-auto limit reached");
 318           return false;
 319         }
 320     }
 321
 322   return true;
 323 }
 324
 325 /* Return true when inlining WHAT would create recursive inlining.
 326    We call recursive inlining all cases where same function appears more than
 327    once in the single recursion nest path in the inline graph.  */
 328
 329 static bool
 330 cgraph_recursive_inlining_p (struct cgraph_node *to,
 331                              struct cgraph_node *what,
 332                              const char **reason)
 333 {
 334   bool recursive;
 335   if (to->global.inlined_to)
 336     recursive = what->decl == to->global.inlined_to->decl;
 337   else
 338     recursive = what->decl == to->decl;
 339   /* Marking recursive function inline has sane semantic and thus we should
 340      not warn on it.  */
 341   if (recursive && reason)
 342     *reason = (what->local.disregard_inline_limits
 343                ? N_("recursive inlining") : "");
 344   return recursive;
 345 }
 346
 347 /* Return true if the call can be hot.  */
 348 static bool
 349 cgraph_maybe_hot_edge_p (struct cgraph_edge *edge)
 350 {
 351   if (profile_info && flag_branch_probabilities
 352       && (edge->count
 353           <= profile_info->sum_max / PARAM_VALUE (HOT_BB_COUNT_FRACTION)))
 354     return false;
 355   return true;
 356 }
 357
 358 /* A cost model driving the inlining heuristics in a way so the edges with
 359    smallest badness are inlined first.  After each inlining is performed
 360    the costs of all caller edges of nodes affected are recomputed so the
 361    metrics may accurately depend on values such as number of inlinable callers
 362    of the function or function body size.
 363
 364    With profiling we use number of executions of each edge to drive the cost.
 365    We also should distinguish hot and cold calls where the cold calls are
 366    inlined into only when code size is overall improved.
 367    */
 368
 369 static int
 370 cgraph_edge_badness (struct cgraph_edge *edge)
 371 {
 372   if (max_count)
 373     {
 374       int growth =
 375         cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
 376       growth -= edge->caller->global.insns;
 377
 378       /* Always prefer inlining saving code size.  */
 379       if (growth <= 0)
 380         return INT_MIN - growth;
 381       return ((int)((double)edge->count * INT_MIN / max_count)) / growth;
 382     }
 383   else
 384   {
 385     int nest = MIN (edge->loop_nest, 8);
 386     int badness = cgraph_estimate_growth (edge->callee) * 256;
 387
 388     /* Decrease badness if call is nested.  */
 389     if (badness > 0)
 390       badness >>= nest;
 391     else
 392       badness <<= nest;
 393
 394     /* Make recursive inlining happen always after other inlining is done.  */
 395     if (cgraph_recursive_inlining_p (edge->caller, edge->callee, NULL))
 396       return badness + 1;
 397     else
 398       return badness;
 399   }
 400 }
 401
 402 /* Recompute heap nodes for each of caller edge.  */
 403
 404 static void
 405 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
 406                     bitmap updated_nodes)
 407 {
 408   struct cgraph_edge *edge;
 409
 410   if (!node->local.inlinable || node->local.disregard_inline_limits
 411       || node->global.inlined_to)
 412     return;
 413   if (bitmap_bit_p (updated_nodes, node->uid))
 414     return;
 415   bitmap_set_bit (updated_nodes, node->uid);
 416   node->global.estimated_growth = INT_MIN;
 417
 418   for (edge = node->callers; edge; edge = edge->next_caller)
 419     if (edge->inline_failed)
 420       {
 421         int badness = cgraph_edge_badness (edge);
 422         if (edge->aux)
 423           {
 424             fibnode_t n = edge->aux;
 425             gcc_assert (n->data == edge);
 426             if (n->key == badness)
 427               continue;
 428
 429             /* fibheap_replace_key only increase the keys.  */
 430             if (fibheap_replace_key (heap, n, badness))
 431               continue;
 432             fibheap_delete_node (heap, edge->aux);
 433           }
 434         edge->aux = fibheap_insert (heap, badness, edge);
 435       }
 436 }
 437
 438 /* Recompute heap nodes for each of caller edges of each of callees.  */
 439
 440 static void
 441 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
 442                     bitmap updated_nodes)
 443 {
 444   struct cgraph_edge *e;
 445   node->global.estimated_growth = INT_MIN;
 446
 447   for (e = node->callees; e; e = e->next_callee)
 448     if (e->inline_failed)
 449       update_caller_keys (heap, e->callee, updated_nodes);
 450     else if (!e->inline_failed)
 451       update_callee_keys (heap, e->callee, updated_nodes);
 452 }
 453
 454 /* Enqueue all recursive calls from NODE into priority queue depending on
 455    how likely we want to recursively inline the call.  */
 456
 457 static void
 458 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
 459                         fibheap_t heap)
 460 {
 461   static int priority;
 462   struct cgraph_edge *e;
 463   for (e = where->callees; e; e = e->next_callee)
 464     if (e->callee == node)
 465       {
 466         /* When profile feedback is available, prioritize by expected number
 467            of calls.  Without profile feedback we maintain simple queue
 468            to order candidates via recursive depths.  */
 469         fibheap_insert (heap,
 470                         !max_count ? priority++
 471                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
 472                         e);
 473       }
 474   for (e = where->callees; e; e = e->next_callee)
 475     if (!e->inline_failed)
 476       lookup_recursive_calls (node, e->callee, heap);
 477 }
 478
 479 /* Find callgraph nodes closing a circle in the graph.  The
 480    resulting hashtab can be used to avoid walking the circles.
 481    Uses the cgraph nodes ->aux field which needs to be zero
 482    before and will be zero after operation.  */
 483
 484 static void
 485 cgraph_find_cycles (struct cgraph_node *node, htab_t cycles)
 486 {
 487   struct cgraph_edge *e;
 488
 489   if (node->aux)
 490     {
 491       void **slot;
 492       slot = htab_find_slot (cycles, node, INSERT);
 493       if (!*slot)
 494         {
 495           if (dump_file)
 496             fprintf (dump_file, "Cycle contains %s\n", cgraph_node_name (node));
 497           *slot = node;
 498         }
 499       return;
 500     }
 501
 502   node->aux = node;
 503   for (e = node->callees; e; e = e->next_callee)
 504     cgraph_find_cycles (e->callee, cycles);
 505   node->aux = 0;
 506 }
 507
 508
 509 static void
 510 cgraph_apply_inline_plan (void)
 511 {
 512   struct cgraph_node *node;
 513   struct cgraph_node **order =
 514     xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
 515   int order_pos = 0, new_order_pos = 0;
 516   int i;
 517
 518   timevar_push (TV_INTEGRATION);
 519   order_pos = cgraph_postorder (order);
 520   gcc_assert (order_pos == cgraph_n_nodes);
 521
 522   /* Garbage collector may remove inline clones we eliminate during
 523      optimization.  So we must be sure to not reference them.  */
 524   for (i = 0; i < order_pos; i++)
 525     if (!order[i]->global.inlined_to)
 526       order[new_order_pos++] = order[i];
 527
 528   /* Initialize the default bitmap obstack.  */
 529   bitmap_obstack_initialize (NULL);
 530
 531
 532   for (i = 0; i < new_order_pos; i++)
 533     {
 534       struct cgraph_edge *e;
 535
 536       node = order[i];
 537       for (e = node->callees; e; e = e->next_callee)
 538         if (!e->inline_failed || warn_inline)
 539           break;
 540       if (e)
 541         {
 542           if (cgraph_preserve_function_body_p (node->decl, true))
 543             save_inline_function_body (node);
 544           push_cfun (DECL_STRUCT_FUNCTION (node->decl));
 545           tree_register_cfg_hooks ();
 546           current_function_decl = node->decl;
 547           optimize_inline_calls (node->decl, false);
 548           free_dominance_info (CDI_DOMINATORS);
 549           free_dominance_info (CDI_POST_DOMINATORS);
 550           node->local.self_insns = node->global.insns;
 551           pop_cfun ();
 552           ggc_collect ();
 553         }
 554     }
 555   free (order);
 556   timevar_pop (TV_INTEGRATION);
 557 }
 558
 559 /* Leafify the cgraph node.  We have to be careful in recursing
 560    as to not run endlessly in circles of the callgraph.
 561    We do so by using a hashtab of cycle entering nodes as generated
 562    by cgraph_find_cycles.  */
 563
 564 static void
 565 cgraph_flatten_node (struct cgraph_node *node, htab_t cycles)
 566 {
 567   struct cgraph_edge *e;
 568
 569   for (e = node->callees; e; e = e->next_callee)
 570     {
 571       /* Inline call, if possible, and recurse.  Be sure we are not
 572          entering callgraph circles here.  */
 573       if (e->inline_failed
 574           && e->callee->local.inlinable
 575           && !cgraph_recursive_inlining_p (node, e->callee,
 576                                            &e->inline_failed)
 577           && !htab_find (cycles, e->callee))
 578         {
 579           if (dump_file)
 580             fprintf (dump_file, " inlining %s", cgraph_node_name (e->callee));
 581           cgraph_mark_inline_edge (e);
 582           cgraph_flatten_node (e->callee, cycles);
 583         }
 584       else if (dump_file)
 585         fprintf (dump_file, " !inlining %s", cgraph_node_name (e->callee));
 586     }
 587 }
 588
 589 /* Decide on recursive inlining: in the case function has recursive calls,
 590    inline until body size reaches given argument.  */
 591
 592 static bool
 593 cgraph_decide_recursive_inlining (struct cgraph_node *node)
 594 {
 595   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
 596   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 597   int probability = PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY);
 598   fibheap_t heap;
 599   struct cgraph_edge *e;
 600   struct cgraph_node *master_clone;
 601   int depth = 0;
 602   int n = 0;
 603
 604   if (DECL_DECLARED_INLINE_P (node->decl))
 605     {
 606       limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
 607       max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 608     }
 609
 610   /* Make sure that function is small enough to be considered for inlining.  */
 611   if (!max_depth
 612       || cgraph_estimate_size_after_inlining (1, node, node)  >= limit)
 613     return false;
 614   heap = fibheap_new ();
 615   lookup_recursive_calls (node, node, heap);
 616   if (fibheap_empty (heap))
 617     {
 618       fibheap_delete (heap);
 619       return false;
 620     }
 621
 622   if (dump_file)
 623     fprintf (dump_file,
 624              "  Performing recursive inlining on %s\n",
 625              cgraph_node_name (node));
 626
 627   /* We need original clone to copy around.  */
 628   master_clone = cgraph_clone_node (node, node->count, 1, false);
 629   master_clone->needed = true;
 630   for (e = master_clone->callees; e; e = e->next_callee)
 631     if (!e->inline_failed)
 632       cgraph_clone_inlined_nodes (e, true);
 633
 634   /* Do the inlining and update list of recursive call during process.  */
 635   while (!fibheap_empty (heap)
 636          && (cgraph_estimate_size_after_inlining (1, node, master_clone)
 637              <= limit))
 638     {
 639       struct cgraph_edge *curr = fibheap_extract_min (heap);
 640       struct cgraph_node *cnode;
 641
 642       depth = 1;
 643       for (cnode = curr->caller;
 644            cnode->global.inlined_to; cnode = cnode->callers->caller)
 645         if (node->decl == curr->callee->decl)
 646           depth++;
 647       if (depth > max_depth)
 648         {
 649           if (dump_file)
 650             fprintf (dump_file,
 651                      "   maxmal depth reached\n");
 652           continue;
 653         }
 654
 655       if (max_count)
 656         {
 657           if (!cgraph_maybe_hot_edge_p (curr))
 658             {
 659               if (dump_file)
 660                 fprintf (dump_file, "   Not inlining cold call\n");
 661               continue;
 662             }
 663           if (curr->count * 100 / node->count < probability)
 664             {
 665               if (dump_file)
 666                 fprintf (dump_file,
 667                          "   Probability of edge is too small\n");
 668               continue;
 669             }
 670         }
 671
 672       if (dump_file)
 673         {
 674           fprintf (dump_file,
 675                    "   Inlining call of depth %i", depth);
 676           if (node->count)
 677             {
 678               fprintf (dump_file, " called approx. %.2f times per call",
 679                        (double)curr->count / node->count);
 680             }
 681           fprintf (dump_file, "\n");
 682         }
 683       cgraph_redirect_edge_callee (curr, master_clone);
 684       cgraph_mark_inline_edge (curr);
 685       lookup_recursive_calls (node, curr->callee, heap);
 686       n++;
 687     }
 688   if (!fibheap_empty (heap) && dump_file)
 689     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
 690
 691   fibheap_delete (heap);
 692   if (dump_file)
 693     fprintf (dump_file,
 694              "\n   Inlined %i times, body grown from %i to %i insns\n", n,
 695              master_clone->global.insns, node->global.insns);
 696
 697   /* Remove master clone we used for inlining.  We rely that clones inlined
 698      into master clone gets queued just before master clone so we don't
 699      need recursion.  */
 700   for (node = cgraph_nodes; node != master_clone;
 701        node = node->next)
 702     if (node->global.inlined_to == master_clone)
 703       cgraph_remove_node (node);
 704   cgraph_remove_node (master_clone);
 705   /* FIXME: Recursive inlining actually reduces number of calls of the
 706      function.  At this place we should probably walk the function and
 707      inline clones and compensate the counts accordingly.  This probably
 708      doesn't matter much in practice.  */
 709   return true;
 710 }
 711
 712 /* Set inline_failed for all callers of given function to REASON.  */
 713
 714 static void
 715 cgraph_set_inline_failed (struct cgraph_node *node, const char *reason)
 716 {
 717   struct cgraph_edge *e;
 718
 719   if (dump_file)
 720     fprintf (dump_file, "Inlining failed: %s\n", reason);
 721   for (e = node->callers; e; e = e->next_caller)
 722     if (e->inline_failed)
 723       e->inline_failed = reason;
 724 }
 725
 726 /* We use greedy algorithm for inlining of small functions:
 727    All inline candidates are put into prioritized heap based on estimated
 728    growth of the overall number of instructions and then update the estimates.
 729
 730    INLINED and INLINED_CALEES are just pointers to arrays large enough
 731    to be passed to cgraph_inlined_into and cgraph_inlined_callees.  */
 732
 733 static void
 734 cgraph_decide_inlining_of_small_functions (void)
 735 {
 736   struct cgraph_node *node;
 737   struct cgraph_edge *edge;
 738   const char *failed_reason;
 739   fibheap_t heap = fibheap_new ();
 740   bitmap updated_nodes = BITMAP_ALLOC (NULL);
 741
 742   if (dump_file)
 743     fprintf (dump_file, "\nDeciding on smaller functions:\n");
 744
 745   /* Put all inline candidates into the heap.  */
 746
 747   for (node = cgraph_nodes; node; node = node->next)
 748     {
 749       if (!node->local.inlinable || !node->callers
 750           || node->local.disregard_inline_limits)
 751         continue;
 752       if (dump_file)
 753         fprintf (dump_file, "Considering inline candidate %s.\n", cgraph_node_name (node));
 754
 755       node->global.estimated_growth = INT_MIN;
 756       if (!cgraph_default_inline_p (node, &failed_reason))
 757         {
 758           cgraph_set_inline_failed (node, failed_reason);
 759           continue;
 760         }
 761
 762       for (edge = node->callers; edge; edge = edge->next_caller)
 763         if (edge->inline_failed)
 764           {
 765             gcc_assert (!edge->aux);
 766             edge->aux = fibheap_insert (heap, cgraph_edge_badness (edge), edge);
 767           }
 768     }
 769   while (overall_insns <= max_insns && (edge = fibheap_extract_min (heap)))
 770     {
 771       int old_insns = overall_insns;
 772       struct cgraph_node *where;
 773       int growth =
 774         cgraph_estimate_size_after_inlining (1, edge->caller, edge->callee);
 775
 776       growth -= edge->caller->global.insns;
 777
 778       if (dump_file)
 779         {
 780           fprintf (dump_file,
 781                    "\nConsidering %s with %i insns to be inlined into %s\n"
 782                    " Estimated growth after inlined into all callees is %+i insns.\n"
 783                    " Estimated badness is %i.\n",
 784                    cgraph_node_name (edge->callee),
 785                    edge->callee->global.insns,
 786                    cgraph_node_name (edge->caller),
 787                    cgraph_estimate_growth (edge->callee),
 788                    cgraph_edge_badness (edge));
 789           if (edge->count)
 790             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n", edge->count);
 791         }
 792       gcc_assert (edge->aux);
 793       edge->aux = NULL;
 794       if (!edge->inline_failed)
 795         continue;
 796
 797       /* When not having profile info ready we don't weight by any way the
 798          position of call in procedure itself.  This means if call of
 799          function A from function B seems profitable to inline, the recursive
 800          call of function A in inline copy of A in B will look profitable too
 801          and we end up inlining until reaching maximal function growth.  This
 802          is not good idea so prohibit the recursive inlining.
 803
 804          ??? When the frequencies are taken into account we might not need this
 805          restriction.   */
 806       if (!max_count)
 807         {
 808           where = edge->caller;
 809           while (where->global.inlined_to)
 810             {
 811               if (where->decl == edge->callee->decl)
 812                 break;
 813               where = where->callers->caller;
 814             }
 815           if (where->global.inlined_to)
 816             {
 817               edge->inline_failed
 818                 = (edge->callee->local.disregard_inline_limits ? N_("recursive inlining") : "");
 819               if (dump_file)
 820                 fprintf (dump_file, " inline_failed:Recursive inlining performed only for function itself.\n");
 821               continue;
 822             }
 823         }
 824
 825       if (!cgraph_maybe_hot_edge_p (edge) && growth > 0)
 826         {
 827           if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
 828                                             &edge->inline_failed))
 829             {
 830               edge->inline_failed =
 831                 N_("call is unlikely");
 832               if (dump_file)
 833                 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
 834             }
 835           continue;
 836         }
 837       if (!cgraph_default_inline_p (edge->callee, &edge->inline_failed))
 838         {
 839           if (!cgraph_recursive_inlining_p (edge->caller, edge->callee,
 840                                             &edge->inline_failed))
 841             {
 842               if (dump_file)
 843                 fprintf (dump_file, " inline_failed:%s.\n", edge->inline_failed);
 844             }
 845           continue;
 846         }
 847       if (cgraph_recursive_inlining_p (edge->caller, edge->callee,
 848                                        &edge->inline_failed))
 849         {
 850           where = edge->caller;
 851           if (where->global.inlined_to)
 852             where = where->global.inlined_to;
 853           if (!cgraph_decide_recursive_inlining (where))
 854             continue;
 855           update_callee_keys (heap, where, updated_nodes);
 856         }
 857       else
 858         {
 859           struct cgraph_node *callee;
 860           if (!cgraph_check_inline_limits (edge->caller, edge->callee,
 861                                            &edge->inline_failed))
 862             {
 863               if (dump_file)
 864                 fprintf (dump_file, " Not inlining into %s:%s.\n",
 865                          cgraph_node_name (edge->caller), edge->inline_failed);
 866               continue;
 867             }
 868           callee = edge->callee;
 869           cgraph_mark_inline_edge (edge);
 870           update_callee_keys (heap, callee, updated_nodes);
 871         }
 872       where = edge->caller;
 873       if (where->global.inlined_to)
 874         where = where->global.inlined_to;
 875
 876       /* Our profitability metric can depend on local properties
 877          such as number of inlinable calls and size of the function body.
 878          After inlining these properties might change for the function we
 879          inlined into (since it's body size changed) and for the functions
 880          called by function we inlined (since number of it inlinable callers
 881          might change).  */
 882       update_caller_keys (heap, where, updated_nodes);
 883       bitmap_clear (updated_nodes);
 884
 885       if (dump_file)
 886         fprintf (dump_file,
 887                  " Inlined into %s which now has %i insns.\n",
 888                  cgraph_node_name (edge->caller),
 889                  edge->caller->global.insns);
 890       if (dump_file)
 891         fprintf (dump_file,
 892                  " Inlined for a net change of %+i insns.\n",
 893                  overall_insns - old_insns);
 894     }
 895   while ((edge = fibheap_extract_min (heap)) != NULL)
 896     {
 897       gcc_assert (edge->aux);
 898       edge->aux = NULL;
 899       if (!edge->callee->local.disregard_inline_limits && edge->inline_failed
 900           && !cgraph_recursive_inlining_p (edge->caller, edge->callee,
 901                                            &edge->inline_failed))
 902         edge->inline_failed = N_("--param inline-unit-growth limit reached");
 903     }
 904   fibheap_delete (heap);
 905   BITMAP_FREE (updated_nodes);
 906 }
 907
 908 /* Decide on the inlining.  We do so in the topological order to avoid
 909    expenses on updating data structures.  */
 910
 911 static void
 912 cgraph_decide_inlining (void)
 913 {
 914   struct cgraph_node *node;
 915   int nnodes;
 916   struct cgraph_node **order =
 917     xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
 918   int old_insns = 0;
 919   int i;
 920
 921   timevar_push (TV_INLINE_HEURISTICS);
 922   max_count = 0;
 923   for (node = cgraph_nodes; node; node = node->next)
 924     {
 925       struct cgraph_edge *e;
 926       initial_insns += node->local.self_insns;
 927       for (e = node->callees; e; e = e->next_callee)
 928         if (max_count < e->count)
 929           max_count = e->count;
 930     }
 931   overall_insns = initial_insns;
 932   gcc_assert (!max_count || (profile_info && flag_branch_probabilities));
 933
 934   max_insns = ((HOST_WIDEST_INT) overall_insns
 935                * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
 936
 937   nnodes = cgraph_postorder (order);
 938
 939   if (dump_file)
 940     fprintf (dump_file,
 941              "\nDeciding on inlining.  Starting with %i insns.\n",
 942              initial_insns);
 943
 944   for (node = cgraph_nodes; node; node = node->next)
 945     node->aux = 0;
 946
 947   if (dump_file)
 948     fprintf (dump_file, "\nInlining always_inline functions:\n");
 949
 950   /* In the first pass mark all always_inline edges.  Do this with a priority
 951      so none of our later choices will make this impossible.  */
 952   for (i = nnodes - 1; i >= 0; i--)
 953     {
 954       struct cgraph_edge *e, *next;
 955
 956       node = order[i];
 957
 958       /* Handle nodes to be flattened, but don't update overall unit size.  */
 959       if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) != NULL)
 960         {
 961           int old_overall_insns = overall_insns;
 962           htab_t cycles;
 963           if (dump_file)
 964             fprintf (dump_file,
 965                      "Leafifying %s\n", cgraph_node_name (node));
 966           cycles = htab_create (7, htab_hash_pointer, htab_eq_pointer, NULL);
 967           cgraph_find_cycles (node, cycles);
 968           cgraph_flatten_node (node, cycles);
 969           htab_delete (cycles);
 970           overall_insns = old_overall_insns;
 971           /* We don't need to consider always_inline functions inside the flattened
 972              function anymore.  */
 973           continue;
 974         }
 975
 976       if (!node->local.disregard_inline_limits)
 977         continue;
 978       if (dump_file)
 979         fprintf (dump_file,
 980                  "\nConsidering %s %i insns (always inline)\n",
 981                  cgraph_node_name (node), node->global.insns);
 982       old_insns = overall_insns;
 983       for (e = node->callers; e; e = next)
 984         {
 985           next = e->next_caller;
 986           if (!e->inline_failed)
 987             continue;
 988           if (cgraph_recursive_inlining_p (e->caller, e->callee,
 989                                            &e->inline_failed))
 990             continue;
 991           cgraph_mark_inline_edge (e);
 992           if (dump_file)
 993             fprintf (dump_file,
 994                      " Inlined into %s which now has %i insns.\n",
 995                      cgraph_node_name (e->caller),
 996                      e->caller->global.insns);
 997         }
 998       if (dump_file)
 999         fprintf (dump_file,
1000                  " Inlined for a net change of %+i insns.\n",
1001                  overall_insns - old_insns);
1002     }
1003
1004   if (!flag_really_no_inline)
1005     {
1006       cgraph_decide_inlining_of_small_functions ();
1007
1008       if (dump_file)
1009         fprintf (dump_file, "\nDeciding on functions called once:\n");
1010
1011       /* And finally decide what functions are called once.  */
1012
1013       for (i = nnodes - 1; i >= 0; i--)
1014         {
1015           node = order[i];
1016
1017           if (node->callers && !node->callers->next_caller && !node->needed
1018               && node->local.inlinable && node->callers->inline_failed
1019               && !DECL_EXTERNAL (node->decl) && !DECL_COMDAT (node->decl))
1020             {
1021               bool ok = true;
1022               struct cgraph_node *node1;
1023
1024               /* Verify that we won't duplicate the caller.  */
1025               for (node1 = node->callers->caller;
1026                    node1->callers && !node1->callers->inline_failed
1027                    && ok; node1 = node1->callers->caller)
1028                 if (node1->callers->next_caller || node1->needed)
1029                   ok = false;
1030               if (ok)
1031                 {
1032                   if (dump_file)
1033                     fprintf (dump_file,
1034                              "\nConsidering %s %i insns.\n"
1035                              " Called once from %s %i insns.\n",
1036                              cgraph_node_name (node), node->global.insns,
1037                              cgraph_node_name (node->callers->caller),
1038                              node->callers->caller->global.insns);
1039
1040                   old_insns = overall_insns;
1041
1042                   if (cgraph_check_inline_limits (node->callers->caller, node,
1043                                                   NULL))
1044                     {
1045                       cgraph_mark_inline (node->callers);
1046                       if (dump_file)
1047                         fprintf (dump_file,
1048                                  " Inlined into %s which now has %i insns"
1049                                  " for a net change of %+i insns.\n",
1050                                  cgraph_node_name (node->callers->caller),
1051                                  node->callers->caller->global.insns,
1052                                  overall_insns - old_insns);
1053                     }
1054                   else
1055                     {
1056                       if (dump_file)
1057                         fprintf (dump_file,
1058                                  " Inline limit reached, not inlined.\n");
1059                     }
1060                 }
1061             }
1062         }
1063     }
1064
1065   cgraph_remove_unreachable_nodes (false, dump_file);
1066   cgraph_apply_inline_plan ();
1067   cgraph_remove_unreachable_nodes (false, dump_file);
1068
1069   if (dump_file)
1070     fprintf (dump_file,
1071              "\nInlined %i calls, eliminated %i functions, "
1072              "%i insns turned to %i insns.\n\n",
1073              ncalls_inlined, nfunctions_inlined, initial_insns,
1074              overall_insns);
1075   free (order);
1076   timevar_pop (TV_INLINE_HEURISTICS);
1077 }
1078
1079 /* Decide on the inlining.  We do so in the topological order to avoid
1080    expenses on updating data structures.  */
1081
1082 bool
1083 cgraph_decide_inlining_incrementally (struct cgraph_node *node, bool early)
1084 {
1085   struct cgraph_edge *e;
1086   bool inlined = false;
1087   const char *failed_reason;
1088
1089   /* First of all look for always inline functions.  */
1090   for (e = node->callees; e; e = e->next_callee)
1091     if (e->callee->local.disregard_inline_limits
1092         && e->inline_failed
1093         && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1094         /* ??? It is possible that renaming variable removed the function body
1095            in duplicate_decls. See gcc.c-torture/compile/20011119-2.c  */
1096         && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1097       {
1098         if (dump_file && early)
1099           fprintf (dump_file, "  Early inlining %s into %s\n",
1100                    cgraph_node_name (e->callee), cgraph_node_name (node));
1101         cgraph_mark_inline (e);
1102         inlined = true;
1103       }
1104
1105   /* Now do the automatic inlining.  */
1106   if (!flag_really_no_inline)
1107     for (e = node->callees; e; e = e->next_callee)
1108       if (e->callee->local.inlinable
1109           && e->inline_failed
1110           && !e->callee->local.disregard_inline_limits
1111           && !cgraph_recursive_inlining_p (node, e->callee, &e->inline_failed)
1112           && (!early
1113               || (cgraph_estimate_size_after_inlining (1, e->caller, node)
1114                   <= e->caller->global.insns))
1115           && cgraph_check_inline_limits (node, e->callee, &e->inline_failed)
1116           && (DECL_SAVED_TREE (e->callee->decl) || e->callee->inline_decl))
1117         {
1118           if (cgraph_default_inline_p (e->callee, &failed_reason))
1119             {
1120               if (dump_file && early)
1121                 fprintf (dump_file, "  Early inlining %s into %s\n",
1122                          cgraph_node_name (e->callee), cgraph_node_name (node));
1123               cgraph_mark_inline (e);
1124               inlined = true;
1125             }
1126           else if (!early)
1127             e->inline_failed = failed_reason;
1128         }
1129   if (inlined || (warn_inline && !early))
1130     {
1131       /* Initialize the default bitmap obstack.  */
1132       bitmap_obstack_initialize (NULL);
1133       push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1134       tree_register_cfg_hooks ();
1135       current_function_decl = node->decl;
1136       optimize_inline_calls (current_function_decl, early);
1137       free_dominance_info (CDI_DOMINATORS);
1138       free_dominance_info (CDI_POST_DOMINATORS);
1139       node->local.self_insns = node->global.insns;
1140       current_function_decl = NULL;
1141       pop_cfun ();
1142       ggc_collect ();
1143     }
1144   return inlined;
1145 }
1146
1147 /* When inlining shall be performed.  */
1148 static bool
1149 cgraph_gate_inlining (void)
1150 {
1151   return flag_inline_trees /*&& 0*/;
1152 }
1153
1154 struct tree_opt_pass pass_ipa_inline =
1155 {
1156   "inline",                             /* name */
1157   cgraph_gate_inlining,                 /* gate */
1158   cgraph_decide_inlining,               /* execute */
1159   NULL,                                 /* sub */
1160   NULL,                                 /* next */
1161   0,                                    /* static_pass_number */
1162   TV_INTEGRATION,                       /* tv_id */
1163   0,                                    /* properties_required */
1164   PROP_cfg,                             /* properties_provided */
1165   0,                                    /* properties_destroyed */
1166   0,                                    /* todo_flags_start */
1167   TODO_dump_cgraph | TODO_dump_func,    /* todo_flags_finish */
1168   0                                     /* letter */
1169 };
1170
1171 /* Do inlining of small functions.  Doing so early helps profiling and other
1172    passes to be somewhat more effective and avoids some code duplication in
1173    later real inlining pass for testcases with very many function calls.  */
1174 static void
1175 cgraph_early_inlining (void)
1176 {
1177   struct cgraph_node *node;
1178   int nnodes;
1179   struct cgraph_node **order =
1180     xcalloc (cgraph_n_nodes, sizeof (struct cgraph_node *));
1181   int i;
1182
1183   if (sorrycount || errorcount)
1184     return;
1185 #ifdef ENABLE_CHECKING
1186   for (node = cgraph_nodes; node; node = node->next)
1187     gcc_assert (!node->aux);
1188 #endif
1189
1190   nnodes = cgraph_postorder (order);
1191   for (i = nnodes - 1; i >= 0; i--)
1192     {
1193       node = order[i];
1194       if (node->analyzed && node->local.inlinable
1195           && (node->needed || node->reachable)
1196           && node->callers)
1197         cgraph_decide_inlining_incrementally (node, true);
1198     }
1199   cgraph_remove_unreachable_nodes (true, dump_file);
1200 #ifdef ENABLE_CHECKING
1201   for (node = cgraph_nodes; node; node = node->next)
1202     gcc_assert (!node->global.inlined_to);
1203 #endif
1204   free (order);
1205 }
1206
1207 /* When inlining shall be performed.  */
1208 static bool
1209 cgraph_gate_early_inlining (void)
1210 {
1211   return flag_inline_trees && flag_early_inlining;
1212 }
1213
1214 struct tree_opt_pass pass_early_ipa_inline =
1215 {
1216   "einline",                            /* name */
1217   cgraph_gate_early_inlining,           /* gate */
1218   cgraph_early_inlining,                /* execute */
1219   NULL,                                 /* sub */
1220   NULL,                                 /* next */
1221   0,                                    /* static_pass_number */
1222   TV_INTEGRATION,                       /* tv_id */
1223   0,                                    /* properties_required */
1224   PROP_cfg,                             /* properties_provided */
1225   0,                                    /* properties_destroyed */
1226   0,                                    /* todo_flags_start */
1227   TODO_dump_cgraph | TODO_dump_func,    /* todo_flags_finish */
1228   0                                     /* letter */
1229 };