gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003, 2004, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4    Contributed by Jan Hubicka
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 /*  Inlining decision heuristics
  23
  24     The implementation of inliner is organized as follows:
  25
  26     inlining heuristics limits
  27
  28       can_inline_edge_p checks that a particular inlining is allowed
  29       by the limits specified by the user (allowed function growth, stack
  30       growth and so on).
  31
  32       Functions are inlined when it is obvious the result is profitable (such
  33       as functions called once or when inlining reduce code size).
  34       In addition to that we perform inlining of small functions and recursive
  35       inlining.
  36
  37     inlining heuristics
  38
  39        The inliner itself is split into two passes:
  40
  41        pass_early_inlining
  42
  43          Simple local inlining pass inlining callees into current function.
  44          This pass makes no use of whole unit analysis and thus it can do only
  45          very simple decisions based on local properties.
  46
  47          The strength of the pass is that it is run in topological order
  48          (reverse postorder) on the callgraph. Functions are converted into SSA
  49          form just before this pass and optimized subsequently. As a result, the
  50          callees of the function seen by the early inliner were already optimized
  51          and the results of early inlining add a lot of optimization opportunities
  52          for the local optimization.
  53
  54          The pass handles the obvious inlining decisions within the compilation
  55          unit - inlining auto inline functions, inlining for size and
  56          flattening.
  57
  58          The main strength of the pass is the ability to eliminate abstraction
  59          penalty in C++ code (via combination of inlining and early
  60          optimization) and thus improve quality of analysis done by real IPA
  61          optimizers.
  62
  63          Because of lack of whole unit knowledge, the pass can not really make
  64          good code size/performance tradeoffs.  It however does very simple
  65          speculative inlining allowing code size to grow by
  66          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  67          optimizations performed later are very likely to eliminate the cost.
  68
  69        pass_ipa_inline
  70
  71          This is the real inliner able to handle inlining with whole program
  72          knowledge. It performs following steps:
  73
  74          1) inlining of small functions.  This is implemented by greedy
  75          algorithm ordering all inlinable cgraph edges by their badness and
  76          inlining them in this order as long as inline limits allows doing so.
  77
  78          This heuristic is not very good at inlining recursive calls. Recursive
  79          calls can be inlined with results similar to loop unrolling. To do so,
  80          a special purpose recursive inliner is executed on the function when
  81          a recursive edge is met as a viable candidate.
  82
  83          2) Unreachable functions are removed from callgraph.  Inlining leads
  84          to devirtualization and other modification of callgraph so functions
  85          may become unreachable during the process. Also functions declared as
  86          extern inline or virtual functions are removed, since after inlining
  87          we no longer need the offline bodies.
  88
  89          3) Functions called once and not exported from the unit are inlined.
  90          This should almost always lead to reduction of code size by eliminating
  91          the need for offline copy of the function.  */
  92
  93 #include "config.h"
  94 #include "system.h"
  95 #include "coretypes.h"
  96 #include "tm.h"
  97 #include "tree.h"
  98 #include "tree-inline.h"
  99 #include "langhooks.h"
 100 #include "flags.h"
 101 #include "cgraph.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "timevar.h"
 105 #include "params.h"
 106 #include "fibheap.h"
 107 #include "intl.h"
 108 #include "tree-pass.h"
 109 #include "coverage.h"
 110 #include "ggc.h"
 111 #include "rtl.h"
 112 #include "tree-flow.h"
 113 #include "ipa-prop.h"
 114 #include "except.h"
 115 #include "target.h"
 116 #include "ipa-inline.h"
 117 #include "ipa-utils.h"
 118
 119 /* Statistics we collect about inlining algorithm.  */
 120 static int overall_size;
 121 static gcov_type max_count;
 122
 123 /* Return false when inlining edge E would lead to violating
 124    limits on function unit growth or stack usage growth.
 125
 126    The relative function body growth limit is present generally
 127    to avoid problems with non-linear behavior of the compiler.
 128    To allow inlining huge functions into tiny wrapper, the limit
 129    is always based on the bigger of the two functions considered.
 130
 131    For stack growth limits we always base the growth in stack usage
 132    of the callers.  We want to prevent applications from segfaulting
 133    on stack overflow when functions with huge stack frames gets
 134    inlined. */
 135
 136 static bool
 137 caller_growth_limits (struct cgraph_edge *e)
 138 {
 139   struct cgraph_node *to = e->caller;
 140   struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
 141   int newsize;
 142   int limit = 0;
 143   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 144   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 145
 146   /* Look for function e->caller is inlined to.  While doing
 147      so work out the largest function body on the way.  As
 148      described above, we want to base our function growth
 149      limits based on that.  Not on the self size of the
 150      outer function, not on the self size of inline code
 151      we immediately inline to.  This is the most relaxed
 152      interpretation of the rule "do not grow large functions
 153      too much in order to prevent compiler from exploding".  */
 154   while (true)
 155     {
 156       info = inline_summary (to);
 157       if (limit < info->self_size)
 158         limit = info->self_size;
 159       if (stack_size_limit < info->estimated_self_stack_size)
 160         stack_size_limit = info->estimated_self_stack_size;
 161       if (to->global.inlined_to)
 162         to = to->callers->caller;
 163       else
 164         break;
 165     }
 166
 167   what_info = inline_summary (what);
 168
 169   if (limit < what_info->self_size)
 170     limit = what_info->self_size;
 171
 172   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 173
 174   /* Check the size after inlining against the function limits.  But allow
 175      the function to shrink if it went over the limits by forced inlining.  */
 176   newsize = estimate_size_after_inlining (to, e);
 177   if (newsize >= info->size
 178       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 179       && newsize > limit)
 180     {
 181       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 182       return false;
 183     }
 184
 185   if (!what_info->estimated_stack_size)
 186     return true;
 187
 188   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 189      due to large i/o datastructures used by the Fortran front-end.
 190      We ought to ignore this limit when we know that the edge is executed
 191      on every invocation of the caller (i.e. its call statement dominates
 192      exit block).  We do not track this information, yet.  */
 193   stack_size_limit += ((gcov_type)stack_size_limit
 194                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 195
 196   inlined_stack = (outer_info->stack_frame_offset
 197                    + outer_info->estimated_self_stack_size
 198                    + what_info->estimated_stack_size);
 199   /* Check new stack consumption with stack consumption at the place
 200      stack is used.  */
 201   if (inlined_stack > stack_size_limit
 202       /* If function already has large stack usage from sibling
 203          inline call, we can inline, too.
 204          This bit overoptimistically assume that we are good at stack
 205          packing.  */
 206       && inlined_stack > info->estimated_stack_size
 207       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 208     {
 209       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 210       return false;
 211     }
 212   return true;
 213 }
 214
 215 /* Dump info about why inlining has failed.  */
 216
 217 static void
 218 report_inline_failed_reason (struct cgraph_edge *e)
 219 {
 220   if (dump_file)
 221     {
 222       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 223                cgraph_node_name (e->caller), e->caller->uid,
 224                cgraph_node_name (e->callee), e->callee->uid,
 225                cgraph_inline_failed_string (e->inline_failed));
 226     }
 227 }
 228
 229 /* Decide if we can inline the edge and possibly update
 230    inline_failed reason.
 231    We check whether inlining is possible at all and whether
 232    caller growth limits allow doing so.
 233
 234    if REPORT is true, output reason to the dump file.  */
 235
 236 static bool
 237 can_inline_edge_p (struct cgraph_edge *e, bool report)
 238 {
 239   bool inlinable = true;
 240   enum availability avail;
 241   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, &avail);
 242   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 243   tree callee_tree = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 244
 245   gcc_assert (e->inline_failed);
 246
 247   if (!callee || !callee->analyzed)
 248     {
 249       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 250       inlinable = false;
 251     }
 252   else if (!inline_summary (callee)->inlinable)
 253     {
 254       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 255       inlinable = false;
 256     }
 257   else if (avail <= AVAIL_OVERWRITABLE)
 258     {
 259       e->inline_failed = CIF_OVERWRITABLE;
 260       return false;
 261     }
 262   else if (e->call_stmt_cannot_inline_p)
 263     {
 264       e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 265       inlinable = false;
 266     }
 267   /* Don't inline if the functions have different EH personalities.  */
 268   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 269            && DECL_FUNCTION_PERSONALITY (callee->decl)
 270            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 271                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 272     {
 273       e->inline_failed = CIF_EH_PERSONALITY;
 274       inlinable = false;
 275     }
 276   /* Don't inline if the callee can throw non-call exceptions but the
 277      caller cannot.
 278      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 279      Move the flag into cgraph node or mirror it in the inline summary.  */
 280   else if (DECL_STRUCT_FUNCTION (callee->decl)
 281            && DECL_STRUCT_FUNCTION
 282                 (callee->decl)->can_throw_non_call_exceptions
 283            && !(DECL_STRUCT_FUNCTION (e->caller->decl)
 284                 && DECL_STRUCT_FUNCTION
 285                      (e->caller->decl)->can_throw_non_call_exceptions))
 286     {
 287       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 288       inlinable = false;
 289     }
 290   /* Check compatibility of target optimization options.  */
 291   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 292                                                 callee->decl))
 293     {
 294       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 295       inlinable = false;
 296     }
 297   /* Check if caller growth allows the inlining.  */
 298   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 299            && !lookup_attribute ("flatten",
 300                                  DECL_ATTRIBUTES
 301                                    (e->caller->global.inlined_to
 302                                     ? e->caller->global.inlined_to->decl
 303                                     : e->caller->decl))
 304            && !caller_growth_limits (e))
 305     inlinable = false;
 306   /* Don't inline a function with a higher optimization level than the
 307      caller.  FIXME: this is really just tip of iceberg of handling
 308      optimization attribute.  */
 309   else if (caller_tree != callee_tree)
 310     {
 311       struct cl_optimization *caller_opt
 312         = TREE_OPTIMIZATION ((caller_tree)
 313                              ? caller_tree
 314                              : optimization_default_node);
 315
 316       struct cl_optimization *callee_opt
 317         = TREE_OPTIMIZATION ((callee_tree)
 318                              ? callee_tree
 319                              : optimization_default_node);
 320
 321       if ((caller_opt->x_optimize > callee_opt->x_optimize)
 322           || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 323         {
 324           e->inline_failed = CIF_TARGET_OPTIMIZATION_MISMATCH;
 325           inlinable = false;
 326         }
 327     }
 328
 329   /* Be sure that the cannot_inline_p flag is up to date.  */
 330   gcc_checking_assert (!e->call_stmt
 331                        || (gimple_call_cannot_inline_p (e->call_stmt)
 332                            == e->call_stmt_cannot_inline_p)
 333                        /* In -flto-partition=none mode we really keep things out of
 334                           sync because call_stmt_cannot_inline_p is set at cgraph
 335                           merging when function bodies are not there yet.  */
 336                        || (in_lto_p && !gimple_call_cannot_inline_p (e->call_stmt)));
 337   if (!inlinable && report)
 338     report_inline_failed_reason (e);
 339   return inlinable;
 340 }
 341
 342
 343 /* Return true if the edge E is inlinable during early inlining.  */
 344
 345 static bool
 346 can_early_inline_edge_p (struct cgraph_edge *e)
 347 {
 348   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
 349                                                               NULL);
 350   /* Early inliner might get called at WPA stage when IPA pass adds new
 351      function.  In this case we can not really do any of early inlining
 352      because function bodies are missing.  */
 353   if (!gimple_has_body_p (callee->decl))
 354     {
 355       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 356       return false;
 357     }
 358   /* In early inliner some of callees may not be in SSA form yet
 359      (i.e. the callgraph is cyclic and we did not process
 360      the callee by early inliner, yet).  We don't have CIF code for this
 361      case; later we will re-do the decision in the real inliner.  */
 362   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 363       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 364     {
 365       if (dump_file)
 366         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 367       return false;
 368     }
 369   if (!can_inline_edge_p (e, true))
 370     return false;
 371   return true;
 372 }
 373
 374
 375 /* Return true when N is leaf function.  Accept cheap builtins
 376    in leaf functions.  */
 377
 378 static bool
 379 leaf_node_p (struct cgraph_node *n)
 380 {
 381   struct cgraph_edge *e;
 382   for (e = n->callees; e; e = e->next_callee)
 383     if (!is_inexpensive_builtin (e->callee->decl))
 384       return false;
 385   return true;
 386 }
 387
 388
 389 /* Return true if we are interested in inlining small function.  */
 390
 391 static bool
 392 want_early_inline_function_p (struct cgraph_edge *e)
 393 {
 394   bool want_inline = true;
 395   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 396
 397   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 398     ;
 399   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 400            && !flag_inline_small_functions)
 401     {
 402       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 403       report_inline_failed_reason (e);
 404       want_inline = false;
 405     }
 406   else
 407     {
 408       int growth = estimate_edge_growth (e);
 409       if (growth <= 0)
 410         ;
 411       else if (!cgraph_maybe_hot_edge_p (e)
 412                && growth > 0)
 413         {
 414           if (dump_file)
 415             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 416                      "call is cold and code would grow by %i\n",
 417                      cgraph_node_name (e->caller), e->caller->uid,
 418                      cgraph_node_name (callee), callee->uid,
 419                      growth);
 420           want_inline = false;
 421         }
 422       else if (!leaf_node_p (callee)
 423                && growth > 0)
 424         {
 425           if (dump_file)
 426             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 427                      "callee is not leaf and code would grow by %i\n",
 428                      cgraph_node_name (e->caller), e->caller->uid,
 429                      cgraph_node_name (callee), callee->uid,
 430                      growth);
 431           want_inline = false;
 432         }
 433       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 434         {
 435           if (dump_file)
 436             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 437                      "growth %i exceeds --param early-inlining-insns\n",
 438                      cgraph_node_name (e->caller), e->caller->uid,
 439                      cgraph_node_name (callee), callee->uid,
 440                      growth);
 441           want_inline = false;
 442         }
 443     }
 444   return want_inline;
 445 }
 446
 447 /* Return true if we are interested in inlining small function.
 448    When REPORT is true, report reason to dump file.  */
 449
 450 static bool
 451 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 452 {
 453   bool want_inline = true;
 454   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 455
 456   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 457     ;
 458   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 459            && !flag_inline_small_functions)
 460     {
 461       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 462       want_inline = false;
 463     }
 464   else
 465     {
 466       int growth = estimate_edge_growth (e);
 467
 468       if (growth <= 0)
 469         ;
 470       else if (DECL_DECLARED_INLINE_P (callee->decl)
 471                && growth >= MAX_INLINE_INSNS_SINGLE)
 472         {
 473           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 474           want_inline = false;
 475         }
 476       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 477                && !flag_inline_functions)
 478         {
 479           e->inline_failed = CIF_NOT_DECLARED_INLINED;
 480           want_inline = false;
 481         }
 482       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 483                && growth >= MAX_INLINE_INSNS_AUTO)
 484         {
 485           e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 486           want_inline = false;
 487         }
 488       /* If call is cold, do not inline when function body would grow.
 489          Still inline when the overall unit size will shrink because the offline
 490          copy of function being eliminated.
 491
 492          This is slightly wrong on aggressive side:  it is entirely possible
 493          that function is called many times with a context where inlining
 494          reduces code size and few times with a context where inlining increase
 495          code size.  Resoluting growth estimate will be negative even if it
 496          would make more sense to keep offline copy and do not inline into the
 497          call sites that makes the code size grow.
 498
 499          When badness orders the calls in a way that code reducing calls come
 500          first, this situation is not a problem at all: after inlining all
 501          "good" calls, we will realize that keeping the function around is
 502          better.  */
 503       else if (!cgraph_maybe_hot_edge_p (e)
 504                && (DECL_EXTERNAL (callee->decl)
 505
 506                    /* Unlike for functions called once, we play unsafe with
 507                       COMDATs.  We can allow that since we know functions
 508                       in consideration are small (and thus risk is small) and
 509                       moreover grow estimates already accounts that COMDAT
 510                       functions may or may not disappear when eliminated from
 511                       current unit. With good probability making aggressive
 512                       choice in all units is going to make overall program
 513                       smaller.
 514
 515                       Consequently we ask cgraph_can_remove_if_no_direct_calls_p
 516                       instead of
 517                       cgraph_will_be_removed_from_program_if_no_direct_calls  */
 518
 519                    || !cgraph_can_remove_if_no_direct_calls_p (callee)
 520                    || estimate_growth (callee) > 0))
 521         {
 522           e->inline_failed = CIF_UNLIKELY_CALL;
 523           want_inline = false;
 524         }
 525     }
 526   if (!want_inline && report)
 527     report_inline_failed_reason (e);
 528   return want_inline;
 529 }
 530
 531 /* EDGE is self recursive edge.
 532    We hand two cases - when function A is inlining into itself
 533    or when function A is being inlined into another inliner copy of function
 534    A within function B.
 535
 536    In first case OUTER_NODE points to the toplevel copy of A, while
 537    in the second case OUTER_NODE points to the outermost copy of A in B.
 538
 539    In both cases we want to be extra selective since
 540    inlining the call will just introduce new recursive calls to appear.  */
 541
 542 static bool
 543 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 544                                    struct cgraph_node *outer_node,
 545                                    bool peeling,
 546                                    int depth)
 547 {
 548   char const *reason = NULL;
 549   bool want_inline = true;
 550   int caller_freq = CGRAPH_FREQ_BASE;
 551   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 552
 553   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 554     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 555
 556   if (!cgraph_maybe_hot_edge_p (edge))
 557     {
 558       reason = "recursive call is cold";
 559       want_inline = false;
 560     }
 561   else if (max_count && !outer_node->count)
 562     {
 563       reason = "not executed in profile";
 564       want_inline = false;
 565     }
 566   else if (depth > max_depth)
 567     {
 568       reason = "--param max-inline-recursive-depth exceeded.";
 569       want_inline = false;
 570     }
 571
 572   if (outer_node->global.inlined_to)
 573     caller_freq = outer_node->callers->frequency;
 574
 575   if (!want_inline)
 576     ;
 577   /* Inlining of self recursive function into copy of itself within other function
 578      is transformation similar to loop peeling.
 579
 580      Peeling is profitable if we can inline enough copies to make probability
 581      of actual call to the self recursive function very small.  Be sure that
 582      the probability of recursion is small.
 583
 584      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 585      This way the expected number of recision is at most max_depth.  */
 586   else if (peeling)
 587     {
 588       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 589                                          / max_depth);
 590       int i;
 591       for (i = 1; i < depth; i++)
 592         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 593       if (max_count
 594           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 595               >= max_prob))
 596         {
 597           reason = "profile of recursive call is too large";
 598           want_inline = false;
 599         }
 600       if (!max_count
 601           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 602               >= max_prob))
 603         {
 604           reason = "frequency of recursive call is too large";
 605           want_inline = false;
 606         }
 607     }
 608   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 609      depth is large.  We reduce function call overhead and increase chances that
 610      things fit in hardware return predictor.
 611
 612      Recursive inlining might however increase cost of stack frame setup
 613      actually slowing down functions whose recursion tree is wide rather than
 614      deep.
 615
 616      Deciding reliably on when to do recursive inlining without profile feedback
 617      is tricky.  For now we disable recursive inlining when probability of self
 618      recursion is low.
 619
 620      Recursive inlining of self recursive call within loop also results in large loop
 621      depths that generally optimize badly.  We may want to throttle down inlining
 622      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 623      methods.  */
 624   else
 625     {
 626       if (max_count
 627           && (edge->count * 100 / outer_node->count
 628               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 629         {
 630           reason = "profile of recursive call is too small";
 631           want_inline = false;
 632         }
 633       else if (!max_count
 634                && (edge->frequency * 100 / caller_freq
 635                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 636         {
 637           reason = "frequency of recursive call is too small";
 638           want_inline = false;
 639         }
 640     }
 641   if (!want_inline && dump_file)
 642     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 643   return want_inline;
 644 }
 645
 646
 647 /* Decide if NODE is called once inlining it would eliminate need
 648    for the offline copy of function.  */
 649
 650 static bool
 651 want_inline_function_called_once_p (struct cgraph_node *node)
 652 {
 653    if (node->alias)
 654      return false;
 655    /* Already inlined?  */
 656    if (node->global.inlined_to)
 657      return false;
 658    /* Zero or more then one callers?  */
 659    if (!node->callers
 660        || node->callers->next_caller)
 661      return false;
 662    /* Recursive call makes no sense to inline.  */
 663    if (node->callers->caller == node)
 664      return false;
 665    /* External functions are not really in the unit, so inlining
 666       them when called once would just increase the program size.  */
 667    if (DECL_EXTERNAL (node->decl))
 668      return false;
 669    /* Offline body must be optimized out.  */
 670    if (!cgraph_will_be_removed_from_program_if_no_direct_calls (node))
 671      return false;
 672    if (!can_inline_edge_p (node->callers, true))
 673      return false;
 674    return true;
 675 }
 676
 677
 678 /* Return relative time improvement for inlining EDGE in range
 679    1...2^9.  */
 680
 681 static inline int
 682 relative_time_benefit (struct inline_summary *callee_info,
 683                        struct cgraph_edge *edge,
 684                        int time_growth)
 685 {
 686   int relbenefit;
 687   gcov_type uninlined_call_time;
 688
 689   uninlined_call_time =
 690     ((gcov_type)
 691      (callee_info->time
 692       + inline_edge_summary (edge)->call_stmt_time
 693       + CGRAPH_FREQ_BASE / 2) * edge->frequency
 694      / CGRAPH_FREQ_BASE);
 695   /* Compute relative time benefit, i.e. how much the call becomes faster.
 696      ??? perhaps computing how much the caller+calle together become faster
 697      would lead to more realistic results.  */
 698   if (!uninlined_call_time)
 699     uninlined_call_time = 1;
 700   relbenefit =
 701     (uninlined_call_time - time_growth) * 256 / (uninlined_call_time);
 702   relbenefit = MIN (relbenefit, 512);
 703   relbenefit = MAX (relbenefit, 1);
 704   return relbenefit;
 705 }
 706
 707
 708 /* A cost model driving the inlining heuristics in a way so the edges with
 709    smallest badness are inlined first.  After each inlining is performed
 710    the costs of all caller edges of nodes affected are recomputed so the
 711    metrics may accurately depend on values such as number of inlinable callers
 712    of the function or function body size.  */
 713
 714 static int
 715 edge_badness (struct cgraph_edge *edge, bool dump)
 716 {
 717   gcov_type badness;
 718   int growth, time_growth;
 719   struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
 720                                                               NULL);
 721   struct inline_summary *callee_info = inline_summary (callee);
 722
 723   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 724     return INT_MIN;
 725
 726   growth = estimate_edge_growth (edge);
 727   time_growth = estimate_edge_time (edge);
 728
 729   if (dump)
 730     {
 731       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
 732                cgraph_node_name (edge->caller),
 733                cgraph_node_name (callee));
 734       fprintf (dump_file, "      size growth %i, time growth %i\n",
 735                growth,
 736                time_growth);
 737     }
 738
 739   /* Always prefer inlining saving code size.  */
 740   if (growth <= 0)
 741     {
 742       badness = INT_MIN / 2 + growth;
 743       if (dump)
 744         fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
 745                  growth);
 746     }
 747
 748   /* When profiling is available, compute badness as:
 749
 750                 relative_edge_count * relative_time_benefit
 751      goodness = -------------------------------------------
 752                 edge_growth
 753      badness = -goodness
 754
 755     The fraction is upside down, because for edge counts and time benefits
 756     the bounds are known. Edge growth is essentially unlimited.  */
 757
 758   else if (max_count)
 759     {
 760       int relbenefit = relative_time_benefit (callee_info, edge, time_growth);
 761       badness =
 762         ((int)
 763          ((double) edge->count * INT_MIN / 2 / max_count / 512) *
 764          relative_time_benefit (callee_info, edge, time_growth)) / growth;
 765
 766       /* Be sure that insanity of the profile won't lead to increasing counts
 767          in the scaling and thus to overflow in the computation above.  */
 768       gcc_assert (max_count >= edge->count);
 769       if (dump)
 770         {
 771           fprintf (dump_file,
 772                    "      %i (relative %f): profile info. Relative count %f"
 773                    " * Relative benefit %f\n",
 774                    (int) badness, (double) badness / INT_MIN,
 775                    (double) edge->count / max_count,
 776                    relbenefit * 100 / 256.0);
 777         }
 778     }
 779
 780   /* When function local profile is available, compute badness as:
 781
 782
 783                growth_of_callee
 784      badness = -------------------------------------- + growth_for_all
 785                relative_time_benefit * edge_frequency
 786
 787   */
 788   else if (flag_guess_branch_prob)
 789     {
 790       int div = edge->frequency * (1<<10) / CGRAPH_FREQ_MAX;
 791       int growth_for_all;
 792
 793       div = MAX (div, 1);
 794       gcc_checking_assert (edge->frequency <= CGRAPH_FREQ_MAX);
 795       div *= relative_time_benefit (callee_info, edge, time_growth);
 796
 797       /* frequency is normalized in range 1...2^10.
 798          relbenefit in range 1...2^9
 799          DIV should be in range 1....2^19.  */
 800       gcc_checking_assert (div >= 1 && div <= (1<<19));
 801
 802       /* Result must be integer in range 0...INT_MAX.
 803          Set the base of fixed point calculation so we don't lose much of
 804          precision for small badnesses (those are interesting) yet we don't
 805          overflow for growths that are still in interesting range.  */
 806       badness = ((gcov_type)growth) * (1<<18);
 807       badness = (badness + div / 2) / div;
 808
 809       /* Overall growth of inlining all calls of function matters: we want to
 810          inline so offline copy of function is no longer needed.
 811
 812          Additionally functions that can be fully inlined without much of
 813          effort are better inline candidates than functions that can be fully
 814          inlined only after noticeable overall unit growths. The latter
 815          are better in a sense compressing of code size by factoring out common
 816          code into separate function shared by multiple code paths.
 817
 818          We might mix the value into the fraction by taking into account
 819          relative growth of the unit, but for now just add the number
 820          into the resulting fraction.  */
 821       growth_for_all = estimate_growth (callee);
 822       badness += growth_for_all;
 823       if (badness > INT_MAX - 1)
 824         badness = INT_MAX - 1;
 825       if (dump)
 826         {
 827           fprintf (dump_file,
 828                    "      %i: guessed profile. frequency %f, overall growth %i,"
 829                    " benefit %f%%, divisor %i\n",
 830                    (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE, growth_for_all,
 831                    relative_time_benefit (callee_info, edge, time_growth) * 100 / 256.0, div);
 832         }
 833     }
 834   /* When function local profile is not available or it does not give
 835      useful information (ie frequency is zero), base the cost on
 836      loop nest and overall size growth, so we optimize for overall number
 837      of functions fully inlined in program.  */
 838   else
 839     {
 840       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
 841       badness = estimate_growth (callee) * 256;
 842
 843       /* Decrease badness if call is nested.  */
 844       if (badness > 0)
 845         badness >>= nest;
 846       else
 847         {
 848           badness <<= nest;
 849         }
 850       if (dump)
 851         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
 852                  nest);
 853     }
 854
 855   /* Ensure that we did not overflow in all the fixed point math above.  */
 856   gcc_assert (badness >= INT_MIN);
 857   gcc_assert (badness <= INT_MAX - 1);
 858   /* Make recursive inlining happen always after other inlining is done.  */
 859   if (cgraph_edge_recursive_p (edge))
 860     return badness + 1;
 861   else
 862     return badness;
 863 }
 864
 865 /* Make the key of EDGE in HEAP reflect its freshly computed badness.
 866    EDGE->aux holds the heap node of a queued edge; an edge with no heap
 867    node is inserted, a queued one only ever has its key lowered.  */
 868 static inline void
 869 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
 870 {
 871   int new_key = edge_badness (edge, false);
 872   int verbose = dump_file && (dump_flags & TDF_DETAILS);
 873   fibnode_t queued;
 874
 875   if (!edge->aux)
 876     {
 877       /* Not queued yet: insert and remember the heap node.  */
 878       if (verbose)
 879         fprintf (dump_file,
 880                  "  enqueuing call %s/%i -> %s/%i, badness %i\n",
 881                  cgraph_node_name (edge->caller), edge->caller->uid,
 882                  cgraph_node_name (edge->callee), edge->callee->uid,
 883                  new_key);
 884       edge->aux = fibheap_insert (heap, new_key, edge);
 885       return;
 886     }
 887
 888   queued = (fibnode_t) edge->aux;
 889   gcc_checking_assert (queued->data == edge);
 890
 891   /* fibheap_replace_key can only decrease a key.  A grown badness is
 892      therefore left stale here; the element is re-inserted once it
 893      becomes the minimum of the heap.  */
 894   if (new_key >= queued->key)
 895     return;
 896
 897   if (verbose)
 898     fprintf (dump_file,
 899              "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
 900              cgraph_node_name (edge->caller), edge->caller->uid,
 901              cgraph_node_name (edge->callee), edge->callee->uid,
 902              (int) queued->key,
 903              new_key);
 904   fibheap_replace_key (heap, queued, new_key);
 905   gcc_checking_assert (queued->key == new_key);
 906 }
 907
 908
 909 /* NODE was inlined.
 910    All caller edges need to be reset because
 911    size estimates change.  Similarly callees need to be reset
 912    because better context may be known.
        The node growth cache is reset for the function NODE is inlined into
        (NODE itself when it is not an inline clone).  The edge growth cache
        is reset for each not yet inlined caller edge of that function and
        for each not yet inlined edge reachable from NODE through calls that
        were already inlined.  */
 913
 914 static void
 915 reset_edge_caches (struct cgraph_node *node)
 916 {
 917   struct cgraph_edge *edge;
 918   struct cgraph_edge *e = node->callees;
 919   struct cgraph_node *where = node;
 920
 921   if (where->global.inlined_to)
 922     where = where->global.inlined_to;
 923
 924   /* WHERE body size has changed, the cached growth is invalid.  */
 925   reset_node_growth_cache (where);
 926
 927   for (edge = where->callers; edge; edge = edge->next_caller)
 928     if (edge->inline_failed)
 929       reset_edge_growth_cache (edge);
 930
 931   if (!e)
 932     return;
 933
       /* Iterative walk over the callee edges of NODE and of the bodies
          inlined into it: descend through an inlined edge whose callee has
          calls of its own, otherwise handle the edge and step to its next
          sibling.  When the siblings run out, climb through the first caller
          edge of the current caller until an edge with a next sibling is
          found, or stop once the caller is NODE itself.
          NOTE(review): the climb presumably relies on an inline clone having
          a single caller edge -- confirm.  */
 934   while (true)
 935     if (!e->inline_failed && e->callee->callees)
 936       e = e->callee->callees;
 937     else
 938       {
 939         if (e->inline_failed)
 940           reset_edge_growth_cache (e);
 941         if (e->next_callee)
 942           e = e->next_callee;
 943         else
 944           {
 945             do
 946               {
 947                 if (e->caller == node)
 948                   return;
 949                 e = e->caller->callers;
 950               }
 951             while (!e->next_callee);
 952             e = e->next_callee;
 953           }
 954       }
 955 }
 956
 957 /* Refresh the HEAP keys of all not yet inlined caller edges of NODE.
 958    UPDATED_NODES remembers nodes handled already, so each is processed once.
 959    If CHECK_INLINABLITY_FOR is non-NULL only that edge is re-tested for
 960    inlinability; otherwise every caller edge is re-tested.  */
 961
 962 static void
 963 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
 964                     bitmap updated_nodes,
 965                     struct cgraph_edge *check_inlinablity_for)
 966 {
 967   struct cgraph_edge *cs;
 968
 969   if (!inline_summary (node)->inlinable
 970       || cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE
 971       || node->global.inlined_to
 972       || !bitmap_set_bit (updated_nodes, node->uid))
 973     return;
 974   for (cs = node->callers; cs; cs = cs->next_caller)
 975     {
 976       if (!cs->inline_failed)
 977         continue;
 978       if (check_inlinablity_for && check_inlinablity_for != cs)
 979         {
 980           /* Not the edge under test: only refresh a key already queued.  */
 981           if (cs->aux)
 982             update_edge_key (heap, cs);
 983         }
 984       else if (can_inline_edge_p (cs, false)
 985                && want_inline_small_function_p (cs, false))
 986         update_edge_key (heap, cs);
 987       else if (cs->aux)
 988         {
 989           /* No longer a candidate: report the reason, drop it from HEAP.  */
 990           report_inline_failed_reason (cs);
 991           fibheap_delete_node (heap, (fibnode_t) cs->aux);
 992           cs->aux = NULL;
 993         }
 994     }
 995 }
 996
 997 /* Recompute HEAP nodes for each uninlined call in NODE.
 998    This is used when we know that edge badnesses are going only to increase
 999    (we introduced new call site) and thus all we need is to insert newly
1000    created edges into heap.
        Calls in bodies already inlined into NODE are visited too.  An edge
        is skipped when its callee is not inlinable, is not at least
        AVAIL_AVAILABLE, or is already marked in UPDATED_NODES; the bitmap is
        only read here, never written.  */
1001
1002 static void
1003 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1004                     bitmap updated_nodes)
1005 {
1006   struct cgraph_edge *e = node->callees;
1007
1008   if (!e)
1009     return;
       /* Iterative walk: descend through an inlined edge whose callee has
          calls, otherwise process the edge and move to its next sibling.
          When siblings run out, climb via the caller's first caller edge
          until a next sibling exists or the caller is NODE.  */
1010   while (true)
1011     if (!e->inline_failed && e->callee->callees)
1012       e = e->callee->callees;
1013     else
1014       {
1015         enum availability avail;
1016         struct cgraph_node *callee;
1017         /* We do not reset callee growth cache here.  Since we added a new call,
1018            growth could have just increased and consequently the badness
1019            metric doesn't need updating.  */
             /* CALLEE is assigned inside the condition; AVAIL is filled in by
                the lookup but not read afterwards in this function.  */
1020         if (e->inline_failed
1021             && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1022             && inline_summary (callee)->inlinable
1023             && cgraph_function_body_availability (callee) >= AVAIL_AVAILABLE
1024             && !bitmap_bit_p (updated_nodes, callee->uid))
1025           {
1026             if (can_inline_edge_p (e, false)
1027                 && want_inline_small_function_p (e, false))
1028               update_edge_key (heap, e);
1029             else if (e->aux)
1030               {
                     /* The edge is queued but no longer wanted: report the
                        reason and remove it from HEAP.  */
1031                 report_inline_failed_reason (e);
1032                 fibheap_delete_node (heap, (fibnode_t) e->aux);
1033                 e->aux = NULL;
1034               }
1035           }
1036         if (e->next_callee)
1037           e = e->next_callee;
1038         else
1039           {
1040             do
1041               {
1042                 if (e->caller == node)
1043                   return;
1044                 e = e->caller->callers;
1045               }
1046             while (!e->next_callee);
1047             e = e->next_callee;
1048           }
1049       }
1050 }
1051
1052 /* Recompute heap nodes for each of caller edges of each of callees.
1053    Walk recursively into all inline clones.
        For every edge visited the node growth cache of its callee is reset.
        For an edge that is not inlined, update_caller_keys is then run on
        the callee with that edge as the one to re-check for inlinability.
        HEAP and UPDATED_NODES are passed through unchanged.  */
1054
1055 static void
1056 update_all_callee_keys (fibheap_t heap, struct cgraph_node *node,
1057                         bitmap updated_nodes)
1058 {
1059   struct cgraph_edge *e = node->callees;
1060   if (!e)
1061     return;
       /* Iterative walk: descend through an inlined edge whose callee has
          calls, otherwise process the edge and move to its next sibling.
          When siblings run out, climb via the caller's first caller edge
          until a next sibling exists or the caller is NODE.  */
1062   while (true)
1063     if (!e->inline_failed && e->callee->callees)
1064       e = e->callee->callees;
1065     else
1066       {
1067         struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
1068                                                                     NULL);
1069
1070         /* We inlined and thus callees might have different number of calls.
1071            Reset their caches.  */
1072         reset_node_growth_cache (callee);
1073         if (e->inline_failed)
1074           update_caller_keys (heap, callee, updated_nodes, e);
1075         if (e->next_callee)
1076           e = e->next_callee;
1077         else
1078           {
1079             do
1080               {
1081                 if (e->caller == node)
1082                   return;
1083                 e = e->caller->callers;
1084               }
1085             while (!e->next_callee);
1086             e = e->next_callee;
1087           }
1088       }
1089 }
1090
1091 /* Put each call made from WHERE or from a body inlined into it that targets
1092    NODE into HEAP, keyed by scaled profile count if known, else frequency.  */
1093
1094 static void
1095 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1096                         fibheap_t heap)
1097 {
1098   struct cgraph_edge *cs;
1099   enum availability avail;
1100
1101   for (cs = where->callees; cs; cs = cs->next_callee)
1102     {
1103       if (cs->callee != node
1104           && (cgraph_function_or_thunk_node (cs->callee, &avail) != node
1105               || avail <= AVAIL_OVERWRITABLE))
1106         continue;
1107       if (!max_count)
1108         fibheap_insert (heap, -cs->frequency, cs);
1109       else
1110         fibheap_insert (heap, -(cs->count / ((max_count + (1<<24) - 1)
1111                                              / (1<<24))), cs);
1112     }
1113   for (cs = where->callees; cs; cs = cs->next_callee)
1114     if (!cs->inline_failed)
1115       lookup_recursive_calls (node, cs->callee, heap);
1116 }
1117
1118 /* Decide on recursive inlining: in the case function has recursive calls,
1119    inline until the estimated body size reaches the limit.  If any new
1120    indirect edges are discovered in the process, add them to *NEW_EDGES,
1121    unless NEW_EDGES is NULL.
        The function worked on is the caller of EDGE, or the function that
        caller is inlined into.  The limit is
        PARAM_MAX_INLINE_INSNS_RECURSIVE for functions declared inline and
        PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO otherwise.
        Returns true when at least one recursive call was inlined, false
        otherwise.  */
1122
1123 static bool
1124 recursive_inlining (struct cgraph_edge *edge,
1125                     VEC (cgraph_edge_p, heap) **new_edges)
1126 {
1127   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1128   fibheap_t heap;
1129   struct cgraph_node *node;
1130   struct cgraph_edge *e;
1131   struct cgraph_node *master_clone = NULL, *next;
1132   int depth = 0;
1133   int n = 0;
1134
1135   node = edge->caller;
1136   if (node->global.inlined_to)
1137     node = node->global.inlined_to;
1138
1139   if (DECL_DECLARED_INLINE_P (node->decl))
1140     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1141
1142   /* Make sure that function is small enough to be considered for inlining.  */
1143   if (estimate_size_after_inlining (node, edge)  >= limit)
1144     return false;
       /* Collect the recursive calls into a private heap; with none found
          there is nothing to do.  */
1145   heap = fibheap_new ();
1146   lookup_recursive_calls (node, node, heap);
1147   if (fibheap_empty (heap))
1148     {
1149       fibheap_delete (heap);
1150       return false;
1151     }
1152
1153   if (dump_file)
1154     fprintf (dump_file,
1155              "  Performing recursive inlining on %s\n",
1156              cgraph_node_name (node));
1157
1158   /* Do the inlining and update list of recursive call during process.  */
1159   while (!fibheap_empty (heap))
1160     {
1161       struct cgraph_edge *curr
1162         = (struct cgraph_edge *) fibheap_extract_min (heap);
1163       struct cgraph_node *cnode;
1164
           /* Stop for good once the next candidate would exceed the limit;
              whatever is left in the heap triggers the "limit met" dump
              below.  */
1165       if (estimate_size_after_inlining (node, curr) > limit)
1166         break;
1167
1168       if (!can_inline_edge_p (curr, true))
1169         continue;
1170
           /* Count nesting: walk from the caller of CURR up through the
              functions it is inlined into.
              NOTE(review): the test below does not involve CNODE, so it has
              the same value on every iteration -- confirm this is
              intended.  */
1171       depth = 1;
1172       for (cnode = curr->caller;
1173            cnode->global.inlined_to; cnode = cnode->callers->caller)
1174         if (node->decl == curr->callee->decl)
1175           depth++;
1176
1177       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1178         continue;
1179
1180       if (dump_file)
1181         {
1182           fprintf (dump_file,
1183                    "   Inlining call of depth %i", depth);
1184           if (node->count)
1185             {
1186               fprintf (dump_file, " called approx. %.2f times per call",
1187                        (double)curr->count / node->count);
1188             }
1189           fprintf (dump_file, "\n");
1190         }
1191       if (!master_clone)
1192         {
1193           /* We need original clone to copy around.  */
1194           master_clone = cgraph_clone_node (node, node->decl,
1195                                             node->count, CGRAPH_FREQ_BASE,
1196                                             false, NULL, true);
1197           for (e = master_clone->callees; e; e = e->next_callee)
1198             if (!e->inline_failed)
1199               clone_inlined_nodes (e, true, false, NULL);
1200         }
1201
           /* Point the call at the master clone, inline it, and queue the
              recursive calls found in the newly inlined body.  */
1202       cgraph_redirect_edge_callee (curr, master_clone);
1203       inline_call (curr, false, new_edges, &overall_size);
1204       lookup_recursive_calls (node, curr->callee, heap);
1205       n++;
1206     }
1207
1208   if (!fibheap_empty (heap) && dump_file)
1209     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1210   fibheap_delete (heap);
1211
       /* No master clone means no call was inlined.  */
1212   if (!master_clone)
1213     return false;
1214
1215   if (dump_file)
1216     fprintf (dump_file,
1217              "\n   Inlined %i times, "
1218              "body grown from size %i to %i, time %i to %i\n", n,
1219              inline_summary (master_clone)->size, inline_summary (node)->size,
1220              inline_summary (master_clone)->time, inline_summary (node)->time);
1221
1222   /* Remove master clone we used for inlining.  We rely that clones inlined
1223      into master clone gets queued just before master clone so we don't
1224      need recursion.  */
1225   for (node = cgraph_nodes; node != master_clone;
1226        node = next)
1227     {
1228       next = node->next;
1229       if (node->global.inlined_to == master_clone)
1230         cgraph_remove_node (node);
1231     }
1232   cgraph_remove_node (master_clone);
1233   return true;
1234 }
1235
1236
1237 /* Return the size the unit may grow to, given its current estimate INSNS:
1238    max (INSNS, large-unit-insns) scaled up by inline-unit-growth percent.  */
1239
1240 static int
1241 compute_max_insns (int insns)
1242 {
1243   const int large_unit = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1244   const int growth_percent = PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH);
1245   HOST_WIDEST_INT base = insns < large_unit ? large_unit : insns;
1246
1247   /* BASE is HOST_WIDEST_INT so the scaling is done in the wide type.  */
1248   return base * (100 + growth_percent) / 100;
1249 }
1250
1251
1252 /* Pop all NEW_EDGES; put those still inlinable into HEAP keyed by badness.  */
1253
1254 static void
1255 add_new_edges_to_heap (fibheap_t heap, VEC (cgraph_edge_p, heap) *new_edges)
1256 {
1257   while (VEC_length (cgraph_edge_p, new_edges) != 0)
1258     {
1259       struct cgraph_edge *cs = VEC_pop (cgraph_edge_p, new_edges);
1260       gcc_assert (cs->aux == NULL);
1261       if (!cs->inline_failed
1262           || !can_inline_edge_p (cs, true)
1263           || !want_inline_small_function_p (cs, true))
1264         continue;
1265       cs->aux = fibheap_insert (heap, edge_badness (cs, false), cs);
1266     }
1267 }
1268
1269
1270 /* We use greedy algorithm for inlining of small functions:
1271    All inline candidates are put into prioritized heap ordered in
1272    increasing badness.
1273
1274    The inlining of small functions is bounded by unit growth parameters.
        Outline: sum up the initial unit size and the maximal edge count,
        queue every caller edge that passes the inlining tests, then
        repeatedly take the edge with the smallest key.  An edge whose
        recomputed badness differs from its key is re-inserted; otherwise it
        is inlined unless the unit growth limit or one of the re-checked
        tests rejects it.  Recursive edges are handed to recursive_inlining.
        After each inlining the keys of the affected edges are refreshed.  */
1275
1276 static void
1277 inline_small_functions (void)
1278 {
1279   struct cgraph_node *node;
1280   struct cgraph_edge *edge;
1281   fibheap_t heap = fibheap_new ();
1282   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1283   int min_size, max_size;
1284   VEC (cgraph_edge_p, heap) *new_indirect_edges = NULL;
1285   int initial_size = 0;
1286
1287   if (flag_indirect_inlining)
1288     new_indirect_edges = VEC_alloc (cgraph_edge_p, heap, 8);
1289
1290   /* Compute overall unit size and other global parameters used by badness
1291      metrics.  */
1292
1293   max_count = 0;
1294   initialize_growth_caches ();
1295
1296   FOR_EACH_DEFINED_FUNCTION (node)
1297     if (!node->global.inlined_to)
1298       {
1299         if (cgraph_function_with_gimple_body_p (node)
1300             || node->thunk.thunk_p)
1301           {
1302             struct inline_summary *info = inline_summary (node);
1303
1304             if (!DECL_EXTERNAL (node->decl))
1305               initial_size += info->size;
1306           }
1307
1308         for (edge = node->callers; edge; edge = edge->next_caller)
1309           if (max_count < edge->count)
1310             max_count = edge->count;
1311       }
1312
       /* Report the starting size only now that INITIAL_SIZE has been
          accumulated; dumping it before the loop above would always
          print 0.  */
1313   if (dump_file)
1314     fprintf (dump_file,
1315              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1316              initial_size);
1317
1318   overall_size = initial_size;
1319   max_size = compute_max_insns (overall_size);
1320   min_size = overall_size;
1321
1322   /* Populate the heap with all edges we might inline.  */
1323
1324   FOR_EACH_DEFINED_FUNCTION (node)
1325     if (!node->global.inlined_to)
1326       {
1327         if (dump_file)
1328           fprintf (dump_file, "Enqueueing calls of %s/%i.\n",
1329                    cgraph_node_name (node), node->uid);
1330
             /* NOTE(review): inline_failed is tested twice below; the second
                test looks redundant unless one of the predicates can clear
                the field -- confirm before removing.  */
1331         for (edge = node->callers; edge; edge = edge->next_caller)
1332           if (edge->inline_failed
1333               && can_inline_edge_p (edge, true)
1334               && want_inline_small_function_p (edge, true)
1335               && edge->inline_failed)
1336             {
1337               gcc_assert (!edge->aux);
1338               update_edge_key (heap, edge);
1339             }
1340       }
1341
1342   gcc_assert (in_lto_p
1343               || !max_count
1344               || (profile_info && flag_branch_probabilities));
1345
1346   while (!fibheap_empty (heap))
1347     {
1348       int old_size = overall_size;
1349       struct cgraph_node *where, *callee;
1350       int badness = fibheap_min_key (heap);
1351       int current_badness;
1352       int growth;
1353
1354       edge = (struct cgraph_edge *) fibheap_extract_min (heap);
1355       gcc_assert (edge->aux);
1356       edge->aux = NULL;
1357       if (!edge->inline_failed)
1358         continue;
1359
1360       /* Be sure that caches are maintained consistent.  */
1361 #ifdef ENABLE_CHECKING
1362       reset_edge_growth_cache (edge);
1363       reset_node_growth_cache (edge->callee);
1364 #endif
1365
1366       /* When updating the edge costs, we only decrease badness in the keys.
1367          Increases of badness are handled lazily; when we see key with out
1368          of date value on it, we re-insert it now.  */
1369       current_badness = edge_badness (edge, false);
1370       gcc_assert (current_badness >= badness);
1371       if (current_badness != badness)
1372         {
1373           edge->aux = fibheap_insert (heap, current_badness, edge);
1374           continue;
1375         }
1376
1377       if (!can_inline_edge_p (edge, true))
1378         continue;
1379
1380       callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1381       growth = estimate_edge_growth (edge);
1382       if (dump_file)
1383         {
1384           fprintf (dump_file,
1385                    "\nConsidering %s with %i size\n",
1386                    cgraph_node_name (callee),
1387                    inline_summary (callee)->size);
1388           fprintf (dump_file,
1389                    " to be inlined into %s in %s:%i\n"
1390                    " Estimated growth after inlined into all is %+i insns.\n"
1391                    " Estimated badness is %i, frequency %.2f.\n",
1392                    cgraph_node_name (edge->caller),
1393                    flag_wpa ? "unknown"
1394                    : gimple_filename ((const_gimple) edge->call_stmt),
1395                    flag_wpa ? -1
1396                    : gimple_lineno ((const_gimple) edge->call_stmt),
1397                    estimate_growth (callee),
1398                    badness,
1399                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1400           if (edge->count)
1401             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1402                      edge->count);
1403           if (dump_flags & TDF_DETAILS)
1404             edge_badness (edge, true);
1405         }
1406
           /* Enforce the unit growth limit, except for callees that
              disregard inline limits.  */
1407       if (overall_size + growth > max_size
1408           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1409         {
1410           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1411           report_inline_failed_reason (edge);
1412           continue;
1413         }
1414
1415       if (!want_inline_small_function_p (edge, true))
1416         continue;
1417
1418       /* Heuristics for inlining small functions works poorly for
1419          recursive calls where we do effect similar to loop unrolling.
1420          When inlining such edge seems profitable, leave decision on
1421          specific inliner.  */
1422       if (cgraph_edge_recursive_p (edge))
1423         {
1424           where = edge->caller;
1425           if (where->global.inlined_to)
1426             where = where->global.inlined_to;
1427           if (!recursive_inlining (edge,
1428                                    flag_indirect_inlining
1429                                    ? &new_indirect_edges : NULL))
1430             {
1431               edge->inline_failed = CIF_RECURSIVE_INLINING;
1432               continue;
1433             }
1434           reset_edge_caches (where);
1435           /* Recursive inliner inlines all recursive calls of the function
1436              at once. Consequently we need to update all callee keys.  */
1437           if (flag_indirect_inlining)
1438             add_new_edges_to_heap (heap, new_indirect_edges);
1439           update_all_callee_keys (heap, where, updated_nodes);
1440         }
1441       else
1442         {
1443           struct cgraph_node *outer_node = NULL;
1444           int depth = 0;
1445
1446           /* Consider the case where self recursive function A is inlined into B.
1447              This is desired optimization in some cases, since it leads to effect
1448              similar of loop peeling and we might completely optimize out the
1449              recursive call.  However we must be extra selective.  */
1450
1451           where = edge->caller;
1452           while (where->global.inlined_to)
1453             {
1454               if (where->decl == edge->callee->decl)
1455                 outer_node = where, depth++;
1456               where = where->callers->caller;
1457             }
1458           if (outer_node
1459               && !want_inline_self_recursive_call_p (edge, outer_node,
1460                                                      true, depth))
1461             {
1462               edge->inline_failed
1463                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1464                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1465               continue;
1466             }
1467           else if (depth && dump_file)
1468             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1469
1470           gcc_checking_assert (!callee->global.inlined_to);
1471           inline_call (edge, true, &new_indirect_edges, &overall_size);
1472           if (flag_indirect_inlining)
1473             add_new_edges_to_heap (heap, new_indirect_edges);
1474
1475           reset_edge_caches (edge->callee);
1476           reset_node_growth_cache (callee);
1477
1478           /* We inlined last offline copy to the body.  This might lead
1479              to callees of function having fewer call sites and thus they
1480              may need updating.  */
1481           if (callee->global.inlined_to)
1482             update_all_callee_keys (heap, callee, updated_nodes);
1483           else
1484             update_callee_keys (heap, edge->callee, updated_nodes);
1485         }
1486       where = edge->caller;
1487       if (where->global.inlined_to)
1488         where = where->global.inlined_to;
1489
1490       /* Our profitability metric can depend on local properties
1491          such as number of inlinable calls and size of the function body.
1492          After inlining these properties might change for the function we
1493          inlined into (since its body size changed) and for the functions
1494          called by function we inlined (since number of its inlinable callers
1495          might change).  */
1496       update_caller_keys (heap, where, updated_nodes, NULL);
1497
1498       /* We removed one call of the function we just inlined.  If offline
1499          copy is still needed, be sure to update the keys.  */
1500       if (callee != where && !callee->global.inlined_to)
1501         update_caller_keys (heap, callee, updated_nodes, NULL);
1502       bitmap_clear (updated_nodes);
1503
1504       if (dump_file)
1505         {
1506           fprintf (dump_file,
1507                    " Inlined into %s which now has time %i and size %i,"
1508                    "net change of %+i.\n",
1509                    cgraph_node_name (edge->caller),
1510                    inline_summary (edge->caller)->time,
1511                    inline_summary (edge->caller)->size,
1512                    overall_size - old_size);
1513         }
           /* When the unit shrank below the smallest size seen so far,
              recompute the growth limit from the new minimum.  */
1514       if (min_size > overall_size)
1515         {
1516           min_size = overall_size;
1517           max_size = compute_max_insns (min_size);
1518
1519           if (dump_file)
1520             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1521         }
1522     }
1523
1524   free_growth_caches ();
1525   if (new_indirect_edges)
1526     VEC_free (cgraph_edge_p, heap, new_indirect_edges);
1527   fibheap_delete (heap);
1528   if (dump_file)
1529     fprintf (dump_file,
1530              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1531              initial_size, overall_size,
1532              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1533   BITMAP_FREE (updated_nodes);
1534 }
1535
1536 /* Flatten NODE.  Performed both during early inlining and
1537    at IPA inlining time.
        Every call in NODE that passes the inlinability test is inlined and
        the inlined body is then flattened recursively.  EARLY selects the
        test: can_early_inline_edge_p when true, can_inline_edge_p otherwise.
        NODE->aux is non-NULL for the duration of the call and marks NODE as
        being processed, so that a call leading back to such a function is
        recognized as a cycle and refused.  */
1538
1539 static void
1540 flatten_function (struct cgraph_node *node, bool early)
1541 {
1542   struct cgraph_edge *e;
1543
1544   /* We shouldn't be called recursively when we are being processed.  */
1545   gcc_assert (node->aux == NULL);
1546
1547   node->aux = (void *) node;
1548
1549   for (e = node->callees; e; e = e->next_callee)
1550     {
1551       struct cgraph_node *orig_callee;
1552       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1553
1554       /* We've hit cycle?  It is time to give up.  */
1555       if (callee->aux)
1556         {
1557           if (dump_file)
1558             fprintf (dump_file,
1559                      "Not inlining %s into %s to avoid cycle.\n",
1560                      cgraph_node_name (callee),
1561                      cgraph_node_name (e->caller));
1562           e->inline_failed = CIF_RECURSIVE_INLINING;
1563           continue;
1564         }
1565
1566       /* When the edge is already inlined, we just need to recurse into
1567          it in order to fully flatten the leaves.  */
1568       if (!e->inline_failed)
1569         {
1570           flatten_function (callee, early);
1571           continue;
1572         }
1573
1574       /* Flatten attribute needs to be processed during late inlining. For
1575          extra code quality we however do flattening during early optimization,
1576          too.  */
1577       if (!early
1578           ? !can_inline_edge_p (e, true)
1579           : !can_early_inline_edge_p (e))
1580         continue;
1581
1582       if (cgraph_edge_recursive_p (e))
1583         {
1584           if (dump_file)
1585             fprintf (dump_file, "Not inlining: recursive call.\n");
1586           continue;
1587         }
1588
           /* Caller and callee must agree on whether they are in SSA
              form.  */
1589       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1590           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1591         {
1592           if (dump_file)
1593             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1594           continue;
1595         }
1596
1597       /* Inline the edge and flatten the inline clone.  Avoid
1598          recursing through the original node if the node was cloned.  */
1599       if (dump_file)
1600         fprintf (dump_file, " Inlining %s into %s.\n",
1601                  cgraph_node_name (callee),
1602                  cgraph_node_name (e->caller));
1603       orig_callee = callee;
1604       inline_call (e, true, NULL, NULL);
           /* If inlining changed E->callee, keep the original callee marked
              while the new callee is flattened, and unmark it afterwards.  */
1605       if (e->callee != orig_callee)
1606         orig_callee->aux = (void *) node;
1607       flatten_function (e->callee, early);
1608       if (e->callee != orig_callee)
1609         orig_callee->aux = NULL;
1610     }
1611
1612   node->aux = NULL;
1613 }
1614
1615 /* Decide on the inlining.  We do so in the topological order to avoid
1616    expenses on updating data structures.
        This is the execute hook of pass_ipa_inline.  It works in three
        steps: functions carrying the flatten attribute are flattened first,
        then inline_small_functions is run and unreachable nodes are removed,
        and finally, when flag_inline_functions_called_once is set, nodes
        accepted by want_inline_function_called_once_p are inlined through
        their node->callers edge.  Always returns 0.  */
1617
1618 static unsigned int
1619 ipa_inline (void)
1620 {
1621   struct cgraph_node *node;
1622   int nnodes;
1623   struct cgraph_node **order =
1624     XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1625   int i;
1626
1627   if (in_lto_p && flag_indirect_inlining)
1628     ipa_update_after_lto_read ();
1629   if (flag_indirect_inlining)
1630     ipa_create_all_structures_for_iinln ();
1631
1632   if (dump_file)
1633     dump_inline_summaries (dump_file);
1634
1635   nnodes = ipa_reverse_postorder (order);
1636
       /* flatten_function tests callee->aux to detect cycles, so make sure
          the field starts out cleared on every node.  */
1637   for (node = cgraph_nodes; node; node = node->next)
1638     node->aux = 0;
1639
1640   if (dump_file)
1641     fprintf (dump_file, "\nFlattening functions:\n");
1642
1643   /* In the first pass handle functions to be flattened.  Do this with
1644      a priority so none of our later choices will make this impossible.
          The ORDER array is walked from its last filled entry down to its
          first.  */
1645   for (i = nnodes - 1; i >= 0; i--)
1646     {
1647       node = order[i];
1648
1649       /* Handle nodes to be flattened.
1650          Ideally when processing callees we stop inlining at the
1651          entry of cycles, possibly cloning that entry point and
1652          try to flatten itself turning it into a self-recursive
1653          function.  */
1654       if (lookup_attribute ("flatten",
1655                             DECL_ATTRIBUTES (node->decl)) != NULL)
1656         {
1657           if (dump_file)
1658             fprintf (dump_file,
1659                      "Flattening %s\n", cgraph_node_name (node));
1660           flatten_function (node, false);
1661         }
1662     }
1663
1664   inline_small_functions ();
1665   cgraph_remove_unreachable_nodes (true, dump_file);
1666   free (order);
1667
1668   /* We already perform some inlining of functions called once during
1669      inlining small functions above.  After unreachable nodes are removed,
1670      we still might do a quick check that nothing new is found.  */
1671   if (flag_inline_functions_called_once)
1672     {
1673       int cold;
1674       if (dump_file)
1675         fprintf (dump_file, "\nDeciding on functions called once:\n");
1676
1677       /* Inlining one function called once has good chance of preventing
1678          inlining other function into the same callee.  Ideally we should
1679          work in priority order, but probably inlining hot functions first
1680          is good cut without the extra pain of maintaining the queue.
1681
1682          ??? this is not really fitting the bill perfectly: inlining function
1683          into callee often leads to better optimization of callee due to
1684          increased context for optimization.
1685          For example if main() function calls a function that outputs help
1686          and then function that does the main optimization, we should inline
1687          the second with priority even if both calls are cold by themselves.
1688
1689          We probably want to implement new predicate replacing our use of
1690          maybe_hot_edge interpreted as maybe_hot_edge || callee is known
1691          to be hot.
              The loop below therefore sweeps over all nodes twice: with
              cold == 0 only candidates whose caller edge passes
              cgraph_maybe_hot_edge_p are inlined, with cold == 1 every
              remaining candidate is taken.  */
1692       for (cold = 0; cold <= 1; cold ++)
1693         {
1694           for (node = cgraph_nodes; node; node = node->next)
1695             {
1696               if (want_inline_function_called_once_p (node)
1697                   && (cold
1698                       || cgraph_maybe_hot_edge_p (node->callers)))
1699                 {
                       /* Remember the caller now; the dump emitted after
                          inline_call reports on this node.  */
1700                   struct cgraph_node *caller = node->callers->caller;
1701
1702                   if (dump_file)
1703                     {
1704                       fprintf (dump_file,
1705                                "\nInlining %s size %i.\n",
1706                                cgraph_node_name (node), inline_summary (node)->size);
1707                       fprintf (dump_file,
1708                                " Called once from %s %i insns.\n",
1709                                cgraph_node_name (node->callers->caller),
1710                                inline_summary (node->callers->caller)->size);
1711                     }
1712
1713                   inline_call (node->callers, true, NULL, NULL);
1714                   if (dump_file)
1715                     fprintf (dump_file,
1716                              " Inlined into %s which now has %i size\n",
1717                              cgraph_node_name (caller),
1718                              inline_summary (caller)->size);
1719                 }
1720             }
1721         }
1722     }
1723
1724   /* Free ipa-prop structures if they are no longer needed.  */
1725   if (flag_indirect_inlining)
1726     ipa_free_all_structures_after_iinln ();
1727
1728   if (dump_file)
1729     fprintf (dump_file,
1730              "\nInlined %i calls, eliminated %i functions\n\n",
1731              ncalls_inlined, nfunctions_inlined);
1732
1733   if (dump_file)
1734     dump_inline_summaries (dump_file);
1735   /* In WPA we use inline summaries for partitioning process.  */
1736   if (!flag_wpa)
1737     inline_free_summary ();
1738   return 0;
1739 }
1740
1741 /* Inline always-inline function calls in NODE.
        Every call edge of NODE whose callee has DECL_DISREGARD_INLINE_LIMITS
        set is inlined, unless the edge is recursive (in which case its
        inline_failed field is set to CIF_RECURSIVE_INLINING) or
        can_early_inline_edge_p rejects it.  Return true if at least one
        call was inlined.  */
1742
1743 static bool
1744 inline_always_inline_functions (struct cgraph_node *node)
1745 {
1746   struct cgraph_edge *e;
1747   bool inlined = false;
1748
1749   for (e = node->callees; e; e = e->next_callee)
1750     {
1751       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
           /* Only callees that disregard the inline limits are handled
              here; everything else is left alone.  */
1752       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1753         continue;
1754
           /* A recursive edge is never inlined; record the reason on the
              edge itself.  */
1755       if (cgraph_edge_recursive_p (e))
1756         {
1757           if (dump_file)
1758             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
1759                      cgraph_node_name (e->callee));
1760           e->inline_failed = CIF_RECURSIVE_INLINING;
1761           continue;
1762         }
1763
1764       if (!can_early_inline_edge_p (e))
1765         continue;
1766
1767       if (dump_file)
1768         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
1769                  cgraph_node_name (e->callee),
1770                  cgraph_node_name (e->caller));
1771       inline_call (e, true, NULL, NULL);
1772       inlined = true;
1773     }
1774
1775   return inlined;
1776 }
1777
1778 /* Inline small functions called by NODE during early inlining.  Edges
1779    that are already inlined, recursive, or whose callee summary is not
        marked inlinable are skipped; the remaining ones are inlined when
        both can_early_inline_edge_p and want_early_inline_function_p accept
        them.  Return true if at least one call was inlined.  */
1780
1781 static bool
1782 early_inline_small_functions (struct cgraph_node *node)
1783 {
1784   struct cgraph_edge *e;
1785   bool inlined = false;
1786
1787   for (e = node->callees; e; e = e->next_callee)
1788     {
1789       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
           /* Skip callees that cannot be inlined at all and edges that
              have been inlined already (inline_failed is cleared).  */
1790       if (!inline_summary (callee)->inlinable
1791           || !e->inline_failed)
1792         continue;
1793
1794       /* Do not consider functions not declared inline, unless
              flag_inline_small_functions or flag_inline_functions is set.  */
1795       if (!DECL_DECLARED_INLINE_P (callee->decl)
1796           && !flag_inline_small_functions
1797           && !flag_inline_functions)
1798         continue;
1799
1800       if (dump_file)
1801         fprintf (dump_file, "Considering inline candidate %s.\n",
1802                  cgraph_node_name (callee));
1803
1804       if (!can_early_inline_edge_p (e))
1805         continue;
1806
1807       if (cgraph_edge_recursive_p (e))
1808         {
1809           if (dump_file)
1810             fprintf (dump_file, "  Not inlining: recursive call.\n");
1811           continue;
1812         }
1813
1814       if (!want_early_inline_function_p (e))
1815         continue;
1816
1817       if (dump_file)
1818         fprintf (dump_file, " Inlining %s into %s.\n",
1819                  cgraph_node_name (callee),
1820                  cgraph_node_name (e->caller));
1821       inline_call (e, true, NULL, NULL);
1822       inlined = true;
1823     }
1824
1825   return inlined;
1826 }
1827
1828 /* Do inlining of small functions.  Doing so early helps profiling and other
1829    passes to be somewhat more effective and avoids some code duplication in
1830    later real inlining pass for testcases with very many function calls.
        This is the execute hook of pass_early_inline and works on the
        function given by current_function_decl.  It returns 0 right away if
        errors were seen or ipa_node_params_vector is set.  Otherwise
        always-inline calls are inlined first, and then, settings permitting,
        the function is either flattened or has its small callees inlined
        iteratively.  The return value is the set of TODO flags collected
        from optimize_inline_calls.  */
1831 static unsigned int
1832 early_inliner (void)
1833 {
1834   struct cgraph_node *node = cgraph_get_node (current_function_decl);
1835   struct cgraph_edge *edge;
1836   unsigned int todo = 0;
1837   int iterations = 0;
1838   bool inlined = false;
1839
1840   if (seen_error ())
1841     return 0;
1842
1843   /* Do nothing if data structures for ipa-inliner are already computed.  This
1844      happens when some pass decides to construct new function and
1845      cgraph_add_new_function calls lowering passes and early optimization on
1846      it.  This may confuse us when the early inliner decides to inline call to
1847      function clone, because function clones do not have parameter list in
1848      ipa-prop matching their signature.  */
1849   if (ipa_node_params_vector)
1850     return 0;
1851
1852 #ifdef ENABLE_CHECKING
1853   verify_cgraph_node (node);
1854 #endif
1855
1856   /* Even when not optimizing or not inlining inline always-inline
1857      functions.  */
1858   inlined = inline_always_inline_functions (node);
1859
1860   if (!optimize
1861       || flag_no_inline
1862       || !flag_early_inlining
1863       /* Never inline regular functions into always-inline functions
1864          during incremental inlining.  This sucks as functions calling
1865          always inline functions will get less optimized, but at the
1866          same time inlining of functions calling always inline
1867          function into an always inline function might introduce
1868          cycles of edges to be always inlined in the callgraph.
1869
1870          We might want to be smarter and just avoid this type of inlining.  */
1871       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
1872     ;  /* Nothing beyond the always-inline handling above.  */
1873   else if (lookup_attribute ("flatten",
1874                              DECL_ATTRIBUTES (node->decl)) != NULL)
1875     {
1876       /* When the function is marked to be flattened, recursively inline
1877          all calls in it.  */
1878       if (dump_file)
1879         fprintf (dump_file,
1880                  "Flattening %s\n", cgraph_node_name (node));
1881       flatten_function (node, true);
           /* Request the optimize_inline_calls run at the end of this
              function.  */
1882       inlined = true;
1883     }
1884   else
1885     {
1886       /* We iterate incremental inlining to get trivial cases of indirect
1887          inlining.  The loop stops after
              PARAM_EARLY_INLINER_MAX_ITERATIONS rounds or as soon as
              early_inline_small_functions inlines nothing.  */
1888       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
1889              && early_inline_small_functions (node))
1890         {
1891           timevar_push (TV_INTEGRATION);
1892           todo |= optimize_inline_calls (current_function_decl);
1893
1894           /* Technically we ought to recompute inline parameters so the new
1895              iteration of early inliner works as expected.  We however have
1896              values approximately right and thus we only need to update edge
1897              info that might be cleared out for newly discovered edges.  */
1898           for (edge = node->callees; edge; edge = edge->next_callee)
1899             {
1900               struct inline_edge_summary *es = inline_edge_summary (edge);
1901               es->call_stmt_size
1902                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
1903               es->call_stmt_time
1904                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
1905             }
1906           timevar_pop (TV_INTEGRATION);
1907           iterations++;
               /* optimize_inline_calls has already run in this iteration, so
                  nothing is left pending for the call after the loop.  */
1908           inlined = false;
1909         }
1910       if (dump_file)
1911         fprintf (dump_file, "Iterations: %i\n", iterations);
1912     }
1913
       /* Apply inlining decisions that have not been applied yet.  */
1914   if (inlined)
1915     {
1916       timevar_push (TV_INTEGRATION);
1917       todo |= optimize_inline_calls (current_function_decl);
1918       timevar_pop (TV_INTEGRATION);
1919     }
1920
1921   cfun->always_inline_functions_inlined = true;
1922
1923   return todo;
1924 }
1925
/* Descriptor of the GIMPLE pass einline: it has no gate, runs
   early_inliner, requires PROP_ssa and sets TODO_dump_func in
   todo_flags_finish.  */
1926 struct gimple_opt_pass pass_early_inline =
1927 {
1928  {
1929   GIMPLE_PASS,
1930   "einline",                            /* name */
1931   NULL,                                 /* gate */
1932   early_inliner,                        /* execute */
1933   NULL,                                 /* sub */
1934   NULL,                                 /* next */
1935   0,                                    /* static_pass_number */
1936   TV_INLINE_HEURISTICS,                 /* tv_id */
1937   PROP_ssa,                             /* properties_required */
1938   0,                                    /* properties_provided */
1939   0,                                    /* properties_destroyed */
1940   0,                                    /* todo_flags_start */
1941   TODO_dump_func                        /* todo_flags_finish */
1942  }
1943 };
1944
1945
1946 /* When to run IPA inlining.  Inlining of always-inline functions
1947    happens during early inlining.  This is the gate of pass_ipa_inline;
        it currently returns true unconditionally, for the reason given in
        the body.  */
1948
1949 static bool
1950 gate_ipa_inline (void)
1951 {
1952   /* ???  We'd like to skip this if not optimizing or not inlining as
1953      all always-inline functions have been processed by early
1954      inlining already.  But this at least breaks EH with C++ as
1955      we need to unconditionally run fixup_cfg even at -O0.
1956      So leave it on unconditionally for now.  */
1957   return 1;
1958 }
1959
/* Descriptor of the IPA pass inline: gated by gate_ipa_inline and executed
   by ipa_inline.  The summary hooks are inline_generate_summary,
   inline_write_summary and inline_read_summary, and inline_transform is
   the function transform.  */
1960 struct ipa_opt_pass_d pass_ipa_inline =
1961 {
1962  {
1963   IPA_PASS,
1964   "inline",                             /* name */
1965   gate_ipa_inline,                      /* gate */
1966   ipa_inline,                           /* execute */
1967   NULL,                                 /* sub */
1968   NULL,                                 /* next */
1969   0,                                    /* static_pass_number */
1970   TV_INLINE_HEURISTICS,                 /* tv_id */
1971   0,                                    /* properties_required */
1972   0,                                    /* properties_provided */
1973   0,                                    /* properties_destroyed */
1974   TODO_remove_functions,                /* todo_flags_start */
1975   TODO_dump_cgraph | TODO_dump_func
1976   | TODO_remove_functions | TODO_ggc_collect    /* todo_flags_finish */
1977  },
1978  inline_generate_summary,               /* generate_summary */
1979  inline_write_summary,                  /* write_summary */
1980  inline_read_summary,                   /* read_summary */
1981  NULL,                                  /* write_optimization_summary */
1982  NULL,                                  /* read_optimization_summary */
1983  NULL,                                  /* stmt_fixup */
1984  0,                                     /* TODOs */
1985  inline_transform,                      /* function_transform */
1986  NULL,                                  /* variable_transform */
1987 };