gcc/ipa-inline.c
/* Inlining decision heuristics.
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Inlining decision heuristics

   The implementation of the inliner is organized as follows:

   inlining heuristics limits

     can_inline_edge_p allows checking that a particular inlining is allowed
     by the limits specified by the user (allowed function growth and so
     on).

     Functions are inlined when it is obvious the result is profitable (such
     as functions called once or when inlining reduces code size).
     In addition to that we perform inlining of small functions and recursive
     inlining.

   inlining heuristics

     The inliner itself is split into two passes:

     pass_early_inlining

       Simple local inlining pass inlining callees into the current function.
       This pass makes no use of whole unit analysis and thus it can do only
       very simple decisions based on local properties.

       The strength of the pass is that it is run in topological order
       (reverse postorder) on the callgraph.  Functions are converted into SSA
       form just before this pass and optimized subsequently.  As a result, the
       callees of the function seen by the early inliner were already optimized
       and the results of early inlining add a lot of optimization opportunities
       for the local optimization.

       The pass handles the obvious inlining decisions within the compilation
       unit - inlining auto inline functions, inlining for size and
       flattening.

       The main strength of the pass is the ability to eliminate abstraction
       penalty in C++ code (via a combination of inlining and early
       optimization) and thus improve the quality of analysis done by the real
       IPA optimizers.

       Because of the lack of whole unit knowledge, the pass cannot really make
       good code size/performance tradeoffs.  It however does very simple
       speculative inlining allowing code size to grow by
       EARLY_INLINING_INSNS when the callee is a leaf function.  In this case
       the optimizations performed later are very likely to eliminate the cost.

     pass_ipa_inline

       This is the real inliner able to handle inlining with whole program
       knowledge.  It performs the following steps:

       1) inlining of small functions.  This is implemented by a greedy
       algorithm ordering all inlinable cgraph edges by their badness and
       inlining them in this order as long as the inline limits allow doing so.

       This heuristic is not very good at inlining recursive calls.  Recursive
       calls can be inlined with results similar to loop unrolling.  To do so,
       a special purpose recursive inliner is executed on the function when a
       recursive edge is met as a viable candidate.

       2) Unreachable functions are removed from the callgraph.  Inlining leads
       to devirtualization and other modification of the callgraph, so functions
       may become unreachable during the process.  Also functions declared as
       extern inline or virtual functions are removed, since after inlining
       we no longer need the offline bodies.

       3) Functions called once and not exported from the unit are inlined.
       This should almost always lead to a reduction of code size by eliminating
       the need for an offline copy of the function.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "trans-mem.h"
#include "calls.h"
#include "tree-inline.h"
#include "profile.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "ipa-utils.h"
#include "sreal.h"
#include "auto-profile.h"
#include "builtins.h"
#include "fibonacci_heap.h"
#include "stringpool.h"
#include "attribs.h"
#include "asan.h"
typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;

/* Statistics we collect about the inlining algorithm.  */
static int overall_size;
static profile_count max_count;
static profile_count spec_rem;
/* Return false when inlining edge E would lead to violating
   limits on function unit growth or stack usage growth.

   The relative function body growth limit is present generally
   to avoid problems with non-linear behavior of the compiler.
   To allow inlining huge functions into tiny wrappers, the limit
   is always based on the bigger of the two functions considered.

   For stack growth limits we always base the growth on the stack usage
   of the caller.  We want to prevent applications from segfaulting
   on stack overflow when functions with huge stack frames get
   inlined.  */

static bool
caller_growth_limits (struct cgraph_edge *e)
{
  struct cgraph_node *to = e->caller;
  struct cgraph_node *what = e->callee->ultimate_alias_target ();
  int newsize;
  int limit = 0;
  HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
  ipa_size_summary *outer_info = ipa_size_summaries->get (to);

  /* Look for the function e->caller is inlined into.  While doing
     so work out the largest function body on the way.  As
     described above, we want to base our function growth
     limits on that.  Not on the self size of the
     outer function, not on the self size of the inline code
     we immediately inline into.  This is the most relaxed
     interpretation of the rule "do not grow large functions
     too much in order to prevent the compiler from exploding".  */
  while (true)
    {
      ipa_size_summary *size_info = ipa_size_summaries->get (to);
      if (limit < size_info->self_size)
        limit = size_info->self_size;
      if (stack_size_limit < size_info->estimated_self_stack_size)
        stack_size_limit = size_info->estimated_self_stack_size;
      if (to->inlined_to)
        to = to->callers->caller;
      else
        break;
    }

  ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
  ipa_size_summary *what_size_info = ipa_size_summaries->get (what);

  if (limit < what_size_info->self_size)
    limit = what_size_info->self_size;

  limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;

  /* Check the size after inlining against the function limits.  But allow
     the function to shrink if it went over the limits by forced inlining.  */
  newsize = estimate_size_after_inlining (to, e);
  if (newsize >= ipa_size_summaries->get (what)->size
      && newsize > opt_for_fn (to->decl, param_large_function_insns)
      && newsize > limit)
    {
      e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
      return false;
    }

  if (!what_info->estimated_stack_size)
    return true;

  /* FIXME: Stack size limit often prevents inlining in Fortran programs
     due to large I/O data structures used by the Fortran front-end.
     We ought to ignore this limit when we know that the edge is executed
     on every invocation of the caller (i.e. its call statement dominates
     the exit block).  We do not track this information, yet.  */
  stack_size_limit += ((gcov_type)stack_size_limit
                       * opt_for_fn (to->decl, param_stack_frame_growth)
                       / 100);

  inlined_stack = (ipa_get_stack_frame_offset (to)
                   + outer_info->estimated_self_stack_size
                   + what_info->estimated_stack_size);
  /* Check new stack consumption with stack consumption at the place
     the stack is used.  */
  if (inlined_stack > stack_size_limit
      /* If the function already has large stack usage from a sibling
         inline call, we can inline, too.
         This bit overoptimistically assumes that we are good at stack
         packing.  */
      && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
      && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
    {
      e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
      return false;
    }
  return true;
}
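
/* Worked example of the body-growth check above, using illustrative (not
   necessarily default) parameter values: if the largest function body on the
   inline chain has self_size 1000 and param_large_function_growth is 100,
   the limit becomes 1000 + 1000 * 100 / 100 = 2000 instructions.  Inlining
   is rejected only when the estimated size after inlining is at least the
   callee's own size and exceeds both param_large_function_insns and this
   computed limit.  */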
/* Dump info about why inlining has failed.  */

static void
report_inline_failed_reason (struct cgraph_edge *e)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                       "  not inlinable: %C -> %C, %s\n",
                       e->caller, e->callee,
                       cgraph_inline_failed_string (e->inline_failed));
      if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
           || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
          && e->caller->lto_file_data
          && e->callee->ultimate_alias_target ()->lto_file_data)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                           "  LTO objects: %s, %s\n",
                           e->caller->lto_file_data->file_name,
                           e->callee->ultimate_alias_target ()->lto_file_data->file_name);
        }
      if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
        if (dump_file)
          cl_target_option_print_diff
            (dump_file, 2, target_opts_for_fn (e->caller->decl),
             target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
      if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
        if (dump_file)
          cl_optimization_print_diff
            (dump_file, 2, opts_for_fn (e->caller->decl),
             opts_for_fn (e->callee->ultimate_alias_target ()->decl));
    }
}
/* Decide whether sanitizer-related attributes allow inlining.  */

static bool
sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
{
  if (!caller || !callee)
    return true;

  /* Follow clang and allow inlining for always_inline functions.  */
  if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
    return true;

  const sanitize_code codes[] =
    {
      SANITIZE_ADDRESS,
      SANITIZE_THREAD,
      SANITIZE_UNDEFINED,
      SANITIZE_UNDEFINED_NONDEFAULT,
      SANITIZE_POINTER_COMPARE,
      SANITIZE_POINTER_SUBTRACT
    };

  for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
    if (sanitize_flags_p (codes[i], caller)
        != sanitize_flags_p (codes[i], callee))
      return false;

  if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
    return false;

  return true;
}
/* Used for flags where it is safe to inline when caller's value is
   greater than callee's.  */
#define check_maybe_up(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag \
   && (!always_inline \
       || opts_for_fn (caller->decl)->x_##flag \
          < opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where it is safe to inline when caller's value is
   smaller than callee's.  */
#define check_maybe_down(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag \
   && (!always_inline \
       || opts_for_fn (caller->decl)->x_##flag \
          > opts_for_fn (callee->decl)->x_##flag))
/* Used for flags where an exact match is needed for correctness.  */
#define check_match(flag) \
  (opts_for_fn (caller->decl)->x_##flag \
   != opts_for_fn (callee->decl)->x_##flag)
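
/* Illustrative note (not part of the original source): these macros expect
   CALLER, CALLEE and ALWAYS_INLINE to be local variables in scope at their
   use sites below.  For instance, check_maybe_up (flag_errno_math) expands
   to a test that the flag values of caller and callee differ and that, for
   an always_inline callee, the caller's value is not already the greater
   one; a true result is treated as a blocking optimization mismatch.  */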
/* Decide if we can inline the edge and possibly update
   the inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.  */

static bool
can_inline_edge_p (struct cgraph_edge *e, bool report,
                   bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);

  if (!callee->definition)
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      inlinable = false;
    }
  if (!early && (!opt_for_fn (callee->decl, optimize)
                 || !opt_for_fn (caller->decl, optimize)))
    {
      e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
      inlinable = false;
    }
  else if (callee->calls_comdat_local)
    {
      e->inline_failed = CIF_USES_COMDAT_LOCAL;
      inlinable = false;
    }
  else if (avail <= AVAIL_INTERPOSABLE)
    {
      e->inline_failed = CIF_OVERWRITABLE;
      inlinable = false;
    }
  /* All edges with call_stmt_cannot_inline_p should have inline_failed
     initialized to one of the FINAL_ERROR reasons.  */
  else if (e->call_stmt_cannot_inline_p)
    gcc_unreachable ();
  /* Don't inline if the functions have different EH personalities.  */
  else if (DECL_FUNCTION_PERSONALITY (caller->decl)
           && DECL_FUNCTION_PERSONALITY (callee->decl)
           && (DECL_FUNCTION_PERSONALITY (caller->decl)
               != DECL_FUNCTION_PERSONALITY (callee->decl)))
    {
      e->inline_failed = CIF_EH_PERSONALITY;
      inlinable = false;
    }
  /* TM pure functions should not be inlined into non-TM_pure
     functions.  */
  else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
    {
      e->inline_failed = CIF_UNSPECIFIED;
      inlinable = false;
    }
  /* Check compatibility of target optimization options.  */
  else if (!targetm.target_option.can_inline_p (caller->decl,
                                                callee->decl))
    {
      e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
      inlinable = false;
    }
  else if (ipa_fn_summaries->get (callee) == NULL
           || !ipa_fn_summaries->get (callee)->inlinable)
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
      inlinable = false;
    }
  /* Don't inline a function with mismatched sanitization attributes.  */
  else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
    {
      e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
      inlinable = false;
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return the inline_insns_single limit for function N.  If HINT or HINT2 is
   true, scale up the bound.  */

static int
inline_insns_single (cgraph_node *n, bool hint, bool hint2)
{
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
        spd = 1000000;
      return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
    }
  if (hint || hint2)
    return opt_for_fn (n->decl, param_max_inline_insns_single)
           * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return opt_for_fn (n->decl, param_max_inline_insns_single);
}

/* Return the inline_insns_auto limit for function N.  If HINT or HINT2 is
   true, scale up the bound.  */

static int
inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
{
  int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
  if (hint && hint2)
    {
      int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
      spd = spd * spd;
      if (spd > 1000000)
        spd = 1000000;
      return max_inline_insns_auto * spd / 100;
    }
  if (hint || hint2)
    return max_inline_insns_auto
           * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
  return max_inline_insns_auto;
}
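
/* Worked example for the two helpers above, using illustrative values (the
   real numbers come from --param max-inline-insns-single,
   --param max-inline-insns-auto and --param inline-heuristics-hint-percent):
   with a base limit of 200 and a hint percentage of 200, one matching hint
   group scales the limit to 200 * 200 / 100 = 400 instructions, while both
   hint groups scale it by the squared (and capped) percentage,
   200 * (200 * 200) / 100 = 80000.  */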
/* Decide if we can inline the edge and possibly update
   the inline_failed reason.
   We check whether inlining is possible at all and whether
   caller growth limits allow doing so.

   If REPORT is true, output the reason to the dump file.

   If DISREGARD_LIMITS is true, ignore size limits.  */

static bool
can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
                             bool disregard_limits = false, bool early = false)
{
  gcc_checking_assert (e->inline_failed);

  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    {
      if (report)
        report_inline_failed_reason (e);
      return false;
    }

  bool inlinable = true;
  enum availability avail;
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to : e->caller);
  cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
  tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
  tree callee_tree
    = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
  /* Check if caller growth allows the inlining.  */
  if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
      && !disregard_limits
      && !lookup_attribute ("flatten",
                            DECL_ATTRIBUTES (caller->decl))
      && !caller_growth_limits (e))
    inlinable = false;
  else if (callee->externally_visible
           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
           && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
    {
      e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
      inlinable = false;
    }
  /* Don't inline a function with a higher optimization level than the
     caller.  FIXME: this is really just the tip of the iceberg of handling
     the optimization attribute.  */
  else if (caller_tree != callee_tree)
    {
      bool always_inline =
        (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
         && lookup_attribute ("always_inline",
                              DECL_ATTRIBUTES (callee->decl)));
      ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
      ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);

      /* Until GCC 4.9 we did not check the semantics-altering flags
         below and inlined across optimization boundaries.
         Enabling the checks below breaks several packages by refusing
         to inline library always_inline functions.  See PR65873.
         Disable the check for early inlining for now until a better solution
         is found.  */
      if (always_inline && early)
        ;
      /* There are some options that change IL semantics which means
         we cannot inline in these cases for correctness reasons.
         Not even for always_inline declared functions.  */
      else if (check_match (flag_wrapv)
               || check_match (flag_trapv)
               || check_match (flag_pcc_struct_return)
               || check_maybe_down (optimize_debug)
               /* When caller or callee does FP math, be sure the FP codegen
                  flags are compatible.  */
               || ((caller_info->fp_expressions && callee_info->fp_expressions)
                   && (check_maybe_up (flag_rounding_math)
                       || check_maybe_up (flag_trapping_math)
                       || check_maybe_down (flag_unsafe_math_optimizations)
                       || check_maybe_down (flag_finite_math_only)
                       || check_maybe_up (flag_signaling_nans)
                       || check_maybe_down (flag_cx_limited_range)
                       || check_maybe_up (flag_signed_zeros)
                       || check_maybe_down (flag_associative_math)
                       || check_maybe_down (flag_reciprocal_math)
                       || check_maybe_down (flag_fp_int_builtin_inexact)
                       /* Strictly speaking only when the callee contains
                          function calls that may end up setting errno.  */
                       || check_maybe_up (flag_errno_math)))
               /* We do not want code compiled with exceptions to be
                  brought into a non-EH function unless we know that the callee
                  does not throw.
                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
               || (check_maybe_up (flag_non_call_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               || (check_maybe_up (flag_exceptions)
                   && DECL_FUNCTION_PERSONALITY (callee->decl))
               /* When devirtualization is disabled for the callee, it is not
                  safe to inline it as we possibly mangled the type info.
                  Allow early inlining of always inlines.  */
               || (!early && check_maybe_down (flag_devirtualize)))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
      else if (always_inline)
        ;
      /* When the user added an attribute to the callee honor it.  */
      else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
               && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If explicit optimize attributes are not used, the mismatch is caused
         by different command line options used to build different units.
         Do not care about COMDAT functions - those are intended to be
         optimized with the optimization flags of the module they are used in.
         Also do not care about mixing up size/speed optimization when
         DECL_DISREGARD_INLINE_LIMITS is set.  */
      else if ((callee->merged_comdat
                && !lookup_attribute ("optimize",
                                      DECL_ATTRIBUTES (caller->decl)))
               || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
        ;
      /* If the mismatch is caused by merging two LTO units with different
         optimization flags we want to be a bit nicer.  However never inline
         if one of the functions is not optimized at all.  */
      else if (!opt_for_fn (callee->decl, optimize)
               || !opt_for_fn (caller->decl, optimize))
        {
          e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
          inlinable = false;
        }
      /* If the callee is optimized for size and the caller is not, allow
         inlining if code shrinks or we are in the
         param_max_inline_insns_single limit and the callee is inline (and
         thus likely a unified comdat).  This will allow the caller to run
         faster.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               > opt_for_fn (caller->decl, optimize_size))
        {
          int growth = estimate_edge_growth (e);
          if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
              && (!DECL_DECLARED_INLINE_P (callee->decl)
                  && growth >= MAX (inline_insns_single (caller, false, false),
                                    inline_insns_auto (caller, false, false))))
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }
      /* If the callee is more aggressively optimized for performance than the
         caller, we generally want to inline only cheap (runtime wise)
         functions.  */
      else if (opt_for_fn (callee->decl, optimize_size)
               < opt_for_fn (caller->decl, optimize_size)
               || (opt_for_fn (callee->decl, optimize)
                   > opt_for_fn (caller->decl, optimize)))
        {
          if (estimate_edge_time (e)
              >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
            {
              e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
              inlinable = false;
            }
        }
    }

  if (!inlinable && report)
    report_inline_failed_reason (e);
  return inlinable;
}
/* Return true if the edge E is inlinable during early inlining.  */

static bool
can_early_inline_edge_p (struct cgraph_edge *e)
{
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  /* The early inliner might get called at WPA stage when an IPA pass adds a
     new function.  In this case we cannot really do any early inlining
     because function bodies are missing.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    return false;
  if (!gimple_has_body_p (callee->decl))
    {
      e->inline_failed = CIF_BODY_NOT_AVAILABLE;
      return false;
    }
  /* In the early inliner some of the callees may not be in SSA form yet
     (i.e. the callgraph is cyclic and we did not process
     the callee by the early inliner, yet).  We don't have a CIF code for this
     case; later we will re-do the decision in the real inliner.  */
  if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
      || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                         "  edge not inlinable: not in SSA form\n");
      return false;
    }
  if (!can_inline_edge_p (e, true, true)
      || !can_inline_edge_by_limits_p (e, true, false, true))
    return false;
  return true;
}
/* Return the number of calls in N.  Ignore cheap builtins.  */

static int
num_calls (struct cgraph_node *n)
{
  struct cgraph_edge *e;
  int num = 0;

  for (e = n->callees; e; e = e->next_callee)
    if (!is_inexpensive_builtin (e->callee->decl))
      num++;
  return num;
}
/* Return true if we are interested in early inlining the small function
   called by E.  */

static bool
want_early_inline_function_p (struct cgraph_edge *e)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();

  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  /* For AutoFDO, we need to make sure that before the profile summary, all
     hot paths' IR look exactly the same as in the profiled binary.  As a
     result, in the early inliner, we will disregard the size limit and inline
     those callsites that are:
       * inlined in the profiled binary, and
       * the cloned callee has enough samples to be considered "hot".  */
  else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      report_inline_failed_reason (e);
      want_inline = false;
    }
  else
    {
      /* First take care of very large functions.  */
      int min_growth = estimate_min_edge_growth (e), growth = 0;
      int n;
      int early_inlining_insns = param_early_inlining_insns;

      if (min_growth > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "call is cold and code would grow "
                             "at least by %i\n",
                             e->caller, callee,
                             min_growth);
          want_inline = false;
        }
      else
        growth = estimate_edge_growth (e);

      if (!want_inline || growth <= param_max_inline_insns_size)
        ;
      else if (!e->maybe_hot_p ())
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "call is cold and code would grow by %i\n",
                             e->caller, callee,
                             growth);
          want_inline = false;
        }
      else if (growth > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "growth %i exceeds --param early-inlining-insns\n",
                             e->caller, callee, growth);
          want_inline = false;
        }
      else if ((n = num_calls (callee)) != 0
               && growth * (n + 1) > early_inlining_insns)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
                             "  will not early inline: %C->%C, "
                             "growth %i exceeds --param early-inlining-insns "
                             "divided by number of calls\n",
                             e->caller, callee, growth);
          want_inline = false;
        }
    }
  return want_inline;
}
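
/* Illustration of the final test above, with made-up numbers: if inlining
   grows the caller by 12 instructions and the callee itself still contains
   4 non-builtin calls, the test compares 12 * (4 + 1) = 60 against
   --param early-inlining-insns; as the dump message puts it, the growth
   budget is effectively divided by the number of calls.  */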
/* Compute time of the edge->caller + edge->callee execution when inlining
   does not happen.  */

inline sreal
compute_uninlined_call_time (struct cgraph_edge *edge,
                             sreal uninlined_call_time,
                             sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);

  if (freq > 0)
    uninlined_call_time *= freq;
  else
    uninlined_call_time = uninlined_call_time >> 11;

  sreal caller_time = ipa_fn_summaries->get (caller)->time;
  return uninlined_call_time + caller_time;
}

/* Same as compute_uninlined_call_time but compute time when inlining
   does happen.  */

inline sreal
compute_inlined_call_time (struct cgraph_edge *edge,
                           sreal time,
                           sreal freq)
{
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);
  sreal caller_time = ipa_fn_summaries->get (caller)->time;

  if (freq > 0)
    time *= freq;
  else
    time = time >> 11;

  /* This calculation should match the one in ipa-inline-analysis.c
     (estimate_edge_size_and_time).  */
  time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
  time += caller_time;
  if (time <= 0)
    time = ((sreal) 1) >> 8;
  gcc_checking_assert (time >= 0);
  return time;
}
/* Determine the time saved by inlining EDGE of frequency FREQ,
   where the callee's runtime without inlining is UNINLINED_TIME
   and with inlining it is INLINED_TIME.  */

inline sreal
inlining_speedup (struct cgraph_edge *edge,
                  sreal freq,
                  sreal uninlined_time,
                  sreal inlined_time)
{
  sreal speedup = uninlined_time - inlined_time;
  /* Handling of call_time should match the one in ipa-inline-fnsummary.c
     (estimate_edge_size_and_time).  */
  sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;

  if (freq > 0)
    {
      speedup = (speedup + call_time);
      if (freq != 1)
        speedup = speedup * freq;
    }
  else if (freq == 0)
    speedup = speedup >> 11;
  gcc_checking_assert (speedup >= 0);
  return speedup;
}
/* Return true if the speedup for inlining E is bigger than
   param_inline_min_speedup.  */

static bool
big_speedup_p (struct cgraph_edge *e)
{
  sreal unspec_time;
  sreal spec_time = estimate_edge_time (e, &unspec_time);
  sreal freq = e->sreal_frequency ();
  sreal time = compute_uninlined_call_time (e, unspec_time, freq);
  sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
  cgraph_node *caller = (e->caller->inlined_to
                         ? e->caller->inlined_to
                         : e->caller);
  int limit = opt_for_fn (caller->decl, param_inline_min_speedup);

  if ((time - inlined_time) * 100 > time * limit)
    return true;
  return false;
}
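
/* Worked example for the check above (illustrative numbers): with an
   uninlined caller+callee time of 1000 units, an estimated inlined time of
   850 units and --param inline-min-speedup set to 10, the test compares
   (1000 - 850) * 100 = 15000 against 1000 * 10 = 10000, so the 15% saving
   counts as a big speedup.  */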
/* Return true if we are interested in inlining the small function called
   by E.  When REPORT is true, report the reason to the dump file.  */

static bool
want_inline_small_function_p (struct cgraph_edge *e, bool report)
{
  bool want_inline = true;
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  cgraph_node *to = (e->caller->inlined_to
                     ? e->caller->inlined_to : e->caller);

  /* Allow this function to be called before can_inline_edge_p,
     since it's usually cheaper.  */
  if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
    want_inline = false;
  else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    ;
  else if (!DECL_DECLARED_INLINE_P (callee->decl)
           && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
    {
      e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
      want_inline = false;
    }
  /* Do a fast and conservative check whether the function can be a good
     inline candidate.  */
  else if ((!DECL_DECLARED_INLINE_P (callee->decl)
            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
           && ipa_fn_summaries->get (callee)->min_size
                - ipa_call_summaries->get (e)->call_stmt_size
              > inline_insns_auto (e->caller, true, true))
    {
      e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
      want_inline = false;
    }
  else if ((DECL_DECLARED_INLINE_P (callee->decl)
            || e->count.ipa ().nonzero_p ())
           && ipa_fn_summaries->get (callee)->min_size
                - ipa_call_summaries->get (e)->call_stmt_size
              > inline_insns_single (e->caller, true, true))
    {
      e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
                          ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
                          : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
      want_inline = false;
    }
  else
    {
      int growth = estimate_edge_growth (e);
      ipa_hints hints = estimate_edge_hints (e);
      /* We have two independent groups of hints.  If one matches in each
         of the groups, the limits are increased.  If both groups match, the
         limit is increased even more.  */
      bool apply_hints = (hints & (INLINE_HINT_indirect_call
                                   | INLINE_HINT_known_hot
                                   | INLINE_HINT_loop_iterations
                                   | INLINE_HINT_loop_stride));
      bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);

      if (growth <= opt_for_fn (to->decl,
                                param_max_inline_insns_size))
        ;
      /* Apply the param_max_inline_insns_single limit.  Do not do so when
         hints suggest that inlining the given function is very profitable.
         Avoid computation of big_speedup_p when it is not necessary to change
         the outcome of the decision.  */
      else if (DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= inline_insns_single (e->caller, apply_hints,
                                                 apply_hints2)
               && (apply_hints || apply_hints2
                   || growth >= inline_insns_single (e->caller, true,
                                                     apply_hints2)
                   || !big_speedup_p (e)))
        {
          e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
          want_inline = false;
        }
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && !opt_for_fn (e->caller->decl, flag_inline_functions)
               && growth >= opt_for_fn (to->decl,
                                        param_max_inline_insns_small))
        {
          /* growth_positive_p is expensive, always test it last.  */
          if (growth >= inline_insns_single (e->caller, false, false)
              || growth_positive_p (callee, e, growth))
            {
              e->inline_failed = CIF_NOT_DECLARED_INLINED;
              want_inline = false;
            }
        }
      /* Apply the param_max_inline_insns_auto limit for functions not
         declared inline.  Bypass the limit when the speedup seems big.  */
      else if (!DECL_DECLARED_INLINE_P (callee->decl)
               && growth >= inline_insns_auto (e->caller, apply_hints,
                                               apply_hints2)
               && (apply_hints || apply_hints2
                   || growth >= inline_insns_auto (e->caller, true,
                                                   apply_hints2)
                   || !big_speedup_p (e)))
        {
          /* growth_positive_p is expensive, always test it last.  */
          if (growth >= inline_insns_single (e->caller, false, false)
              || growth_positive_p (callee, e, growth))
            {
              e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
              want_inline = false;
            }
        }
      /* If the call is cold, do not inline when the function body would
         grow.  */
      else if (!e->maybe_hot_p ()
               && (growth >= inline_insns_single (e->caller, false, false)
                   || growth_positive_p (callee, e, growth)))
        {
          e->inline_failed = CIF_UNLIKELY_CALL;
          want_inline = false;
        }
    }
  if (!want_inline && report)
    report_inline_failed_reason (e);
  return want_inline;
}
/* EDGE is a self-recursive edge.
   We handle two cases - when function A is inlining into itself
   or when function A is being inlined into another inliner copy of function
   A within function B.

   In the first case OUTER_NODE points to the toplevel copy of A, while
   in the second case OUTER_NODE points to the outermost copy of A in B.

   In both cases we want to be extra selective since
   inlining the call will just cause new recursive calls to appear.  */

static bool
want_inline_self_recursive_call_p (struct cgraph_edge *edge,
                                   struct cgraph_node *outer_node,
                                   bool peeling,
                                   int depth)
{
  char const *reason = NULL;
  bool want_inline = true;
  sreal caller_freq = 1;
  int max_depth = opt_for_fn (outer_node->decl,
                              param_max_inline_recursive_depth_auto);

  if (DECL_DECLARED_INLINE_P (edge->caller->decl))
    max_depth = opt_for_fn (outer_node->decl,
                            param_max_inline_recursive_depth);

  if (!edge->maybe_hot_p ())
    {
      reason = "recursive call is cold";
      want_inline = false;
    }
  else if (depth > max_depth)
    {
      reason = "--param max-inline-recursive-depth exceeded.";
      want_inline = false;
    }
  else if (outer_node->inlined_to
           && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
    {
      reason = "caller frequency is 0";
      want_inline = false;
    }

  if (!want_inline)
    ;
  /* Inlining of a self-recursive function into a copy of itself within
     another function is a transformation similar to loop peeling.

     Peeling is profitable if we can inline enough copies to make the
     probability of an actual call to the self-recursive function very small.
     Be sure that the probability of recursion is small.

     We ensure that the frequency of recursing is at most 1 - (1/max_depth).
     This way the expected number of recursions is at most max_depth.  */
  else if (peeling)
    {
      sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
      int i;
      for (i = 1; i < depth; i++)
        max_prob = max_prob * max_prob;
      if (edge->sreal_frequency () >= max_prob * caller_freq)
        {
          reason = "frequency of recursive call is too large";
          want_inline = false;
        }
    }
  /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if
     the recursion depth is large.  We reduce function call overhead and
     increase the chances that things fit in the hardware return predictor.

     Recursive inlining might however increase the cost of stack frame setup,
     actually slowing down functions whose recursion tree is wide rather than
     deep.

     Deciding reliably on when to do recursive inlining without profile
     feedback is tricky.  For now we disable recursive inlining when the
     probability of self recursion is low.

     Recursive inlining of a self-recursive call within a loop also results in
     large loop depths that generally optimize badly.  We may want to throttle
     down inlining in those cases.  In particular this seems to happen in one
     of the libstdc++ rb tree methods.  */
  else
    {
      if (edge->sreal_frequency () * 100
          <= caller_freq
             * opt_for_fn (outer_node->decl,
                           param_min_inline_recursive_probability))
        {
          reason = "frequency of recursive call is too small";
          want_inline = false;
        }
    }
  if (!want_inline && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
                     "   not inlining recursively: %s\n", reason);
  return want_inline;
}
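
/* Illustration of the peeling bound above (made-up depth): with max_depth 8
   the initial cap is 1 - 1/8 = 0.875 of the caller frequency; since the cap
   is squared once per additional level, at depth 3 it is 0.875^4 ~= 0.586,
   so only recursive edges whose frequency stays below an ever shrinking
   fraction of the caller frequency keep getting peeled.  */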
/* Return true when NODE has an uninlinable caller;
   set HAS_HOT_CALL if it has a hot call.
   Worker for cgraph_for_node_and_aliases.  */

static bool
check_callers (struct cgraph_node *node, void *has_hot_call)
{
  struct cgraph_edge *e;
  for (e = node->callers; e; e = e->next_caller)
    {
      if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
          || !opt_for_fn (e->caller->decl, optimize))
        return true;
      if (!can_inline_edge_p (e, true))
        return true;
      if (e->recursive_p ())
        return true;
      if (!can_inline_edge_by_limits_p (e, true))
        return true;
      if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
        *(bool *)has_hot_call = true;
    }
  return false;
}

/* If NODE has a caller, return true.  */

static bool
has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
{
  if (node->callers)
    return true;
  return false;
}
/* Decide if inlining NODE would reduce unit size by eliminating
   the offline copy of the function.
   When COLD is true the cold calls are considered, too.  */

static bool
want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
{
  bool has_hot_call = false;

  /* Aliases get inlined along with the function they alias.  */
  if (node->alias)
    return false;
  /* Already inlined?  */
  if (node->inlined_to)
    return false;
  /* Does it have callers?  */
  if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
    return false;
  /* Inlining into all callers would increase size?  */
  if (growth_positive_p (node, NULL, INT_MIN) > 0)
    return false;
  /* All inlines must be possible.  */
  if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
                                         true))
    return false;
  if (!cold && !has_hot_call)
    return false;
  return true;
}

/* Return true if WHERE of SIZE is a possible candidate for the wrapper
   heuristics in edge_badness.  */

static bool
wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
{
  return size < (DECL_DECLARED_INLINE_P (where->decl)
                 ? inline_insns_single (where, false, false)
                 : inline_insns_auto (where, false, false));
}
/* A cost model driving the inlining heuristics in such a way that the edges
   with the smallest badness are inlined first.  After each inlining is
   performed the costs of all caller edges of the nodes affected are
   recomputed so the metrics may accurately depend on values such as the
   number of inlinable callers of the function or the function body size.  */

static sreal
edge_badness (struct cgraph_edge *edge, bool dump)
{
  sreal badness;
  int growth;
  sreal edge_time, unspec_edge_time;
  struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
  class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  ipa_hints hints;
  cgraph_node *caller = (edge->caller->inlined_to
                         ? edge->caller->inlined_to
                         : edge->caller);

  growth = estimate_edge_growth (edge);
  edge_time = estimate_edge_time (edge, &unspec_edge_time);
  hints = estimate_edge_hints (edge);
  gcc_checking_assert (edge_time >= 0);
  /* Check that inlined time is better, but tolerate some roundoff issues.
     FIXME: When callee profile drops to 0 we account calls more.  This
     should be fixed by never doing that.  */
  gcc_checking_assert ((edge_time * 100
                        - callee_info->time * 101).to_int () <= 0
                       || callee->count.ipa ().initialized_p ());
  gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);

  if (dump)
    {
      fprintf (dump_file, "    Badness calculation for %s -> %s\n",
               edge->caller->dump_name (),
               edge->callee->dump_name ());
      fprintf (dump_file, "      size growth %i, time %f unspec %f ",
               growth,
               edge_time.to_double (),
               unspec_edge_time.to_double ());
      ipa_dump_hints (dump_file, hints);
      if (big_speedup_p (edge))
        fprintf (dump_file, " big_speedup");
      fprintf (dump_file, "\n");
    }

  /* Always prefer inlining saving code size.  */
  if (growth <= 0)
    {
      badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
      if (dump)
        fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
                 growth);
    }
  /* Inlining into EXTERNAL functions is not going to change anything unless
     they are themselves inlined.  */
  else if (DECL_EXTERNAL (caller->decl))
    {
      if (dump)
        fprintf (dump_file, "      max: function is external\n");
      return sreal::max ();
    }
  /* When the profile is available, compute badness as:

                 time_saved * caller_count
     goodness =  -------------------------------------------------
                 growth_of_caller * overall_growth * combined_size

     badness = - goodness

     Again use a negative value to make calls with a profile appear hotter
     than calls without.
   */
  else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
           || caller->count.ipa ().nonzero_p ())
    {
      sreal numerator, denominator;
      int overall_growth;
      sreal freq = edge->sreal_frequency ();

      numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
      if (numerator <= 0)
        numerator = ((sreal) 1 >> 8);
      if (caller->count.ipa ().nonzero_p ())
        numerator *= caller->count.ipa ().to_gcov_type ();
      else if (caller->count.ipa ().initialized_p ())
        numerator = numerator >> 11;
      denominator = growth;

      overall_growth = callee_info->growth;

      /* Look for inliner wrappers of the form:

         inline_caller ()
           {
             do_fast_job...
             if (need_more_work)
               noninline_callee ();
           }
         Without penalizing this case, we usually inline noninline_callee
         into the inline_caller because overall_growth is small, preventing
         further inlining of inline_caller.

         Penalize only callgraph edges to functions with small overall
         growth ...
        */
      if (growth > overall_growth
          /* ... and having only one caller which is not inlined ... */
          && callee_info->single_caller
          && !edge->caller->inlined_to
          /* ... and edges executed only conditionally ... */
          && freq < 1
          /* ... consider case where callee is not inline but caller is ... */
          && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
               && DECL_DECLARED_INLINE_P (caller->decl))
              /* ... or when early optimizers decided to split and edge
                 frequency still indicates splitting is a win ... */
              || (callee->split_part && !caller->split_part
                  && freq * 100
                     < opt_for_fn (caller->decl,
                                   param_partial_inlining_entry_probability)
                  /* ... and do not overwrite user specified hints.  */
                  && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
                      || DECL_DECLARED_INLINE_P (caller->decl)))))
        {
          ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
          int caller_growth = caller_info->growth;

          /* Only apply the penalty when the caller looks like an inline
             candidate, and it is not called once.  */
          if (!caller_info->single_caller && overall_growth < caller_growth
              && caller_info->inlinable
              && wrapper_heuristics_may_apply
                   (caller, ipa_size_summaries->get (caller)->size))
            {
              if (dump)
                fprintf (dump_file,
                         "     Wrapper penalty. Increasing growth %i to %i\n",
                         overall_growth, caller_growth);
              overall_growth = caller_growth;
            }
        }
      if (overall_growth > 0)
        {
          /* Strongly prefer functions with few callers that can be inlined
             fully.  The square root here leads to smaller binaries on
             average.  Watch however for extreme cases and return to a linear
             function when growth is large.  */
          if (overall_growth < 256)
            overall_growth *= overall_growth;
          else
            overall_growth += 256 * 256 - 256;
          denominator *= overall_growth;
        }
      denominator *= ipa_size_summaries->get (caller)->size + growth;

      badness = - numerator / denominator;

      if (dump)
        {
          fprintf (dump_file,
                   "      %f: guessed profile. frequency %f, count %" PRId64
                   " caller count %" PRId64
                   " time saved %f"
                   " overall growth %i (current) %i (original)"
                   " %i (compensated)\n",
                   badness.to_double (),
                   freq.to_double (),
                   edge->count.ipa ().initialized_p ()
                   ? edge->count.ipa ().to_gcov_type () : -1,
                   caller->count.ipa ().initialized_p ()
                   ? caller->count.ipa ().to_gcov_type () : -1,
                   inlining_speedup (edge, freq, unspec_edge_time,
                                     edge_time).to_double (),
                   estimate_growth (callee),
                   callee_info->growth, overall_growth);
        }
    }
  /* When a function local profile is not available or it does not give
     useful information (i.e. frequency is zero), base the cost on
     loop nest and overall size growth, so we optimize for the overall number
     of functions fully inlined in the program.  */
  else
    {
      int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
      badness = growth;

      /* Decrease badness if call is nested.  */
      if (badness > 0)
        badness = badness >> nest;
      else
        badness = badness << nest;
      if (dump)
        fprintf (dump_file, "      %f: no profile. nest %i\n",
                 badness.to_double (), nest);
    }
  gcc_checking_assert (badness != 0);

  if (edge->recursive_p ())
    badness = badness.shift (badness > 0 ? 4 : -4);
  if ((hints & (INLINE_HINT_indirect_call
                | INLINE_HINT_loop_iterations
                | INLINE_HINT_loop_stride))
      || callee_info->growth <= 0)
    badness = badness.shift (badness > 0 ? -2 : 2);
  if (hints & INLINE_HINT_builtin_constant_p)
    badness = badness.shift (badness > 0 ? -4 : 4);
  if (hints & (INLINE_HINT_same_scc))
    badness = badness.shift (badness > 0 ? 3 : -3);
  else if (hints & (INLINE_HINT_in_scc))
    badness = badness.shift (badness > 0 ? 2 : -2);
  else if (hints & (INLINE_HINT_cross_module))
    badness = badness.shift (badness > 0 ? 1 : -1);
  if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
    badness = badness.shift (badness > 0 ? -4 : 4);
  else if ((hints & INLINE_HINT_declared_inline))
    badness = badness.shift (badness > 0 ? -3 : 3);
  if (dump)
    fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
  return badness;
}
/* Recompute badness of EDGE and update its key in HEAP if needed.  */

static inline void
update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
{
  sreal badness = edge_badness (edge, false);
  if (edge->aux)
    {
      edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
      gcc_checking_assert (n->get_data () == edge);

      /* fibonacci_heap::replace_key does busy updating of the
         heap that is unnecessarily expensive.
         We do lazy increases: after extracting the minimum, if the key
         turns out to be out of date, it is re-inserted into the heap
         with the correct value.  */
      if (badness < n->get_key ())
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file,
                       "  decreasing badness %s -> %s, %f to %f\n",
                       edge->caller->dump_name (),
                       edge->callee->dump_name (),
                       n->get_key ().to_double (),
                       badness.to_double ());
            }
          heap->decrease_key (n, badness);
        }
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "  enqueuing call %s -> %s, badness %f\n",
                   edge->caller->dump_name (),
                   edge->callee->dump_name (),
                   badness.to_double ());
        }
      edge->aux = heap->insert (badness, edge);
    }
}
/* NODE was inlined.
   All caller edges need to be reset because
   size estimates change.  Similarly callees need resetting
   because a better context may be known.  */

static void
reset_edge_caches (struct cgraph_node *node)
{
  struct cgraph_edge *edge;
  struct cgraph_edge *e = node->callees;
  struct cgraph_node *where = node;
  struct ipa_ref *ref;

  if (where->inlined_to)
    where = where->inlined_to;

  reset_node_cache (where);

  if (edge_growth_cache != NULL)
    for (edge = where->callers; edge; edge = edge->next_caller)
      if (edge->inline_failed)
        edge_growth_cache->remove (edge);

  FOR_EACH_ALIAS (where, ref)
    reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));

  if (!e)
    return;

  while (true)
    if (!e->inline_failed && e->callee->callees)
      e = e->callee->callees;
    else
      {
        if (edge_growth_cache != NULL && e->inline_failed)
          edge_growth_cache->remove (e);
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}
/* Recompute HEAP nodes for each caller of NODE.
   UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
   When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
   it is inlinable.  Otherwise check all edges.  */

static void
update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
                    bitmap updated_nodes,
                    struct cgraph_edge *check_inlinablity_for)
{
  struct cgraph_edge *edge;
  struct ipa_ref *ref;

  if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
      || node->inlined_to)
    return;
  if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
    return;

  FOR_EACH_ALIAS (node, ref)
    {
      struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
    }

  for (edge = node->callers; edge; edge = edge->next_caller)
    if (edge->inline_failed)
      {
        if (!check_inlinablity_for
            || check_inlinablity_for == edge)
          {
            if (can_inline_edge_p (edge, false)
                && want_inline_small_function_p (edge, false)
                && can_inline_edge_by_limits_p (edge, false))
              update_edge_key (heap, edge);
            else if (edge->aux)
              {
                report_inline_failed_reason (edge);
                heap->delete_node ((edge_heap_node_t *) edge->aux);
                edge->aux = NULL;
              }
          }
        else if (edge->aux)
          update_edge_key (heap, edge);
      }
}
/* Recompute HEAP nodes for each uninlined call in NODE.
   If UPDATE_SINCE is non-NULL, check whether edges called within that
   function are inlinable (typically UPDATE_SINCE is the inline clone we
   introduced where all edges have a new context).

   This is used when we know that edge badnesses are only going to increase
   (we introduced a new call site) and thus all we need is to insert newly
   created edges into the heap.  */

static void
update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
                    struct cgraph_node *update_since,
                    bitmap updated_nodes)
{
  struct cgraph_edge *e = node->callees;
  bool check_inlinability = update_since == node;

  if (!e)
    return;
  while (true)
    if (!e->inline_failed && e->callee->callees)
      {
        if (e->callee == update_since)
          check_inlinability = true;
        e = e->callee->callees;
      }
    else
      {
        enum availability avail;
        struct cgraph_node *callee;
        if (!check_inlinability)
          {
            if (e->aux
                && !bitmap_bit_p (updated_nodes,
                                  e->callee->ultimate_alias_target
                                    (&avail, e->caller)->get_uid ()))
              update_edge_key (heap, e);
          }
        /* We do not reset the callee growth cache here.  Since we added a new
           call, growth should have just increased and consequently the
           badness metric doesn't need updating.  */
        else if (e->inline_failed
                 && (callee = e->callee->ultimate_alias_target (&avail,
                                                                e->caller))
                 && avail >= AVAIL_AVAILABLE
                 && ipa_fn_summaries->get (callee) != NULL
                 && ipa_fn_summaries->get (callee)->inlinable
                 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
          {
            if (can_inline_edge_p (e, false)
                && want_inline_small_function_p (e, false)
                && can_inline_edge_by_limits_p (e, false))
              {
                gcc_checking_assert (check_inlinability
                                     || can_inline_edge_p (e, false));
                gcc_checking_assert (check_inlinability || e->aux);
                update_edge_key (heap, e);
              }
            else if (e->aux)
              {
                report_inline_failed_reason (e);
                heap->delete_node ((edge_heap_node_t *) e->aux);
                e->aux = NULL;
              }
          }
        /* In case we redirected to an unreachable node we only need to remove
           the fibheap entry.  */
        else if (e->aux)
          {
            heap->delete_node ((edge_heap_node_t *) e->aux);
            e->aux = NULL;
          }
        if (e->next_callee)
          e = e->next_callee;
        else
          {
            do
              {
                if (e->caller == node)
                  return;
                if (e->caller == update_since)
                  check_inlinability = false;
                e = e->caller->callers;
              }
            while (!e->next_callee);
            e = e->next_callee;
          }
      }
}
/* Enqueue all recursive calls from NODE into the priority queue depending on
   how likely we want to recursively inline the call.  */

static void
lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
                        edge_heap_t *heap)
{
  struct cgraph_edge *e;
  enum availability avail;

  for (e = where->callees; e; e = e->next_callee)
    if (e->callee == node
        || (e->callee->ultimate_alias_target (&avail, e->caller) == node
            && avail > AVAIL_INTERPOSABLE))
      heap->insert (-e->sreal_frequency (), e);
  for (e = where->callees; e; e = e->next_callee)
    if (!e->inline_failed)
      lookup_recursive_calls (node, e->callee, heap);
}
1616 /* Decide on recursive inlining: in the case function has recursive calls,
1617 inline until body size reaches given argument. If any new indirect edges
1618 are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1619 is NULL. */
1621 static bool
1622 recursive_inlining (struct cgraph_edge *edge,
1623 vec<cgraph_edge *> *new_edges)
1625 cgraph_node *to = (edge->caller->inlined_to
1626 ? edge->caller->inlined_to : edge->caller);
1627 int limit = opt_for_fn (to->decl,
1628 param_max_inline_insns_recursive_auto);
1629 edge_heap_t heap (sreal::min ());
1630 struct cgraph_node *node;
1631 struct cgraph_edge *e;
1632 struct cgraph_node *master_clone = NULL, *next;
1633 int depth = 0;
1634 int n = 0;
1636 node = edge->caller;
1637 if (node->inlined_to)
1638 node = node->inlined_to;
1640 if (DECL_DECLARED_INLINE_P (node->decl))
1641 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1643 /* Make sure that function is small enough to be considered for inlining. */
1644 if (estimate_size_after_inlining (node, edge) >= limit)
1645 return false;
1646 lookup_recursive_calls (node, node, &heap);
1647 if (heap.empty ())
1648 return false;
1650 if (dump_file)
1651 fprintf (dump_file,
1652 " Performing recursive inlining on %s\n", node->dump_name ());
1654 /* Do the inlining and update list of recursive call during process. */
1655 while (!heap.empty ())
1657 struct cgraph_edge *curr = heap.extract_min ();
1658 struct cgraph_node *cnode, *dest = curr->callee;
1660 if (!can_inline_edge_p (curr, true)
1661 || !can_inline_edge_by_limits_p (curr, true))
1662 continue;
1664 /* MASTER_CLONE is produced in the case we already started modified
1665 the function. Be sure to redirect edge to the original body before
1666 estimating growths otherwise we will be seeing growths after inlining
1667 the already modified body. */
1668 if (master_clone)
1670 curr->redirect_callee (master_clone);
1671 if (edge_growth_cache != NULL)
1672 edge_growth_cache->remove (curr);
1675 if (estimate_size_after_inlining (node, curr) > limit)
1677 curr->redirect_callee (dest);
1678 if (edge_growth_cache != NULL)
1679 edge_growth_cache->remove (curr);
1680 break;
1683 depth = 1;
1684 for (cnode = curr->caller;
1685 cnode->inlined_to; cnode = cnode->callers->caller)
1686 if (node->decl
1687 == curr->callee->ultimate_alias_target ()->decl)
1688 depth++;
1690 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1692 curr->redirect_callee (dest);
1693 if (edge_growth_cache != NULL)
1694 edge_growth_cache->remove (curr);
1695 continue;
1698 if (dump_file)
1700 fprintf (dump_file,
1701 " Inlining call of depth %i", depth);
1702 if (node->count.nonzero_p () && curr->count.initialized_p ())
1704 fprintf (dump_file, " called approx. %.2f times per call",
1705 (double)curr->count.to_gcov_type ()
1706 / node->count.to_gcov_type ());
1708 fprintf (dump_file, "\n");
1710 if (!master_clone)
1712 /* We need original clone to copy around. */
1713 master_clone = node->create_clone (node->decl, node->count,
1714 false, vNULL, true, NULL, NULL);
1715 for (e = master_clone->callees; e; e = e->next_callee)
1716 if (!e->inline_failed)
1717 clone_inlined_nodes (e, true, false, NULL);
1718 curr->redirect_callee (master_clone);
1719 if (edge_growth_cache != NULL)
1720 edge_growth_cache->remove (curr);
1723 inline_call (curr, false, new_edges, &overall_size, true);
1724 reset_node_cache (node);
1725 lookup_recursive_calls (node, curr->callee, &heap);
1726 n++;
1729 if (!heap.empty () && dump_file)
1730 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1732 if (!master_clone)
1733 return false;
1735 if (dump_enabled_p ())
1736 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1737 "\n Inlined %i times, "
1738 "body grown from size %i to %i, time %f to %f\n", n,
1739 ipa_size_summaries->get (master_clone)->size,
1740 ipa_size_summaries->get (node)->size,
1741 ipa_fn_summaries->get (master_clone)->time.to_double (),
1742 ipa_fn_summaries->get (node)->time.to_double ());
1744 /* Remove the master clone we used for inlining. We rely on the fact that
1745 clones inlined into the master clone get queued just before the master
1746 clone, so we don't need recursion. */
1747 for (node = symtab->first_function (); node != master_clone;
1748 node = next)
1750 next = symtab->next_function (node);
1751 if (node->inlined_to == master_clone)
1752 node->remove ();
1754 master_clone->remove ();
1755 return true;
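/* For illustration of what recursive_inlining above achieves (a minimal
   sketch; the function SUM below is a made-up example, not taken from this
   file): for a self-recursive call such as

       static int sum (int n) { return n > 0 ? n + sum (n - 1) : 0; }

   inlining the recursive edge through a temporary master clone a few times
   has an effect similar to unrolling the recursion, and later optimizations
   can then simplify the unrolled body.  The process stops once the estimated
   body size reaches param_max_inline_insns_recursive (or the _auto variant
   for functions not declared inline), as checked in the loop above.  */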
1759 /* Given the whole-compilation-unit size estimate INSNS, compute how large we
1760 can allow the unit to grow. */
1762 static int64_t
1763 compute_max_insns (cgraph_node *node, int insns)
1765 int max_insns = insns;
1766 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1767 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1769 return ((int64_t) max_insns
1770 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
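/* A worked example of the formula in compute_max_insns above (illustrative
   numbers only; the actual parameter values depend on options and target):
   assuming param_large_unit_insns == 10000 and param_inline_unit_growth == 40,
   a unit whose estimated size is INSNS == 50000 may grow up to
   50000 * (100 + 40) / 100 == 70000 instructions, while a tiny unit with
   INSNS == 2000 is first rounded up to 10000 and may thus grow to 14000.  */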
1774 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1776 static void
1777 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1779 while (new_edges.length () > 0)
1781 struct cgraph_edge *edge = new_edges.pop ();
1783 gcc_assert (!edge->aux);
1784 gcc_assert (edge->callee);
1785 if (edge->inline_failed
1786 && can_inline_edge_p (edge, true)
1787 && want_inline_small_function_p (edge, true)
1788 && can_inline_edge_by_limits_p (edge, true))
1789 edge->aux = heap->insert (edge_badness (edge, false), edge);
1793 /* Remove edge E from the fibheap. */
1795 static void
1796 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1798 if (e->aux)
1800 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1801 e->aux = NULL;
1805 /* Return true if speculation of edge E seems useful.
1806 If ANTICIPATE_INLINING is true, be conservative and hope that E
1807 may get inlined. */
1809 bool
1810 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1812 /* If we have already decided to inline the edge, it seems useful. */
1813 if (!e->inline_failed)
1814 return true;
1816 enum availability avail;
1817 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1818 e->caller);
1820 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1822 if (!e->maybe_hot_p ())
1823 return false;
1825 /* See if IP optimizations found something potentially useful about the
1826 function. For now we look only for CONST/PURE flags. Almost everything
1827 else we propagate is useless. */
1828 if (avail >= AVAIL_AVAILABLE)
1830 int ecf_flags = flags_from_decl_or_type (target->decl);
1831 if (ecf_flags & ECF_CONST)
1833 if (!(e->speculative_call_indirect_edge ()->indirect_info
1834 ->ecf_flags & ECF_CONST))
1835 return true;
1837 else if (ecf_flags & ECF_PURE)
1839 if (!(e->speculative_call_indirect_edge ()->indirect_info
1840 ->ecf_flags & ECF_PURE))
1841 return true;
1844 /* If we did not manage to inline the function nor redirect
1845 to an ipa-cp clone (which is recognized by the local flag being set),
1846 it is probably pointless to inline it unless the hardware is missing
1847 an indirect call predictor. */
1848 if (!anticipate_inlining && !target->local)
1849 return false;
1850 /* For overwritable targets there is not much to do. */
1851 if (!can_inline_edge_p (e, false)
1852 || !can_inline_edge_by_limits_p (e, false, true))
1853 return false;
1854 /* OK, speculation seems interesting. */
1855 return true;
1858 /* We know that EDGE is not going to be inlined.
1859 See if we can remove speculation. */
1861 static void
1862 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1864 if (edge->speculative && !speculation_useful_p (edge, false))
1866 struct cgraph_node *node = edge->caller;
1867 struct cgraph_node *where = node->inlined_to
1868 ? node->inlined_to : node;
1869 auto_bitmap updated_nodes;
1871 if (edge->count.ipa ().initialized_p ())
1872 spec_rem += edge->count.ipa ();
1873 cgraph_edge::resolve_speculation (edge);
1874 reset_edge_caches (where);
1875 ipa_update_overall_fn_summary (where);
1876 update_caller_keys (edge_heap, where,
1877 updated_nodes, NULL);
1878 update_callee_keys (edge_heap, where, NULL,
1879 updated_nodes);
1883 /* Return true if NODE should be accounted for in the overall size estimate.
1884 Skip all nodes optimized for size so we can measure the growth of the hot
1885 part of the program regardless of the cold padding. */
1887 bool
1888 inline_account_function_p (struct cgraph_node *node)
1890 return (!DECL_EXTERNAL (node->decl)
1891 && !opt_for_fn (node->decl, optimize_size)
1892 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1895 /* Count the number of callers of NODE and store it into DATA (which
1896 points to an int). Worker for cgraph_for_node_and_aliases. */
1898 static bool
1899 sum_callers (struct cgraph_node *node, void *data)
1901 struct cgraph_edge *e;
1902 int *num_calls = (int *)data;
1904 for (e = node->callers; e; e = e->next_caller)
1905 (*num_calls)++;
1906 return false;
1909 /* We only propagate across edges with a non-interposable callee. */
1911 inline bool
1912 ignore_edge_p (struct cgraph_edge *e)
1914 enum availability avail;
1915 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1916 return (avail <= AVAIL_INTERPOSABLE);
1919 /* We use a greedy algorithm for inlining small functions:
1920 all inline candidates are put into a prioritized heap ordered by
1921 increasing badness.
1923 The inlining of small functions is bounded by the unit growth parameters. */
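/* In outline, a simplified sketch of the loop implemented below (pseudocode
   for orientation only, not the exact control flow):

       while (!edge_heap.empty ())
         {
           edge = edge_heap.extract_min ();    -- smallest badness first
           if (badness of EDGE is out of date)
             re-insert EDGE with the fresh key;  -- lazy key updates
           else if (unit growth and per-edge limits allow)
             inline EDGE and update keys of affected callers and callees;
         }
 */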
1925 static void
1926 inline_small_functions (void)
1928 struct cgraph_node *node;
1929 struct cgraph_edge *edge;
1930 edge_heap_t edge_heap (sreal::min ());
1931 auto_bitmap updated_nodes;
1932 int min_size;
1933 auto_vec<cgraph_edge *> new_indirect_edges;
1934 int initial_size = 0;
1935 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1936 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1937 new_indirect_edges.create (8);
1939 edge_removal_hook_holder
1940 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1942 /* Compute overall unit size and other global parameters used by badness
1943 metrics. */
1945 max_count = profile_count::uninitialized ();
1946 ipa_reduced_postorder (order, true, ignore_edge_p);
1947 free (order);
1949 FOR_EACH_DEFINED_FUNCTION (node)
1950 if (!node->inlined_to)
1952 if (!node->alias && node->analyzed
1953 && (node->has_gimple_body_p () || node->thunk)
1954 && opt_for_fn (node->decl, optimize))
1956 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1957 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1959 /* Do not account external functions; they will be optimized out
1960 if not inlined. Also only count the non-cold portion of the program. */
1961 if (inline_account_function_p (node))
1962 initial_size += ipa_size_summaries->get (node)->size;
1963 info->growth = estimate_growth (node);
1965 int num_calls = 0;
1966 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1967 true);
1968 if (num_calls == 1)
1969 info->single_caller = true;
1970 if (dfs && dfs->next_cycle)
1972 struct cgraph_node *n2;
1973 int id = dfs->scc_no + 1;
1974 for (n2 = node; n2;
1975 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1976 if (opt_for_fn (n2->decl, optimize))
1978 ipa_fn_summary *info2 = ipa_fn_summaries->get
1979 (n2->inlined_to ? n2->inlined_to : n2);
1980 if (info2->scc_no)
1981 break;
1982 info2->scc_no = id;
1987 for (edge = node->callers; edge; edge = edge->next_caller)
1988 max_count = max_count.max (edge->count.ipa ());
1990 ipa_free_postorder_info ();
1991 initialize_growth_caches ();
1993 if (dump_file)
1994 fprintf (dump_file,
1995 "\nDeciding on inlining of small functions. Starting with size %i.\n",
1996 initial_size);
1998 overall_size = initial_size;
1999 min_size = overall_size;
2001 /* Populate the heap with all edges we might inline. */
2003 FOR_EACH_DEFINED_FUNCTION (node)
2005 bool update = false;
2006 struct cgraph_edge *next = NULL;
2007 bool has_speculative = false;
2009 if (!opt_for_fn (node->decl, optimize))
2010 continue;
2012 if (dump_file)
2013 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2015 for (edge = node->callees; edge; edge = edge->next_callee)
2017 if (edge->inline_failed
2018 && !edge->aux
2019 && can_inline_edge_p (edge, true)
2020 && want_inline_small_function_p (edge, true)
2021 && can_inline_edge_by_limits_p (edge, true)
2022 && edge->inline_failed)
2024 gcc_assert (!edge->aux);
2025 update_edge_key (&edge_heap, edge);
2027 if (edge->speculative)
2028 has_speculative = true;
2030 if (has_speculative)
2031 for (edge = node->callees; edge; edge = next)
2033 next = edge->next_callee;
2034 if (edge->speculative
2035 && !speculation_useful_p (edge, edge->aux != NULL))
2037 cgraph_edge::resolve_speculation (edge);
2038 update = true;
2041 if (update)
2043 struct cgraph_node *where = node->inlined_to
2044 ? node->inlined_to : node;
2045 ipa_update_overall_fn_summary (where);
2046 reset_edge_caches (where);
2047 update_caller_keys (&edge_heap, where,
2048 updated_nodes, NULL);
2049 update_callee_keys (&edge_heap, where, NULL,
2050 updated_nodes);
2051 bitmap_clear (updated_nodes);
2055 gcc_assert (in_lto_p
2056 || !(max_count > 0)
2057 || (profile_info && flag_branch_probabilities));
2059 while (!edge_heap.empty ())
2061 int old_size = overall_size;
2062 struct cgraph_node *where, *callee;
2063 sreal badness = edge_heap.min_key ();
2064 sreal current_badness;
2065 int growth;
2067 edge = edge_heap.extract_min ();
2068 gcc_assert (edge->aux);
2069 edge->aux = NULL;
2070 if (!edge->inline_failed || !edge->callee->analyzed)
2071 continue;
2073 /* Be sure that the caches are maintained consistently.
2074 This check is affected by scaling roundoff errors when IPA profile
2075 counts are in use, thus we skip it in that case. */
2076 if (flag_checking && !edge->callee->count.ipa_p ()
2077 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2079 sreal cached_badness = edge_badness (edge, false);
2081 int old_size_est = estimate_edge_size (edge);
2082 sreal old_time_est = estimate_edge_time (edge);
2083 int old_hints_est = estimate_edge_hints (edge);
2085 if (edge_growth_cache != NULL)
2086 edge_growth_cache->remove (edge);
2087 reset_node_cache (edge->caller->inlined_to
2088 ? edge->caller->inlined_to
2089 : edge->caller);
2090 gcc_assert (old_size_est == estimate_edge_size (edge));
2091 gcc_assert (old_time_est == estimate_edge_time (edge));
2092 /* FIXME:
2094 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2096 fails with profile feedback because some hints depend on the
2097 maybe_hot_edge_p predicate and, because the callee gets inlined into
2098 other calls, the edge may become cold.
2099 This ought to be fixed by computing relative probabilities
2100 for the given invocation, but that will be better done once the whole
2101 code is converted to sreals. Disable for now and revert to the "wrong"
2102 value so the enabled/disabled checking paths agree. */
2103 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2105 /* When updating the edge costs, we only decrease badness in the keys.
2106 Increases of badness are handled lazily; when we see a key with an
2107 out-of-date value on it, we re-insert it now. */
2108 current_badness = edge_badness (edge, false);
2109 gcc_assert (cached_badness == current_badness);
2110 gcc_assert (current_badness >= badness);
2112 else
2113 current_badness = edge_badness (edge, false);
2114 if (current_badness != badness)
2116 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2118 edge->aux = edge_heap.insert (current_badness, edge);
2119 continue;
2121 else
2122 badness = current_badness;
2125 if (!can_inline_edge_p (edge, true)
2126 || !can_inline_edge_by_limits_p (edge, true))
2128 resolve_noninline_speculation (&edge_heap, edge);
2129 continue;
2132 callee = edge->callee->ultimate_alias_target ();
2133 growth = estimate_edge_growth (edge);
2134 if (dump_file)
2136 fprintf (dump_file,
2137 "\nConsidering %s with %i size\n",
2138 callee->dump_name (),
2139 ipa_size_summaries->get (callee)->size);
2140 fprintf (dump_file,
2141 " to be inlined into %s in %s:%i\n"
2142 " Estimated badness is %f, frequency %.2f.\n",
2143 edge->caller->dump_name (),
2144 edge->call_stmt
2145 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2146 edge->call_stmt))
2147 > BUILTINS_LOCATION)
2148 ? gimple_filename ((const gimple *) edge->call_stmt)
2149 : "unknown",
2150 edge->call_stmt
2151 ? gimple_lineno ((const gimple *) edge->call_stmt)
2152 : -1,
2153 badness.to_double (),
2154 edge->sreal_frequency ().to_double ());
2155 if (edge->count.ipa ().initialized_p ())
2157 fprintf (dump_file, " Called ");
2158 edge->count.ipa ().dump (dump_file);
2159 fprintf (dump_file, " times\n");
2161 if (dump_flags & TDF_DETAILS)
2162 edge_badness (edge, true);
2165 where = edge->caller;
2167 if (overall_size + growth > compute_max_insns (where, min_size)
2168 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2170 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2171 report_inline_failed_reason (edge);
2172 resolve_noninline_speculation (&edge_heap, edge);
2173 continue;
2176 if (!want_inline_small_function_p (edge, true))
2178 resolve_noninline_speculation (&edge_heap, edge);
2179 continue;
2182 profile_count old_count = callee->count;
2184 /* Heuristics for inlining small functions work poorly for
2185 recursive calls, where we achieve effects similar to loop unrolling.
2186 When inlining such an edge seems profitable, leave the decision to the
2187 special-purpose recursive inliner. */
2188 if (edge->recursive_p ())
2190 if (where->inlined_to)
2191 where = where->inlined_to;
2192 if (!recursive_inlining (edge,
2193 opt_for_fn (edge->caller->decl,
2194 flag_indirect_inlining)
2195 ? &new_indirect_edges : NULL))
2197 edge->inline_failed = CIF_RECURSIVE_INLINING;
2198 resolve_noninline_speculation (&edge_heap, edge);
2199 continue;
2201 reset_edge_caches (where);
2202 /* The recursive inliner inlines all recursive calls of the function
2203 at once. Consequently we need to update all callee keys. */
2204 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2205 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2206 update_callee_keys (&edge_heap, where, where, updated_nodes);
2207 bitmap_clear (updated_nodes);
2209 else
2211 struct cgraph_node *outer_node = NULL;
2212 int depth = 0;
2214 /* Consider the case where the self-recursive function A is inlined
2215 into B. This is a desired optimization in some cases, since it
2216 leads to an effect similar to loop peeling and we might completely
2217 optimize out the recursive call. However we must be extra
2218 selective. */
2220 where = edge->caller;
2221 while (where->inlined_to)
2223 if (where->decl == callee->decl)
2224 outer_node = where, depth++;
2225 where = where->callers->caller;
2227 if (outer_node
2228 && !want_inline_self_recursive_call_p (edge, outer_node,
2229 true, depth))
2231 edge->inline_failed
2232 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2233 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2234 resolve_noninline_speculation (&edge_heap, edge);
2235 continue;
2237 else if (depth && dump_file)
2238 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2240 gcc_checking_assert (!callee->inlined_to);
2242 int old_size = ipa_size_summaries->get (where)->size;
2243 sreal old_time = ipa_fn_summaries->get (where)->time;
2245 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2246 reset_edge_caches (edge->callee);
2247 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2249 /* If caller's size and time increased we do not need to update
2250 all edges because badness is not going to decrease. */
2251 if (old_size <= ipa_size_summaries->get (where)->size
2252 && old_time <= ipa_fn_summaries->get (where)->time
2253 /* The wrapper penalty may be non-monotonic in this respect.
2254 Fortunately it only affects small functions. */
2255 && !wrapper_heuristics_may_apply (where, old_size))
2256 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2257 updated_nodes);
2258 else
2259 update_callee_keys (&edge_heap, where,
2260 edge->callee,
2261 updated_nodes);
2263 where = edge->caller;
2264 if (where->inlined_to)
2265 where = where->inlined_to;
2267 /* Our profitability metric can depend on local properties
2268 such as the number of inlinable calls and the size of the function body.
2269 After inlining these properties might change for the function we
2270 inlined into (since its body size changed) and for the functions
2271 called by the function we inlined (since the number of their inlinable
2272 callers might change). */
2273 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2274 /* The offline copy count has possibly changed; recompute it if a profile
2275 is available. */
2276 struct cgraph_node *n
2277 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2278 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2279 && n->count.ipa_p ())
2280 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2281 bitmap_clear (updated_nodes);
2283 if (dump_enabled_p ())
2285 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2287 /* dump_printf can't handle %+i. */
2288 char buf_net_change[100];
2289 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2290 overall_size - old_size);
2292 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2293 " Inlined %C into %C which now has time %f and "
2294 "size %i, net change of %s%s.\n",
2295 edge->callee, edge->caller,
2296 s->time.to_double (),
2297 ipa_size_summaries->get (edge->caller)->size,
2298 buf_net_change,
2299 cross_module_call_p (edge) ? " (cross module)":"");
2301 if (min_size > overall_size)
2303 min_size = overall_size;
2305 if (dump_file)
2306 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2310 free_growth_caches ();
2311 if (dump_enabled_p ())
2312 dump_printf (MSG_NOTE,
2313 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2314 initial_size, overall_size,
2315 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2316 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2319 /* Flatten NODE. Performed both during early inlining and
2320 at IPA inlining time. */
2322 static void
2323 flatten_function (struct cgraph_node *node, bool early, bool update)
2325 struct cgraph_edge *e;
2327 /* We shouldn't be called recursively when we are being processed. */
2328 gcc_assert (node->aux == NULL);
2330 node->aux = (void *) node;
2332 for (e = node->callees; e; e = e->next_callee)
2334 struct cgraph_node *orig_callee;
2335 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2337 /* We've hit a cycle? It is time to give up. */
2338 if (callee->aux)
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2342 "Not inlining %C into %C to avoid cycle.\n",
2343 callee, e->caller);
2344 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2345 e->inline_failed = CIF_RECURSIVE_INLINING;
2346 continue;
2349 /* When the edge is already inlined, we just need to recurse into
2350 it in order to fully flatten the leaves. */
2351 if (!e->inline_failed)
2353 flatten_function (callee, early, false);
2354 continue;
2357 /* The flatten attribute needs to be processed during late inlining. For
2358 extra code quality, however, we also do flattening during early
2359 optimization. */
2360 if (!early
2361 ? !can_inline_edge_p (e, true)
2362 && !can_inline_edge_by_limits_p (e, true)
2363 : !can_early_inline_edge_p (e))
2364 continue;
2366 if (e->recursive_p ())
2368 if (dump_enabled_p ())
2369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2370 "Not inlining: recursive call.\n");
2371 continue;
2374 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2375 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2377 if (dump_enabled_p ())
2378 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2379 "Not inlining: SSA form does not match.\n");
2380 continue;
2383 /* Inline the edge and flatten the inline clone. Avoid
2384 recursing through the original node if the node was cloned. */
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2387 " Inlining %C into %C.\n",
2388 callee, e->caller);
2389 orig_callee = callee;
2390 inline_call (e, true, NULL, NULL, false);
2391 if (e->callee != orig_callee)
2392 orig_callee->aux = (void *) node;
2393 flatten_function (e->callee, early, false);
2394 if (e->callee != orig_callee)
2395 orig_callee->aux = NULL;
2398 node->aux = NULL;
2399 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2400 if (update && opt_for_fn (where->decl, optimize))
2401 ipa_update_overall_fn_summary (where);
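/* For illustration, the kind of user code flatten_function above handles
   (a made-up example of typical usage, not taken from this file):

       __attribute__ ((flatten)) void render_frame (void)
       {
         draw_background ();
         draw_sprites ();
       }

   Everything reachable from render_frame is inlined into it where the edge
   predicates above allow, both at early and at late (IPA) inlining time.  */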
2404 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2405 DATA points to the number of calls originally found, so we avoid infinite
2406 recursion. */
2408 static bool
2409 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2410 hash_set<cgraph_node *> *callers)
2412 int *num_calls = (int *)data;
2413 bool callee_removed = false;
2415 while (node->callers && !node->inlined_to)
2417 struct cgraph_node *caller = node->callers->caller;
2419 if (!can_inline_edge_p (node->callers, true)
2420 || !can_inline_edge_by_limits_p (node->callers, true)
2421 || node->callers->recursive_p ())
2423 if (dump_file)
2424 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2425 *num_calls = 0;
2426 return false;
2429 if (dump_file)
2431 cgraph_node *ultimate = node->ultimate_alias_target ();
2432 fprintf (dump_file,
2433 "\nInlining %s size %i.\n",
2434 ultimate->dump_name (),
2435 ipa_size_summaries->get (ultimate)->size);
2436 fprintf (dump_file,
2437 " Called once from %s %i insns.\n",
2438 node->callers->caller->dump_name (),
2439 ipa_size_summaries->get (node->callers->caller)->size);
2442 /* Remember which callers we inlined to, delaying updating the
2443 overall summary. */
2444 callers->add (node->callers->caller);
2445 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2446 if (dump_file)
2447 fprintf (dump_file,
2448 " Inlined into %s which now has %i size\n",
2449 caller->dump_name (),
2450 ipa_size_summaries->get (caller)->size);
2451 if (!(*num_calls)--)
2453 if (dump_file)
2454 fprintf (dump_file, "New calls found; giving up.\n");
2455 return callee_removed;
2457 if (callee_removed)
2458 return true;
2460 return false;
2463 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2464 update. */
2466 static bool
2467 inline_to_all_callers (struct cgraph_node *node, void *data)
2469 hash_set<cgraph_node *> callers;
2470 bool res = inline_to_all_callers_1 (node, data, &callers);
2471 /* Perform the delayed update of the overall summary of all callers
2472 processed. This avoids quadratic behavior in the cases where
2473 we have a lot of calls to the same function. */
2474 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2475 i != callers.end (); ++i)
2476 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2477 return res;
2480 /* Output overall time estimate. */
2481 static void
2482 dump_overall_stats (void)
2484 sreal sum_weighted = 0, sum = 0;
2485 struct cgraph_node *node;
2487 FOR_EACH_DEFINED_FUNCTION (node)
2488 if (!node->inlined_to
2489 && !node->alias)
2491 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2492 if (s != NULL)
2494 sum += s->time;
2495 if (node->count.ipa ().initialized_p ())
2496 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2499 fprintf (dump_file, "Overall time estimate: "
2500 "%f weighted by profile: "
2501 "%f\n", sum.to_double (), sum_weighted.to_double ());
2504 /* Output some useful stats about inlining. */
2506 static void
2507 dump_inline_stats (void)
2509 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2510 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2511 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2512 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2513 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2514 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2515 int64_t reason[CIF_N_REASONS][2];
2516 sreal reason_freq[CIF_N_REASONS];
2517 int i;
2518 struct cgraph_node *node;
2520 memset (reason, 0, sizeof (reason));
2521 for (i=0; i < CIF_N_REASONS; i++)
2522 reason_freq[i] = 0;
2523 FOR_EACH_DEFINED_FUNCTION (node)
2525 struct cgraph_edge *e;
2526 for (e = node->callees; e; e = e->next_callee)
2528 if (e->inline_failed)
2530 if (e->count.ipa ().initialized_p ())
2531 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2532 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2533 reason[(int) e->inline_failed][1] ++;
2534 if (DECL_VIRTUAL_P (e->callee->decl)
2535 && e->count.ipa ().initialized_p ())
2537 if (e->indirect_inlining_edge)
2538 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2539 else
2540 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2542 else if (e->count.ipa ().initialized_p ())
2544 if (e->indirect_inlining_edge)
2545 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2546 else
2547 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2550 else if (e->count.ipa ().initialized_p ())
2552 if (e->speculative)
2554 if (DECL_VIRTUAL_P (e->callee->decl))
2555 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2556 else
2557 inlined_speculative += e->count.ipa ().to_gcov_type ();
2559 else if (DECL_VIRTUAL_P (e->callee->decl))
2561 if (e->indirect_inlining_edge)
2562 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2563 else
2564 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2566 else
2568 if (e->indirect_inlining_edge)
2569 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2570 else
2571 inlined_cnt += e->count.ipa ().to_gcov_type ();
2575 for (e = node->indirect_calls; e; e = e->next_callee)
2576 if (e->indirect_info->polymorphic
2577 && e->count.ipa ().initialized_p ())
2578 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2579 else if (e->count.ipa ().initialized_p ())
2580 indirect_cnt += e->count.ipa ().to_gcov_type ();
2582 if (max_count.initialized_p ())
2584 fprintf (dump_file,
2585 "Inlined %" PRId64 " + speculative "
2586 "%" PRId64 " + speculative polymorphic "
2587 "%" PRId64 " + previously indirect "
2588 "%" PRId64 " + virtual "
2589 "%" PRId64 " + virtual and previously indirect "
2590 "%" PRId64 "\n" "Not inlined "
2591 "%" PRId64 " + previously indirect "
2592 "%" PRId64 " + virtual "
2593 "%" PRId64 " + virtual and previously indirect "
2594 "%" PRId64 " + still indirect "
2595 "%" PRId64 " + still indirect polymorphic "
2596 "%" PRId64 "\n", inlined_cnt,
2597 inlined_speculative, inlined_speculative_ply,
2598 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2599 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2600 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2601 fprintf (dump_file, "Removed speculations ");
2602 spec_rem.dump (dump_file);
2603 fprintf (dump_file, "\n");
2605 dump_overall_stats ();
2606 fprintf (dump_file, "\nWhy inlining failed?\n");
2607 for (i = 0; i < CIF_N_REASONS; i++)
2608 if (reason[i][1])
2609 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2610 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2611 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2614 /* Called when a node is removed from the callgraph. */
2616 static void
2617 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2619 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2620 return;
2622 hash_set<struct cgraph_node *> *removed
2623 = (hash_set<struct cgraph_node *> *) data;
2624 removed->add (node);
2627 /* Decide on the inlining. We do so in topological order to avoid
2628 the expense of updating data structures. */
2630 static unsigned int
2631 ipa_inline (void)
2633 struct cgraph_node *node;
2634 int nnodes;
2635 struct cgraph_node **order;
2636 int i, j;
2637 int cold;
2638 bool remove_functions = false;
2640 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2642 if (dump_file)
2643 ipa_dump_fn_summaries (dump_file);
2645 nnodes = ipa_reverse_postorder (order);
2646 spec_rem = profile_count::zero ();
2648 FOR_EACH_FUNCTION (node)
2650 node->aux = 0;
2652 /* Recompute the default reasons for inlining because they may have
2653 changed during merging. */
2654 if (in_lto_p)
2656 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2658 gcc_assert (e->inline_failed);
2659 initialize_inline_failed (e);
2661 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2662 initialize_inline_failed (e);
2666 if (dump_file)
2667 fprintf (dump_file, "\nFlattening functions:\n");
2669 /* First shrink the order array so that it only contains nodes with
2670 the flatten attribute. */
2671 for (i = nnodes - 1, j = i; i >= 0; i--)
2673 node = order[i];
2674 if (node->definition
2675 /* Do not try to flatten aliases. These may happen for example when
2676 creating local aliases. */
2677 && !node->alias
2678 && lookup_attribute ("flatten",
2679 DECL_ATTRIBUTES (node->decl)) != NULL)
2680 order[j--] = order[i];
2683 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2684 the nodes with the flatten attribute. If there is more than one such
2685 node, we need to register a node removal hook, as flatten_function
2686 could remove other nodes with the flatten attribute. See PR82801. */
2687 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2688 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2689 if (j < nnodes - 2)
2691 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2692 node_removal_hook_holder
2693 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2694 flatten_removed_nodes);
2697 /* In the first pass handle functions to be flattened. Do this with
2698 a priority so none of our later choices will make this impossible. */
2699 for (i = nnodes - 1; i > j; i--)
2701 node = order[i];
2702 if (flatten_removed_nodes
2703 && flatten_removed_nodes->contains (node))
2704 continue;
2706 /* Handle nodes to be flattened.
2707 Ideally when processing callees we stop inlining at the
2708 entry of cycles, possibly cloning that entry point and
2709 trying to flatten it, turning it into a self-recursive
2710 function. */
2711 if (dump_file)
2712 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2713 flatten_function (node, false, true);
2716 if (j < nnodes - 2)
2718 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2719 delete flatten_removed_nodes;
2721 free (order);
2723 if (dump_file)
2724 dump_overall_stats ();
2726 inline_small_functions ();
2728 gcc_assert (symtab->state == IPA_SSA);
2729 symtab->state = IPA_SSA_AFTER_INLINING;
2730 /* Do the first after-inlining removal. We want to remove all "stale" extern
2731 inline functions and virtual functions so we really know what is called
2732 once. */
2733 symtab->remove_unreachable_nodes (dump_file);
2735 /* Inline functions with the property that after inlining into all callers the
2736 code size will shrink because the out-of-line copy is eliminated.
2737 We do this regardless of the callee size as long as the function growth
2738 limits are met. */
2739 if (dump_file)
2740 fprintf (dump_file,
2741 "\nDeciding on functions to be inlined into all callers and "
2742 "removing useless speculations:\n");
2744 /* Inlining one function called once has a good chance of preventing
2745 inlining of another function into the same callee. Ideally we should
2746 work in priority order, but probably inlining hot functions first
2747 is a good cut without the extra pain of maintaining the queue.
2749 ??? This is not really fitting the bill perfectly: inlining a function
2750 into its callee often leads to better optimization of the callee due to
2751 increased context for optimization.
2752 For example, if the main() function calls a function that outputs help
2753 and then a function that does the main optimization, we should inline
2754 the second with priority even if both calls are cold by themselves.
2756 We probably want to implement a new predicate replacing our use of
2757 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2758 to be hot. */
2759 for (cold = 0; cold <= 1; cold ++)
2761 FOR_EACH_DEFINED_FUNCTION (node)
2763 struct cgraph_edge *edge, *next;
2764 bool update = false;
2766 if (!opt_for_fn (node->decl, optimize)
2767 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2768 continue;
2770 for (edge = node->callees; edge; edge = next)
2772 next = edge->next_callee;
2773 if (edge->speculative && !speculation_useful_p (edge, false))
2775 if (edge->count.ipa ().initialized_p ())
2776 spec_rem += edge->count.ipa ();
2777 cgraph_edge::resolve_speculation (edge);
2778 update = true;
2779 remove_functions = true;
2782 if (update)
2784 struct cgraph_node *where = node->inlined_to
2785 ? node->inlined_to : node;
2786 reset_edge_caches (where);
2787 ipa_update_overall_fn_summary (where);
2789 if (want_inline_function_to_all_callers_p (node, cold))
2791 int num_calls = 0;
2792 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2793 true);
2794 while (node->call_for_symbol_and_aliases
2795 (inline_to_all_callers, &num_calls, true))
2797 remove_functions = true;
2802 if (dump_enabled_p ())
2803 dump_printf (MSG_NOTE,
2804 "\nInlined %i calls, eliminated %i functions\n\n",
2805 ncalls_inlined, nfunctions_inlined);
2806 if (dump_file)
2807 dump_inline_stats ();
2809 if (dump_file)
2810 ipa_dump_fn_summaries (dump_file);
2811 return remove_functions ? TODO_remove_functions : 0;
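/* For illustration of the "inline to all callers" step of ipa_inline above
   (a made-up example of the typical case, not taken from this file):

       static int scale (int x) { return x * 3 + 1; }
       int entry (int x) { return scale (x); }

   Since SCALE is static and called once, inlining it into ENTRY lets the
   out-of-line copy be removed, so the unit shrinks regardless of the size
   of SCALE, as long as the growth limits checked above are met.  */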
2814 /* Inline always-inline function calls in NODE. */
2816 static bool
2817 inline_always_inline_functions (struct cgraph_node *node)
2819 struct cgraph_edge *e;
2820 bool inlined = false;
2822 for (e = node->callees; e; e = e->next_callee)
2824 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2825 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2826 continue;
2828 if (e->recursive_p ())
2830 if (dump_enabled_p ())
2831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2832 " Not inlining recursive call to %C.\n",
2833 e->callee);
2834 e->inline_failed = CIF_RECURSIVE_INLINING;
2835 continue;
2838 if (!can_early_inline_edge_p (e))
2840 /* Set inlined to true if the callee is marked "always_inline" but
2841 is not inlinable. This will allow flagging an error later in
2842 expand_call_inline in tree-inline.c. */
2843 if (lookup_attribute ("always_inline",
2844 DECL_ATTRIBUTES (callee->decl)) != NULL)
2845 inlined = true;
2846 continue;
2849 if (dump_enabled_p ())
2850 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2851 " Inlining %C into %C (always_inline).\n",
2852 e->callee, e->caller);
2853 inline_call (e, true, NULL, NULL, false);
2854 inlined = true;
2856 if (inlined)
2857 ipa_update_overall_fn_summary (node);
2859 return inlined;
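/* For illustration, the kind of declaration inline_always_inline_functions
   above serves (a made-up example of typical usage, not taken from this
   file):

       static inline int clamp (int v, int lo, int hi)
         __attribute__ ((always_inline));

   Calls to CLAMP are inlined even when not optimizing; when that is not
   possible, INLINED is still set so that an error can be flagged later in
   expand_call_inline.  */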
2862 /* Decide on the inlining. We do so in topological order to avoid
2863 the expense of updating data structures. */
2865 static bool
2866 early_inline_small_functions (struct cgraph_node *node)
2868 struct cgraph_edge *e;
2869 bool inlined = false;
2871 for (e = node->callees; e; e = e->next_callee)
2873 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2875 /* We can encounter a not-yet-analyzed function during
2876 early inlining on callgraphs with strongly
2877 connected components. */
2878 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2879 if (s == NULL || !s->inlinable || !e->inline_failed)
2880 continue;
2882 /* Do not consider functions not declared inline. */
2883 if (!DECL_DECLARED_INLINE_P (callee->decl)
2884 && !opt_for_fn (node->decl, flag_inline_small_functions)
2885 && !opt_for_fn (node->decl, flag_inline_functions))
2886 continue;
2888 if (dump_enabled_p ())
2889 dump_printf_loc (MSG_NOTE, e->call_stmt,
2890 "Considering inline candidate %C.\n",
2891 callee);
2893 if (!can_early_inline_edge_p (e))
2894 continue;
2896 if (e->recursive_p ())
2898 if (dump_enabled_p ())
2899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2900 " Not inlining: recursive call.\n");
2901 continue;
2904 if (!want_early_inline_function_p (e))
2905 continue;
2907 if (dump_enabled_p ())
2908 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2909 " Inlining %C into %C.\n",
2910 callee, e->caller);
2911 inline_call (e, true, NULL, NULL, false);
2912 inlined = true;
2915 if (inlined)
2916 ipa_update_overall_fn_summary (node);
2918 return inlined;
2921 unsigned int
2922 early_inliner (function *fun)
2924 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2925 struct cgraph_edge *edge;
2926 unsigned int todo = 0;
2927 int iterations = 0;
2928 bool inlined = false;
2930 if (seen_error ())
2931 return 0;
2933 /* Do nothing if the data structures for the IPA inliner are already computed.
2934 This happens when some pass decides to construct a new function and
2935 cgraph_add_new_function calls lowering passes and early optimization on
2936 it. This may confuse us when the early inliner decides to inline a call to
2937 a function clone, because function clones don't have a parameter list in
2938 ipa-prop matching their signature. */
2939 if (ipa_node_params_sum)
2940 return 0;
2942 if (flag_checking)
2943 node->verify ();
2944 node->remove_all_references ();
2946 /* Even when not optimizing or not inlining, inline always-inline
2947 functions. */
2948 inlined = inline_always_inline_functions (node);
2950 if (!optimize
2951 || flag_no_inline
2952 || !flag_early_inlining
2953 /* Never inline regular functions into always-inline functions
2954 during incremental inlining. This is unfortunate, as functions
2955 calling always-inline functions will get less optimized, but at the
2956 same time inlining functions that call an always-inline
2957 function into an always-inline function might introduce
2958 cycles of edges to be always inlined in the callgraph.
2960 We might want to be smarter and just avoid this type of inlining. */
2961 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2962 && lookup_attribute ("always_inline",
2963 DECL_ATTRIBUTES (node->decl))))
2965 else if (lookup_attribute ("flatten",
2966 DECL_ATTRIBUTES (node->decl)) != NULL)
2968 /* When the function is marked to be flattened, recursively inline
2969 all calls in it. */
2970 if (dump_enabled_p ())
2971 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2972 "Flattening %C\n", node);
2973 flatten_function (node, true, true);
2974 inlined = true;
2976 else
2978 /* If some always_inline functions were inlined, apply the changes.
2979 This way we will not account always-inline functions in the growth limits,
2980 and moreover we will inline calls from always-inline functions that we
2981 skipped previously because of the conditional above. */
2982 if (inlined)
2984 timevar_push (TV_INTEGRATION);
2985 todo |= optimize_inline_calls (current_function_decl);
2986 /* The optimize_inline_calls call above might have introduced new
2987 statements that don't have inline parameters computed. */
2988 for (edge = node->callees; edge; edge = edge->next_callee)
2990 /* We can encounter a not-yet-analyzed function during
2991 early inlining on callgraphs with strongly
2992 connected components. */
2993 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2994 es->call_stmt_size
2995 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2996 es->call_stmt_time
2997 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2999 ipa_update_overall_fn_summary (node);
3000 inlined = false;
3001 timevar_pop (TV_INTEGRATION);
3003 /* We iterate incremental inlining to get trivial cases of indirect
3004 inlining. */
3005 while (iterations < opt_for_fn (node->decl,
3006 param_early_inliner_max_iterations)
3007 && early_inline_small_functions (node))
3009 timevar_push (TV_INTEGRATION);
3010 todo |= optimize_inline_calls (current_function_decl);
3012 /* Technically we ought to recompute inline parameters so the new
3013 iteration of the early inliner works as expected. However, the values
3014 we have are approximately right, and thus we only need to update edge
3015 info that might be cleared out for newly discovered edges. */
3016 for (edge = node->callees; edge; edge = edge->next_callee)
3018 /* We have no summary for new bound store calls yet. */
3019 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3020 es->call_stmt_size
3021 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3022 es->call_stmt_time
3023 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3025 if (iterations < opt_for_fn (node->decl,
3026 param_early_inliner_max_iterations) - 1)
3027 ipa_update_overall_fn_summary (node);
3028 timevar_pop (TV_INTEGRATION);
3029 iterations++;
3030 inlined = false;
3032 if (dump_file)
3033 fprintf (dump_file, "Iterations: %i\n", iterations);
3036 if (inlined)
3038 timevar_push (TV_INTEGRATION);
3039 todo |= optimize_inline_calls (current_function_decl);
3040 timevar_pop (TV_INTEGRATION);
3043 fun->always_inline_functions_inlined = true;
3045 return todo;
3048 /* Do inlining of small functions. Doing so early helps profiling and other
3049 passes to be somewhat more effective and avoids some code duplication in
3050 the later real inlining pass for testcases with very many function calls. */
3052 namespace {
3054 const pass_data pass_data_early_inline =
3056 GIMPLE_PASS, /* type */
3057 "einline", /* name */
3058 OPTGROUP_INLINE, /* optinfo_flags */
3059 TV_EARLY_INLINING, /* tv_id */
3060 PROP_ssa, /* properties_required */
3061 0, /* properties_provided */
3062 0, /* properties_destroyed */
3063 0, /* todo_flags_start */
3064 0, /* todo_flags_finish */
3067 class pass_early_inline : public gimple_opt_pass
3069 public:
3070 pass_early_inline (gcc::context *ctxt)
3071 : gimple_opt_pass (pass_data_early_inline, ctxt)
3074 /* opt_pass methods: */
3075 virtual unsigned int execute (function *);
3077 }; // class pass_early_inline
3079 unsigned int
3080 pass_early_inline::execute (function *fun)
3082 return early_inliner (fun);
3085 } // anon namespace
3087 gimple_opt_pass *
3088 make_pass_early_inline (gcc::context *ctxt)
3090 return new pass_early_inline (ctxt);
3093 namespace {
3095 const pass_data pass_data_ipa_inline =
3097 IPA_PASS, /* type */
3098 "inline", /* name */
3099 OPTGROUP_INLINE, /* optinfo_flags */
3100 TV_IPA_INLINING, /* tv_id */
3101 0, /* properties_required */
3102 0, /* properties_provided */
3103 0, /* properties_destroyed */
3104 0, /* todo_flags_start */
3105 ( TODO_dump_symtab ), /* todo_flags_finish */
3108 class pass_ipa_inline : public ipa_opt_pass_d
3110 public:
3111 pass_ipa_inline (gcc::context *ctxt)
3112 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3113 NULL, /* generate_summary */
3114 NULL, /* write_summary */
3115 NULL, /* read_summary */
3116 NULL, /* write_optimization_summary */
3117 NULL, /* read_optimization_summary */
3118 NULL, /* stmt_fixup */
3119 0, /* function_transform_todo_flags_start */
3120 inline_transform, /* function_transform */
3121 NULL) /* variable_transform */
3124 /* opt_pass methods: */
3125 virtual unsigned int execute (function *) { return ipa_inline (); }
3127 }; // class pass_ipa_inline
3129 } // anon namespace
3131 ipa_opt_pass_d *
3132 make_pass_ipa_inline (gcc::context *ctxt)
3134 return new pass_ipa_inline (ctxt);