gcc/ipa-inline.c
1 /* Inlining decision heuristics.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Inlining decision heuristics
23 The implementation of the inliner is organized as follows:
25 inlining heuristics limits
27 can_inline_edge_p allows checking that a particular inlining is allowed
28 by the limits specified by the user (allowed function growth and so
29 on).
31 Functions are inlined when it is obvious the result is profitable (such
32 as functions called once or when inlining reduces code size).
33 In addition to that we perform inlining of small functions and recursive
34 inlining.
36 inlining heuristics
38 The inliner itself is split into two passes:
40 pass_early_inlining
42 A simple local inlining pass that inlines callees into the current
43 function. This pass makes no use of whole-unit analysis and thus can
44 make only very simple decisions based on local properties.
46 The strength of the pass is that it is run in topological order
47 (reverse postorder) on the callgraph. Functions are converted into SSA
48 form just before this pass and optimized subsequently. As a result, the
49 callees of the function seen by the early inliner were already optimized
50 and the results of early inlining add a lot of optimization opportunities
51 for local optimization.
53 The pass handles the obvious inlining decisions within the compilation
54 unit - inlining auto-inline functions, inlining for size and
55 flattening.
57 The main strength of the pass is its ability to eliminate the abstraction
58 penalty in C++ code (via a combination of inlining and early
59 optimization) and thus improve the quality of analysis done by the real
60 IPA optimizers.
62 Because of the lack of whole-unit knowledge, the pass cannot really make
63 good code size/performance tradeoffs. It does, however, perform very simple
64 speculative inlining, allowing code size to grow by
65 EARLY_INLINING_INSNS when the callee is a leaf function. In this case the
66 optimizations performed later are very likely to eliminate the cost.
68 pass_ipa_inline
70 This is the real inliner, able to handle inlining with whole-program
71 knowledge. It performs the following steps:
73 1) Inlining of small functions. This is implemented by a greedy
74 algorithm ordering all inlinable cgraph edges by their badness and
75 inlining them in this order as long as the inline limits allow doing so.
77 This heuristic is not very good at inlining recursive calls. Recursive
78 calls can be inlined with results similar to loop unrolling. To do so,
79 a special-purpose recursive inliner is executed on the function when a
80 recursive edge is met as a viable candidate.
82 2) Unreachable functions are removed from the callgraph. Inlining leads
83 to devirtualization and other modifications of the callgraph, so functions
84 may become unreachable during the process. Also, functions declared as
85 extern inline and virtual functions are removed, since after inlining
86 we no longer need the offline bodies.
88 3) Functions called once and not exported from the unit are inlined.
89 This should almost always lead to a reduction of code size by eliminating
90 the need for an offline copy of the function. */
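/* For orientation, a rough sketch (illustration only, not the actual
   implementation) of the greedy small-function step described above:

     put every inlinable edge into a heap keyed by its badness;
     while the heap is not empty and unit growth stays within limits
       take the edge with the smallest badness;
       if the per-function limits still allow it, inline it and
         recompute the badness of the edges it affects;

   The helpers below (caller_growth_limits, edge_badness, update_edge_key)
   implement the individual pieces of this loop.  */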
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "target.h"
97 #include "rtl.h"
98 #include "tree.h"
99 #include "gimple.h"
100 #include "alloc-pool.h"
101 #include "tree-pass.h"
102 #include "gimple-ssa.h"
103 #include "cgraph.h"
104 #include "lto-streamer.h"
105 #include "trans-mem.h"
106 #include "calls.h"
107 #include "tree-inline.h"
108 #include "profile.h"
109 #include "symbol-summary.h"
110 #include "tree-vrp.h"
111 #include "ipa-prop.h"
112 #include "ipa-fnsummary.h"
113 #include "ipa-inline.h"
114 #include "ipa-utils.h"
115 #include "sreal.h"
116 #include "auto-profile.h"
117 #include "builtins.h"
118 #include "fibonacci_heap.h"
119 #include "stringpool.h"
120 #include "attribs.h"
121 #include "asan.h"
123 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
124 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
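/* The edge heaps below are keyed by an sreal priority - badness as computed
   by edge_badness for the main inliner, and negated call frequency for the
   recursive inliner - so extract_min always yields the edge we currently
   want to inline most.  */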
126 /* Statistics we collect about inlining algorithm. */
127 static int overall_size;
128 static profile_count max_count;
129 static profile_count spec_rem;
131 /* Return false when inlining edge E would lead to violating
132 limits on function unit growth or stack usage growth.
134 The relative function body growth limit is present generally
135 to avoid problems with non-linear behavior of the compiler.
136 To allow inlining huge functions into a tiny wrapper, the limit
137 is always based on the bigger of the two functions considered.
139 For stack growth limits we always base the growth on the stack usage
140 of the caller. We want to prevent applications from segfaulting
141 on stack overflow when functions with huge stack frames get
142 inlined. */
144 static bool
145 caller_growth_limits (struct cgraph_edge *e)
147 struct cgraph_node *to = e->caller;
148 struct cgraph_node *what = e->callee->ultimate_alias_target ();
149 int newsize;
150 int limit = 0;
151 HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
152 ipa_size_summary *outer_info = ipa_size_summaries->get (to);
154 /* Look for the function e->caller is inlined into. While doing
155 so, work out the largest function body on the way. As
156 described above, we want to base our function growth
157 limits on that, not on the self size of the
158 outer function and not on the self size of the inline code
159 we immediately inline into. This is the most relaxed
160 interpretation of the rule "do not grow large functions
161 too much in order to prevent the compiler from exploding". */
162 while (true)
164 ipa_size_summary *size_info = ipa_size_summaries->get (to);
165 if (limit < size_info->self_size)
166 limit = size_info->self_size;
167 if (stack_size_limit < size_info->estimated_self_stack_size)
168 stack_size_limit = size_info->estimated_self_stack_size;
169 if (to->inlined_to)
170 to = to->callers->caller;
171 else
172 break;
175 ipa_fn_summary *what_info = ipa_fn_summaries->get (what);
176 ipa_size_summary *what_size_info = ipa_size_summaries->get (what);
178 if (limit < what_size_info->self_size)
179 limit = what_size_info->self_size;
181 limit += limit * opt_for_fn (to->decl, param_large_function_growth) / 100;
183 /* Check the size after inlining against the function limits. But allow
184 the function to shrink if it went over the limits by forced inlining. */
185 newsize = estimate_size_after_inlining (to, e);
186 if (newsize >= ipa_size_summaries->get (what)->size
187 && newsize > opt_for_fn (to->decl, param_large_function_insns)
188 && newsize > limit)
190 e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
191 return false;
194 if (!what_info->estimated_stack_size)
195 return true;
197 /* FIXME: Stack size limit often prevents inlining in Fortran programs
198 due to large i/o datastructures used by the Fortran front-end.
199 We ought to ignore this limit when we know that the edge is executed
200 on every invocation of the caller (i.e. its call statement dominates
201 exit block). We do not track this information, yet. */
202 stack_size_limit += ((gcov_type)stack_size_limit
203 * opt_for_fn (to->decl, param_stack_frame_growth)
204 / 100);
206 inlined_stack = (ipa_get_stack_frame_offset (to)
207 + outer_info->estimated_self_stack_size
208 + what_info->estimated_stack_size);
209 /* Check new stack consumption with stack consumption at the place
210 stack is used. */
211 if (inlined_stack > stack_size_limit
212 /* If the function already has large stack usage from a sibling
213 inline call, we can inline too.
214 This bit overoptimistically assumes that we are good at stack
215 packing. */
216 && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size
217 && inlined_stack > opt_for_fn (to->decl, param_large_stack_frame))
219 e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
220 return false;
222 return true;
225 /* Dump info about why inlining has failed. */
227 static void
228 report_inline_failed_reason (struct cgraph_edge *e)
230 if (dump_enabled_p ())
232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
233 " not inlinable: %C -> %C, %s\n",
234 e->caller, e->callee,
235 cgraph_inline_failed_string (e->inline_failed));
236 if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
237 || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
238 && e->caller->lto_file_data
239 && e->callee->ultimate_alias_target ()->lto_file_data)
241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
242 " LTO objects: %s, %s\n",
243 e->caller->lto_file_data->file_name,
244 e->callee->ultimate_alias_target ()->lto_file_data->file_name);
246 if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
247 if (dump_file)
248 cl_target_option_print_diff
249 (dump_file, 2, target_opts_for_fn (e->caller->decl),
250 target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
251 if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
252 if (dump_file)
253 cl_optimization_print_diff
254 (dump_file, 2, opts_for_fn (e->caller->decl),
255 opts_for_fn (e->callee->ultimate_alias_target ()->decl));
259 /* Decide whether sanitizer-related attributes allow inlining. */
261 static bool
262 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
264 if (!caller || !callee)
265 return true;
267 /* Follow clang and allow inlining for always_inline functions. */
268 if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
269 return true;
271 const sanitize_code codes[] =
273 SANITIZE_ADDRESS,
274 SANITIZE_THREAD,
275 SANITIZE_UNDEFINED,
276 SANITIZE_UNDEFINED_NONDEFAULT,
277 SANITIZE_POINTER_COMPARE,
278 SANITIZE_POINTER_SUBTRACT
281 for (unsigned i = 0; i < sizeof (codes) / sizeof (codes[0]); i++)
282 if (sanitize_flags_p (codes[i], caller)
283 != sanitize_flags_p (codes[i], callee))
284 return false;
286 if (sanitize_coverage_p (caller) != sanitize_coverage_p (callee))
287 return false;
289 return true;
292 /* Used for flags where it is safe to inline when the caller's value is
293 greater than the callee's. */
294 #define check_maybe_up(flag) \
295 (opts_for_fn (caller->decl)->x_##flag \
296 != opts_for_fn (callee->decl)->x_##flag \
297 && (!always_inline \
298 || opts_for_fn (caller->decl)->x_##flag \
299 < opts_for_fn (callee->decl)->x_##flag))
300 /* Used for flags where it is safe to inline when caller's value is
301 smaller than callee's. */
302 #define check_maybe_down(flag) \
303 (opts_for_fn (caller->decl)->x_##flag \
304 != opts_for_fn (callee->decl)->x_##flag \
305 && (!always_inline \
306 || opts_for_fn (caller->decl)->x_##flag \
307 > opts_for_fn (callee->decl)->x_##flag))
308 /* Used for flags where exact match is needed for correctness. */
309 #define check_match(flag) \
310 (opts_for_fn (caller->decl)->x_##flag \
311 != opts_for_fn (callee->decl)->x_##flag)
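/* For illustration: in can_inline_edge_by_limits_p below,
   check_match (flag_wrapv) flags a mismatch whenever caller and callee differ
   on -fwrapv, while check_maybe_up (flag_rounding_math) flags a mismatch on
   any difference unless the callee is always_inline and the caller's value is
   greater than the callee's (the direction the comments above call safe).  */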
313 /* Decide if we can inline the edge and possibly update
314 the inline_failed reason.
315 We check whether inlining is possible at all and whether
316 caller growth limits allow doing so.
318 If REPORT is true, output the reason to the dump file. */
320 static bool
321 can_inline_edge_p (struct cgraph_edge *e, bool report,
322 bool early = false)
324 gcc_checking_assert (e->inline_failed);
326 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
328 if (report)
329 report_inline_failed_reason (e);
330 return false;
333 bool inlinable = true;
334 enum availability avail;
335 cgraph_node *caller = (e->caller->inlined_to
336 ? e->caller->inlined_to : e->caller);
337 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
339 if (!callee->definition)
341 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
342 inlinable = false;
344 if (!early && (!opt_for_fn (callee->decl, optimize)
345 || !opt_for_fn (caller->decl, optimize)))
347 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
348 inlinable = false;
350 else if (callee->calls_comdat_local)
352 e->inline_failed = CIF_USES_COMDAT_LOCAL;
353 inlinable = false;
355 else if (avail <= AVAIL_INTERPOSABLE)
357 e->inline_failed = CIF_OVERWRITABLE;
358 inlinable = false;
360 /* All edges with call_stmt_cannot_inline_p should have inline_failed
361 initialized to one of FINAL_ERROR reasons. */
362 else if (e->call_stmt_cannot_inline_p)
363 gcc_unreachable ();
364 /* Don't inline if the functions have different EH personalities. */
365 else if (DECL_FUNCTION_PERSONALITY (caller->decl)
366 && DECL_FUNCTION_PERSONALITY (callee->decl)
367 && (DECL_FUNCTION_PERSONALITY (caller->decl)
368 != DECL_FUNCTION_PERSONALITY (callee->decl)))
370 e->inline_failed = CIF_EH_PERSONALITY;
371 inlinable = false;
373 /* TM pure functions should not be inlined into non-TM_pure
374 functions. */
375 else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
377 e->inline_failed = CIF_UNSPECIFIED;
378 inlinable = false;
380 /* Check compatibility of target optimization options. */
381 else if (!targetm.target_option.can_inline_p (caller->decl,
382 callee->decl))
384 e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
385 inlinable = false;
387 else if (ipa_fn_summaries->get (callee) == NULL
388 || !ipa_fn_summaries->get (callee)->inlinable)
390 e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
391 inlinable = false;
393 /* Don't inline a function with mismatched sanitization attributes. */
394 else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
396 e->inline_failed = CIF_SANITIZE_ATTRIBUTE_MISMATCH;
397 inlinable = false;
399 else if (profile_arc_flag
400 && (lookup_attribute ("no_profile_instrument_function",
401 DECL_ATTRIBUTES (caller->decl)) == NULL_TREE)
402 != (lookup_attribute ("no_profile_instrument_function",
403 DECL_ATTRIBUTES (callee->decl)) == NULL_TREE))
405 cgraph_node *origin = caller;
406 while (origin->clone_of)
407 origin = origin->clone_of;
409 if (!DECL_STRUCT_FUNCTION (origin->decl)->always_inline_functions_inlined)
411 e->inline_failed = CIF_UNSPECIFIED;
412 inlinable = false;
416 if (!inlinable && report)
417 report_inline_failed_reason (e);
418 return inlinable;
421 /* Return the inline_insns_single limit for function N. If HINT or HINT2 is
422 true, scale up the bound. */
424 static int
425 inline_insns_single (cgraph_node *n, bool hint, bool hint2)
427 if (hint && hint2)
429 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
430 spd = spd * spd;
431 if (spd > 1000000)
432 spd = 1000000;
433 return opt_for_fn (n->decl, param_max_inline_insns_single) * spd / 100;
435 if (hint || hint2)
436 return opt_for_fn (n->decl, param_max_inline_insns_single)
437 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
438 return opt_for_fn (n->decl, param_max_inline_insns_single);
441 /* Return the inline_insns_auto limit for function N. If HINT or HINT2 is
442 true, scale up the bound. */
444 static int
445 inline_insns_auto (cgraph_node *n, bool hint, bool hint2)
447 int max_inline_insns_auto = opt_for_fn (n->decl, param_max_inline_insns_auto);
448 if (hint && hint2)
450 int64_t spd = opt_for_fn (n->decl, param_inline_heuristics_hint_percent);
451 spd = spd * spd;
452 if (spd > 1000000)
453 spd = 1000000;
454 return max_inline_insns_auto * spd / 100;
456 if (hint || hint2)
457 return max_inline_insns_auto
458 * opt_for_fn (n->decl, param_inline_heuristics_hint_percent) / 100;
459 return max_inline_insns_auto;
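/* A worked example with hypothetical parameter values: if
   param_max_inline_insns_auto is 30 and param_inline_heuristics_hint_percent
   is 200, a match in one hint group raises the auto bound to
   30 * 200 / 100 = 60; when both groups match, the percentage is squared
   (and capped at 1000000) before being applied, raising the bound further.  */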
462 /* Decide if we can inline the edge and possibly update
463 the inline_failed reason.
464 We check whether inlining is possible at all and whether
465 caller growth limits allow doing so.
467 If REPORT is true, output the reason to the dump file.
469 If DISREGARD_LIMITS is true, ignore size limits. */
471 static bool
472 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
473 bool disregard_limits = false, bool early = false)
475 gcc_checking_assert (e->inline_failed);
477 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
479 if (report)
480 report_inline_failed_reason (e);
481 return false;
484 bool inlinable = true;
485 enum availability avail;
486 cgraph_node *caller = (e->caller->inlined_to
487 ? e->caller->inlined_to : e->caller);
488 cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
489 tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
490 tree callee_tree
491 = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
492 /* Check if caller growth allows the inlining. */
493 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
494 && !disregard_limits
495 && !lookup_attribute ("flatten",
496 DECL_ATTRIBUTES (caller->decl))
497 && !caller_growth_limits (e))
498 inlinable = false;
499 else if (callee->externally_visible
500 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl)
501 && flag_live_patching == LIVE_PATCHING_INLINE_ONLY_STATIC)
503 e->inline_failed = CIF_EXTERN_LIVE_ONLY_STATIC;
504 inlinable = false;
506 /* Don't inline a function with a higher optimization level than the
507 caller. FIXME: this is really just the tip of the iceberg of handling
508 the optimization attribute. */
509 else if (caller_tree != callee_tree)
511 bool always_inline =
512 (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
513 && lookup_attribute ("always_inline",
514 DECL_ATTRIBUTES (callee->decl)));
515 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
516 ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
518 /* Until GCC 4.9 we did not check the semantics-altering flags
519 below and inlined across optimization boundaries.
520 Enabling the checks below breaks several packages by refusing
521 to inline library always_inline functions. See PR65873.
522 Disable the check for early inlining for now until a better solution
523 is found. */
524 if (always_inline && early)
526 /* There are some options that change IL semantics, which means
527 we cannot inline in these cases for correctness reasons.
528 Not even for always_inline declared functions. */
529 else if (check_match (flag_wrapv)
530 || check_match (flag_trapv)
531 || check_match (flag_pcc_struct_return)
532 || check_maybe_down (optimize_debug)
533 /* When the caller or callee does FP math, be sure the FP codegen
534 flags are compatible. */
535 || ((caller_info->fp_expressions && callee_info->fp_expressions)
536 && (check_maybe_up (flag_rounding_math)
537 || check_maybe_up (flag_trapping_math)
538 || check_maybe_down (flag_unsafe_math_optimizations)
539 || check_maybe_down (flag_finite_math_only)
540 || check_maybe_up (flag_signaling_nans)
541 || check_maybe_down (flag_cx_limited_range)
542 || check_maybe_up (flag_signed_zeros)
543 || check_maybe_down (flag_associative_math)
544 || check_maybe_down (flag_reciprocal_math)
545 || check_maybe_down (flag_fp_int_builtin_inexact)
546 /* Strictly speaking only when the callee contains function
547 calls that may end up setting errno. */
548 || check_maybe_up (flag_errno_math)))
549 /* We do not want code compiled with exceptions to be
550 brought into a non-EH function unless we know that the callee
551 does not throw.
552 This is tracked by DECL_FUNCTION_PERSONALITY. */
553 || (check_maybe_up (flag_non_call_exceptions)
554 && DECL_FUNCTION_PERSONALITY (callee->decl))
555 || (check_maybe_up (flag_exceptions)
556 && DECL_FUNCTION_PERSONALITY (callee->decl))
557 /* When devirtualization is disabled for the callee, it is not safe
558 to inline it, as we may have mangled the type info.
559 Allow early inlining of always-inline functions. */
560 || (!early && check_maybe_down (flag_devirtualize)))
562 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
563 inlinable = false;
565 /* gcc.dg/pr43564.c. Apply user-forced inline even at -O0. */
566 else if (always_inline)
568 /* When the user added an attribute to the callee, honor it. */
569 else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
570 && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
572 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
573 inlinable = false;
575 /* If an explicit optimize attribute is not used, the mismatch is caused
576 by different command line options used to build different units.
577 Do not care about COMDAT functions - those are intended to be
578 optimized with the optimization flags of the module they are used in.
579 Also do not care about mixing up size/speed optimization when
580 DECL_DISREGARD_INLINE_LIMITS is set. */
581 else if ((callee->merged_comdat
582 && !lookup_attribute ("optimize",
583 DECL_ATTRIBUTES (caller->decl)))
584 || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
586 /* If the mismatch is caused by merging two LTO units with different
587 optimization flags, we want to be a bit nicer. However, never inline
588 if one of the functions is not optimized at all. */
589 else if (!opt_for_fn (callee->decl, optimize)
590 || !opt_for_fn (caller->decl, optimize))
592 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
593 inlinable = false;
595 /* If the callee is optimized for size and the caller is not, allow inlining
596 if the code shrinks or we are within the param_max_inline_insns_single
597 limit and the callee is inline (and thus likely a unified comdat).
598 This will allow the caller to run faster. */
599 else if (opt_for_fn (callee->decl, optimize_size)
600 > opt_for_fn (caller->decl, optimize_size))
602 int growth = estimate_edge_growth (e);
603 if (growth > opt_for_fn (caller->decl, param_max_inline_insns_size)
604 && (!DECL_DECLARED_INLINE_P (callee->decl)
605 && growth >= MAX (inline_insns_single (caller, false, false),
606 inline_insns_auto (caller, false, false))))
608 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
609 inlinable = false;
612 /* If the callee is more aggressively optimized for performance than the
613 caller, we generally want to inline only cheap (runtime-wise) functions. */
614 else if (opt_for_fn (callee->decl, optimize_size)
615 < opt_for_fn (caller->decl, optimize_size)
616 || (opt_for_fn (callee->decl, optimize)
617 > opt_for_fn (caller->decl, optimize)))
619 if (estimate_edge_time (e)
620 >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
622 e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
623 inlinable = false;
629 if (!inlinable && report)
630 report_inline_failed_reason (e);
631 return inlinable;
635 /* Return true if the edge E is inlinable during early inlining. */
637 static bool
638 can_early_inline_edge_p (struct cgraph_edge *e)
640 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
641 /* The early inliner might get called at WPA stage when an IPA pass adds a
642 new function. In this case we cannot really do any early inlining
643 because function bodies are missing. */
644 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
645 return false;
646 if (!gimple_has_body_p (callee->decl))
648 e->inline_failed = CIF_BODY_NOT_AVAILABLE;
649 return false;
651 /* In the early inliner some of the callees may not be in SSA form yet
652 (i.e. the callgraph is cyclic and we have not processed
653 the callee with the early inliner yet). We don't have a CIF code for this
654 case; later we will re-do the decision in the real inliner. */
655 if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
656 || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
660 " edge not inlinable: not in SSA form\n");
661 return false;
663 if (!can_inline_edge_p (e, true, true)
664 || !can_inline_edge_by_limits_p (e, true, false, true))
665 return false;
666 return true;
670 /* Return number of calls in N. Ignore cheap builtins. */
672 static int
673 num_calls (struct cgraph_node *n)
675 struct cgraph_edge *e;
676 int num = 0;
678 for (e = n->callees; e; e = e->next_callee)
679 if (!is_inexpensive_builtin (e->callee->decl))
680 num++;
681 return num;
687 /* Return true if we are interested in inlining this small function. */
687 static bool
688 want_early_inline_function_p (struct cgraph_edge *e)
690 bool want_inline = true;
691 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
693 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
695 /* For AutoFDO, we need to make sure that before the profile summary, all
696 hot paths' IR looks exactly the same as in the profiled binary. As a
697 result, in the early inliner we disregard the size limit and inline a
698 callsite if:
699 * it is inlined in the profiled binary, and
700 * its cloned callee has enough samples to be considered "hot". */
701 else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
703 else if (!DECL_DECLARED_INLINE_P (callee->decl)
704 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
706 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
707 report_inline_failed_reason (e);
708 want_inline = false;
710 else
712 /* First take care of very large functions. */
713 int min_growth = estimate_min_edge_growth (e), growth = 0;
714 int n;
715 int early_inlining_insns = param_early_inlining_insns;
717 if (min_growth > early_inlining_insns)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
721 " will not early inline: %C->%C, "
722 "call is cold and code would grow "
723 "at least by %i\n",
724 e->caller, callee,
725 min_growth);
726 want_inline = false;
728 else
729 growth = estimate_edge_growth (e);
732 if (!want_inline || growth <= param_max_inline_insns_size)
734 else if (!e->maybe_hot_p ())
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
738 " will not early inline: %C->%C, "
739 "call is cold and code would grow by %i\n",
740 e->caller, callee,
741 growth);
742 want_inline = false;
744 else if (growth > early_inlining_insns)
746 if (dump_enabled_p ())
747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
748 " will not early inline: %C->%C, "
749 "growth %i exceeds --param early-inlining-insns\n",
750 e->caller, callee, growth);
751 want_inline = false;
753 else if ((n = num_calls (callee)) != 0
754 && growth * (n + 1) > early_inlining_insns)
756 if (dump_enabled_p ())
757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
758 " will not early inline: %C->%C, "
759 "growth %i exceeds --param early-inlining-insns "
760 "divided by number of calls\n",
761 e->caller, callee, growth);
762 want_inline = false;
765 return want_inline;
768 /* Compute time of the edge->caller + edge->callee execution when inlining
769 does not happen. */
771 inline sreal
772 compute_uninlined_call_time (struct cgraph_edge *edge,
773 sreal uninlined_call_time,
774 sreal freq)
776 cgraph_node *caller = (edge->caller->inlined_to
777 ? edge->caller->inlined_to
778 : edge->caller);
780 if (freq > 0)
781 uninlined_call_time *= freq;
782 else
783 uninlined_call_time = uninlined_call_time >> 11;
785 sreal caller_time = ipa_fn_summaries->get (caller)->time;
786 return uninlined_call_time + caller_time;
789 /* Same as compute_uninlined_call_time but compute the time when inlining
790 does happen. */
792 inline sreal
793 compute_inlined_call_time (struct cgraph_edge *edge,
794 sreal time,
795 sreal freq)
797 cgraph_node *caller = (edge->caller->inlined_to
798 ? edge->caller->inlined_to
799 : edge->caller);
800 sreal caller_time = ipa_fn_summaries->get (caller)->time;
802 if (freq > 0)
803 time *= freq;
804 else
805 time = time >> 11;
807 /* This calculation should match the one in ipa-fnsummary.c
808 (estimate_edge_size_and_time). */
809 time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
810 time += caller_time;
811 if (time <= 0)
812 time = ((sreal) 1) >> 8;
813 gcc_checking_assert (time >= 0);
814 return time;
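/* Note that both helpers above fall back to scaling the time by 1/2048 (the
   ">> 11" shift) when the edge frequency is not positive, so calls whose
   frequency is unknown or zero are treated as executed very rarely.  */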
817 /* Determine the time saved by inlining EDGE of frequency FREQ
818 where the callee's runtime w/o inlining is UNINLINED_TIME
819 and with inlining is INLINED_TIME. */
821 inline sreal
822 inlining_speedup (struct cgraph_edge *edge,
823 sreal freq,
824 sreal uninlined_time,
825 sreal inlined_time)
827 sreal speedup = uninlined_time - inlined_time;
828 /* Handling of call_time should match the one in ipa-fnsummary.c
829 (estimate_edge_size_and_time). */
830 sreal call_time = ipa_call_summaries->get (edge)->call_stmt_time;
832 if (freq > 0)
834 speedup = (speedup + call_time);
835 if (freq != 1)
836 speedup = speedup * freq;
838 else if (freq == 0)
839 speedup = speedup >> 11;
840 gcc_checking_assert (speedup >= 0);
841 return speedup;
844 /* Return true if the speedup for inlining E is bigger than
845 param_inline_min_speedup. */
847 static bool
848 big_speedup_p (struct cgraph_edge *e)
850 sreal unspec_time;
851 sreal spec_time = estimate_edge_time (e, &unspec_time);
852 sreal freq = e->sreal_frequency ();
853 sreal time = compute_uninlined_call_time (e, unspec_time, freq);
854 sreal inlined_time = compute_inlined_call_time (e, spec_time, freq);
855 cgraph_node *caller = (e->caller->inlined_to
856 ? e->caller->inlined_to
857 : e->caller);
858 int limit = opt_for_fn (caller->decl, param_inline_min_speedup);
860 if ((time - inlined_time) * 100 > time * limit)
861 return true;
862 return false;
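/* For example, with a hypothetical param_inline_min_speedup of 30 the test
   above succeeds only when (time - inlined_time) * 100 > time * 30, i.e. when
   inlining is expected to remove more than 30% of the estimated caller plus
   callee execution time.  */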
865 /* Return true if we are interested in inlining this small function.
866 When REPORT is true, report the reason to the dump file. */
868 static bool
869 want_inline_small_function_p (struct cgraph_edge *e, bool report)
871 bool want_inline = true;
872 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
873 cgraph_node *to = (e->caller->inlined_to
874 ? e->caller->inlined_to : e->caller);
876 /* Allow this function to be called before can_inline_edge_p,
877 since it's usually cheaper. */
878 if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
879 want_inline = false;
880 else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
882 else if (!DECL_DECLARED_INLINE_P (callee->decl)
883 && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
885 e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
886 want_inline = false;
888 /* Do a fast and conservative check whether the function can be a good
889 inline candidate. */
890 else if ((!DECL_DECLARED_INLINE_P (callee->decl)
891 && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
892 && ipa_fn_summaries->get (callee)->min_size
893 - ipa_call_summaries->get (e)->call_stmt_size
894 > inline_insns_auto (e->caller, true, true))
896 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
897 want_inline = false;
899 else if ((DECL_DECLARED_INLINE_P (callee->decl)
900 || e->count.ipa ().nonzero_p ())
901 && ipa_fn_summaries->get (callee)->min_size
902 - ipa_call_summaries->get (e)->call_stmt_size
903 > inline_insns_single (e->caller, true, true))
905 e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
906 ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
907 : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
908 want_inline = false;
910 else
912 int growth = estimate_edge_growth (e);
913 ipa_hints hints = estimate_edge_hints (e);
914 /* We have two independent groups of hints. If a hint from either
915 group matches, the limits are increased. If both groups match, the
916 limit is increased even more. */
917 bool apply_hints = (hints & (INLINE_HINT_indirect_call
918 | INLINE_HINT_known_hot
919 | INLINE_HINT_loop_iterations
920 | INLINE_HINT_loop_stride));
921 bool apply_hints2 = (hints & INLINE_HINT_builtin_constant_p);
923 if (growth <= opt_for_fn (to->decl,
924 param_max_inline_insns_size))
926 /* Apply the param_max_inline_insns_single limit. Do not do so when
927 hints suggest that inlining the given function is very profitable.
928 Avoid computing big_speedup_p when it is not necessary to change the
929 outcome of the decision. */
930 else if (DECL_DECLARED_INLINE_P (callee->decl)
931 && growth >= inline_insns_single (e->caller, apply_hints,
932 apply_hints2)
933 && (apply_hints || apply_hints2
934 || growth >= inline_insns_single (e->caller, true,
935 apply_hints2)
936 || !big_speedup_p (e)))
938 e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
939 want_inline = false;
941 else if (!DECL_DECLARED_INLINE_P (callee->decl)
942 && !opt_for_fn (e->caller->decl, flag_inline_functions)
943 && growth >= opt_for_fn (to->decl,
944 param_max_inline_insns_small))
946 /* growth_positive_p is expensive, always test it last. */
947 if (growth >= inline_insns_single (e->caller, false, false)
948 || growth_positive_p (callee, e, growth))
950 e->inline_failed = CIF_NOT_DECLARED_INLINED;
951 want_inline = false;
954 /* Apply the param_max_inline_insns_auto limit for functions not declared
955 inline. Bypass the limit when the speedup seems big. */
956 else if (!DECL_DECLARED_INLINE_P (callee->decl)
957 && growth >= inline_insns_auto (e->caller, apply_hints,
958 apply_hints2)
959 && (apply_hints || apply_hints2
960 || growth >= inline_insns_auto (e->caller, true,
961 apply_hints2)
962 || !big_speedup_p (e)))
964 /* growth_positive_p is expensive, always test it last. */
965 if (growth >= inline_insns_single (e->caller, false, false)
966 || growth_positive_p (callee, e, growth))
968 e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
969 want_inline = false;
972 /* If the call is cold, do not inline when the function body would grow. */
973 else if (!e->maybe_hot_p ()
974 && (growth >= inline_insns_single (e->caller, false, false)
975 || growth_positive_p (callee, e, growth)))
977 e->inline_failed = CIF_UNLIKELY_CALL;
978 want_inline = false;
981 if (!want_inline && report)
982 report_inline_failed_reason (e);
983 return want_inline;
986 /* EDGE is a self-recursive edge.
987 We handle two cases - when function A is inlined into itself
988 or when function A is being inlined into another inlined copy of function
989 A within function B.
991 In the first case OUTER_NODE points to the toplevel copy of A, while
992 in the second case OUTER_NODE points to the outermost copy of A in B.
994 In both cases we want to be extra selective, since
995 inlining the call will just cause new recursive calls to appear. */
997 static bool
998 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
999 struct cgraph_node *outer_node,
1000 bool peeling,
1001 int depth)
1003 char const *reason = NULL;
1004 bool want_inline = true;
1005 sreal caller_freq = 1;
1006 int max_depth = opt_for_fn (outer_node->decl,
1007 param_max_inline_recursive_depth_auto);
1009 if (DECL_DECLARED_INLINE_P (edge->caller->decl))
1010 max_depth = opt_for_fn (outer_node->decl,
1011 param_max_inline_recursive_depth);
1013 if (!edge->maybe_hot_p ())
1015 reason = "recursive call is cold";
1016 want_inline = false;
1018 else if (depth > max_depth)
1020 reason = "--param max-inline-recursive-depth exceeded.";
1021 want_inline = false;
1023 else if (outer_node->inlined_to
1024 && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
1026 reason = "caller frequency is 0";
1027 want_inline = false;
1030 if (!want_inline)
1032 /* Inlining a self-recursive function into a copy of itself within another
1033 function is a transformation similar to loop peeling.
1035 Peeling is profitable if we can inline enough copies to make the probability
1036 of an actual call to the self-recursive function very small. Be sure that
1037 the probability of recursion is small.
1039 We ensure that the frequency of recursing is at most 1 - (1/max_depth).
1040 This way the expected number of recursions is at most max_depth. */
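/* One way to read the bound above: if each level of the recursion is entered
   with probability at most p = 1 - 1/max_depth, the expected number of levels
   is bounded by the geometric series 1 + p + p^2 + ... = 1 / (1 - p)
   = max_depth.  */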
1041 else if (peeling)
1043 sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
1044 int i;
1045 for (i = 1; i < depth; i++)
1046 max_prob = max_prob * max_prob;
1047 if (edge->sreal_frequency () >= max_prob * caller_freq)
1049 reason = "frequency of recursive call is too large";
1050 want_inline = false;
1053 /* Recursive inlining, i.e. the equivalent of unrolling, is profitable if the
1054 recursion depth is large. We reduce the function call overhead and increase
1055 the chance that things fit in the hardware return predictor.
1057 Recursive inlining might however increase the cost of stack frame setup,
1058 actually slowing down functions whose recursion tree is wide rather than
1059 deep.
1061 Deciding reliably when to do recursive inlining without profile feedback
1062 is tricky. For now we disable recursive inlining when the probability of
1063 self recursion is low.
1065 Recursive inlining of a self-recursive call within a loop also results in
1066 large loop depths that generally optimize badly. We may want to throttle
1067 down inlining in those cases. In particular this seems to happen in one
1068 of the libstdc++ rb-tree methods. */
1069 else
1071 if (edge->sreal_frequency () * 100
1072 <= caller_freq
1073 * opt_for_fn (outer_node->decl,
1074 param_min_inline_recursive_probability))
1076 reason = "frequency of recursive call is too small";
1077 want_inline = false;
1080 if (!want_inline && dump_enabled_p ())
1081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, edge->call_stmt,
1082 " not inlining recursively: %s\n", reason);
1083 return want_inline;
1086 /* Return true when NODE has an uninlinable caller;
1087 set HAS_HOT_CALL if it has a hot call.
1088 Worker for cgraph_for_node_and_aliases. */
1090 static bool
1091 check_callers (struct cgraph_node *node, void *has_hot_call)
1093 struct cgraph_edge *e;
1094 for (e = node->callers; e; e = e->next_caller)
1096 if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
1097 || !opt_for_fn (e->caller->decl, optimize))
1098 return true;
1099 if (!can_inline_edge_p (e, true))
1100 return true;
1101 if (e->recursive_p ())
1102 return true;
1103 if (!can_inline_edge_by_limits_p (e, true))
1104 return true;
1105 if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
1106 *(bool *)has_hot_call = true;
1108 return false;
1111 /* If NODE has a caller, return true. */
1113 static bool
1114 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
1116 if (node->callers)
1117 return true;
1118 return false;
1121 /* Decide if inlining NODE would reduce unit size by eliminating
1122 the offline copy of the function.
1123 When COLD is true, cold calls are considered too. */
1125 static bool
1126 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
1128 bool has_hot_call = false;
1130 /* Aliases get inlined along with the function they alias. */
1131 if (node->alias)
1132 return false;
1133 /* Already inlined? */
1134 if (node->inlined_to)
1135 return false;
1136 /* Does it have callers? */
1137 if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
1138 return false;
1139 /* Inlining into all callers would increase size? */
1140 if (growth_positive_p (node, NULL, INT_MIN) > 0)
1141 return false;
1142 /* All inlines must be possible. */
1143 if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1144 true))
1145 return false;
1146 if (!cold && !has_hot_call)
1147 return false;
1148 return true;
1151 /* Return true if WHERE, whose size is SIZE, is a possible candidate for the
1152 wrapper heuristics in edge_badness. */
1154 static bool
1155 wrapper_heuristics_may_apply (struct cgraph_node *where, int size)
1157 return size < (DECL_DECLARED_INLINE_P (where->decl)
1158 ? inline_insns_single (where, false, false)
1159 : inline_insns_auto (where, false, false));
1162 /* A cost model driving the inlining heuristics in such a way that the edges
1163 with the smallest badness are inlined first. After each inlining is
1164 performed, the costs of all caller edges of the affected nodes are
1165 recomputed, so the metric may accurately depend on values such as the number
1166 of inlinable callers of the function or the function body size. */
1168 static sreal
1169 edge_badness (struct cgraph_edge *edge, bool dump)
1171 sreal badness;
1172 int growth;
1173 sreal edge_time, unspec_edge_time;
1174 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1175 class ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1176 ipa_hints hints;
1177 cgraph_node *caller = (edge->caller->inlined_to
1178 ? edge->caller->inlined_to
1179 : edge->caller);
1181 growth = estimate_edge_growth (edge);
1182 edge_time = estimate_edge_time (edge, &unspec_edge_time);
1183 hints = estimate_edge_hints (edge);
1184 gcc_checking_assert (edge_time >= 0);
1185 /* Check that the inlined time is better, but tolerate some roundoff issues.
1186 FIXME: When the callee profile drops to 0 we account calls more. This
1187 should be fixed by never doing that. */
1188 gcc_checking_assert ((edge_time * 100
1189 - callee_info->time * 101).to_int () <= 0
1190 || callee->count.ipa ().initialized_p ());
1191 gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size);
1193 if (dump)
1195 fprintf (dump_file, " Badness calculation for %s -> %s\n",
1196 edge->caller->dump_name (),
1197 edge->callee->dump_name ());
1198 fprintf (dump_file, " size growth %i, time %f unspec %f ",
1199 growth,
1200 edge_time.to_double (),
1201 unspec_edge_time.to_double ());
1202 ipa_dump_hints (dump_file, hints);
1203 if (big_speedup_p (edge))
1204 fprintf (dump_file, " big_speedup");
1205 fprintf (dump_file, "\n");
1208 /* Always prefer inlining that saves code size. */
1209 if (growth <= 0)
1211 badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1212 if (dump)
1213 fprintf (dump_file, " %f: Growth %d <= 0\n", badness.to_double (),
1214 growth);
1216 /* Inlining into EXTERNAL functions is not going to change anything unless
1217 they are themselves inlined. */
1218 else if (DECL_EXTERNAL (caller->decl))
1220 if (dump)
1221 fprintf (dump_file, " max: function is external\n");
1222 return sreal::max ();
1224 /* When the profile is available, compute badness as:
1226 time_saved * caller_count
1227 goodness = -------------------------------------------------
1228 growth_of_caller * overall_growth * combined_size
1230 badness = - goodness
1232 Again, use a negative value to make calls with a profile appear hotter
1233 than calls without. */
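/* All quantities in the fraction above are positive, so goodness is positive
   and badness negative; edges that save more time per unit of caller growth,
   overall growth and combined size get a more negative (smaller) badness and
   are therefore extracted from the heap earlier.  */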
1235 else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1236 || caller->count.ipa ().nonzero_p ())
1238 sreal numerator, denominator;
1239 int overall_growth;
1240 sreal freq = edge->sreal_frequency ();
1242 numerator = inlining_speedup (edge, freq, unspec_edge_time, edge_time);
1243 if (numerator <= 0)
1244 numerator = ((sreal) 1 >> 8);
1245 if (caller->count.ipa ().nonzero_p ())
1246 numerator *= caller->count.ipa ().to_gcov_type ();
1247 else if (caller->count.ipa ().initialized_p ())
1248 numerator = numerator >> 11;
1249 denominator = growth;
1251 overall_growth = callee_info->growth;
1253 /* Look for inliner wrappers of the form:
1255 inline_caller ()
1257 do_fast_job...
1258 if (need_more_work)
1259 noninline_callee ();
1261 Without penalizing this case, we usually inline noninline_callee
1262 into the inline_caller because overall_growth is small, preventing
1263 further inlining of inline_caller.
1265 Penalize only callgraph edges to functions with small overall
1266 growth ... */
1268 if (growth > overall_growth
1269 /* ... and having only one caller which is not inlined ... */
1270 && callee_info->single_caller
1271 && !edge->caller->inlined_to
1272 /* ... and edges executed only conditionally ... */
1273 && freq < 1
1274 /* ... consider case where callee is not inline but caller is ... */
1275 && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1276 && DECL_DECLARED_INLINE_P (caller->decl))
1277 /* ... or when early optimizers decided to split and edge
1278 frequency still indicates splitting is a win ... */
1279 || (callee->split_part && !caller->split_part
1280 && freq * 100
1281 < opt_for_fn (caller->decl,
1282 param_partial_inlining_entry_probability)
1283 /* ... and do not overwrite user specified hints. */
1284 && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1285 || DECL_DECLARED_INLINE_P (caller->decl)))))
1287 ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1288 int caller_growth = caller_info->growth;
1290 /* Only apply the penalty when caller looks like inline candidate,
1291 and it is not called once. */
1292 if (!caller_info->single_caller && overall_growth < caller_growth
1293 && caller_info->inlinable
1294 && wrapper_heuristics_may_apply
1295 (caller, ipa_size_summaries->get (caller)->size))
1297 if (dump)
1298 fprintf (dump_file,
1299 " Wrapper penalty. Increasing growth %i to %i\n",
1300 overall_growth, caller_growth);
1301 overall_growth = caller_growth;
1304 if (overall_growth > 0)
1306 /* Strongly prefer functions with few callers that can be inlined
1307 fully. The square root here leads to smaller binaries on average.
1308 Watch however for extreme cases and return to a linear function
1309 when growth is large. */
1310 if (overall_growth < 256)
1311 overall_growth *= overall_growth;
1312 else
1313 overall_growth += 256 * 256 - 256;
1314 denominator *= overall_growth;
1316 denominator *= ipa_size_summaries->get (caller)->size + growth;
1318 badness = - numerator / denominator;
1320 if (dump)
1322 fprintf (dump_file,
1323 " %f: guessed profile. frequency %f, count %" PRId64
1324 " caller count %" PRId64
1325 " time saved %f"
1326 " overall growth %i (current) %i (original)"
1327 " %i (compensated)\n",
1328 badness.to_double (),
1329 freq.to_double (),
1330 edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1331 caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1332 inlining_speedup (edge, freq, unspec_edge_time, edge_time).to_double (),
1333 estimate_growth (callee),
1334 callee_info->growth, overall_growth);
1337 /* When the function-local profile is not available or does not give
1338 useful information (i.e. the frequency is zero), base the cost on the
1339 loop nest and overall size growth, so we optimize for the overall number
1340 of functions fully inlined in the program. */
1341 else
1343 int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1344 badness = growth;
1346 /* Decrease badness if call is nested. */
1347 if (badness > 0)
1348 badness = badness >> nest;
1349 else
1350 badness = badness << nest;
1351 if (dump)
1352 fprintf (dump_file, " %f: no profile. nest %i\n",
1353 badness.to_double (), nest);
1355 gcc_checking_assert (badness != 0);
1357 if (edge->recursive_p ())
1358 badness = badness.shift (badness > 0 ? 4 : -4);
1359 if ((hints & (INLINE_HINT_indirect_call
1360 | INLINE_HINT_loop_iterations
1361 | INLINE_HINT_loop_stride))
1362 || callee_info->growth <= 0)
1363 badness = badness.shift (badness > 0 ? -2 : 2);
1364 if (hints & INLINE_HINT_builtin_constant_p)
1365 badness = badness.shift (badness > 0 ? -4 : 4);
1366 if (hints & (INLINE_HINT_same_scc))
1367 badness = badness.shift (badness > 0 ? 3 : -3);
1368 else if (hints & (INLINE_HINT_in_scc))
1369 badness = badness.shift (badness > 0 ? 2 : -2);
1370 else if (hints & (INLINE_HINT_cross_module))
1371 badness = badness.shift (badness > 0 ? 1 : -1);
1372 if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1373 badness = badness.shift (badness > 0 ? -4 : 4);
1374 else if ((hints & INLINE_HINT_declared_inline))
1375 badness = badness.shift (badness > 0 ? -3 : 3);
1376 if (dump)
1377 fprintf (dump_file, " Adjusted by hints %f\n", badness.to_double ());
1378 return badness;
1381 /* Recompute badness of EDGE and update its key in HEAP if needed. */
1382 static inline void
1383 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1385 sreal badness = edge_badness (edge, false);
1386 if (edge->aux)
1388 edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1389 gcc_checking_assert (n->get_data () == edge);
1391 /* fibonacci_heap::replace_key does busy updating of the
1392 heap that is unnecessarily expensive.
1393 We do lazy increases: after extracting the minimum, if the key
1394 turns out to be out of date, it is re-inserted into the heap
1395 with the correct value. */
1396 if (badness < n->get_key ())
1398 if (dump_file && (dump_flags & TDF_DETAILS))
1400 fprintf (dump_file,
1401 " decreasing badness %s -> %s, %f to %f\n",
1402 edge->caller->dump_name (),
1403 edge->callee->dump_name (),
1404 n->get_key ().to_double (),
1405 badness.to_double ());
1407 heap->decrease_key (n, badness);
1410 else
1412 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fprintf (dump_file,
1415 " enqueuing call %s -> %s, badness %f\n",
1416 edge->caller->dump_name (),
1417 edge->callee->dump_name (),
1418 badness.to_double ());
1420 edge->aux = heap->insert (badness, edge);
1425 /* NODE was inlined.
1426 All caller edges need to be reset because
1427 size estimates change. Similarly callees need to be reset
1428 because a better context may be known. */
1430 static void
1431 reset_edge_caches (struct cgraph_node *node)
1433 struct cgraph_edge *edge;
1434 struct cgraph_edge *e = node->callees;
1435 struct cgraph_node *where = node;
1436 struct ipa_ref *ref;
1438 if (where->inlined_to)
1439 where = where->inlined_to;
1441 reset_node_cache (where);
1443 if (edge_growth_cache != NULL)
1444 for (edge = where->callers; edge; edge = edge->next_caller)
1445 if (edge->inline_failed)
1446 edge_growth_cache->remove (edge);
1448 FOR_EACH_ALIAS (where, ref)
1449 reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1451 if (!e)
1452 return;
1454 while (true)
1455 if (!e->inline_failed && e->callee->callees)
1456 e = e->callee->callees;
1457 else
1459 if (edge_growth_cache != NULL && e->inline_failed)
1460 edge_growth_cache->remove (e);
1461 if (e->next_callee)
1462 e = e->next_callee;
1463 else
1467 if (e->caller == node)
1468 return;
1469 e = e->caller->callers;
1471 while (!e->next_callee);
1472 e = e->next_callee;
1477 /* Recompute HEAP nodes for each caller of NODE.
1478 UPDATED_NODES tracks nodes we have already visited, to avoid redundant work.
1479 When CHECK_INLINABLITY_FOR is set, re-check for the specified edge that
1480 it is inlinable. Otherwise check all edges. */
1482 static void
1483 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1484 bitmap updated_nodes,
1485 struct cgraph_edge *check_inlinablity_for)
1487 struct cgraph_edge *edge;
1488 struct ipa_ref *ref;
1490 if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1491 || node->inlined_to)
1492 return;
1493 if (!bitmap_set_bit (updated_nodes, node->get_uid ()))
1494 return;
1496 FOR_EACH_ALIAS (node, ref)
1498 struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1499 update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1502 for (edge = node->callers; edge; edge = edge->next_caller)
1503 if (edge->inline_failed)
1505 if (!check_inlinablity_for
1506 || check_inlinablity_for == edge)
1508 if (can_inline_edge_p (edge, false)
1509 && want_inline_small_function_p (edge, false)
1510 && can_inline_edge_by_limits_p (edge, false))
1511 update_edge_key (heap, edge);
1512 else if (edge->aux)
1514 report_inline_failed_reason (edge);
1515 heap->delete_node ((edge_heap_node_t *) edge->aux);
1516 edge->aux = NULL;
1519 else if (edge->aux)
1520 update_edge_key (heap, edge);
1524 /* Recompute HEAP nodes for each uninlined call in NODE.
1525 If UPDATE_SINCE is non-NULL, check whether edges called within that function
1526 are inlinable (typically UPDATE_SINCE is the inline clone we introduced,
1527 where all edges have a new context).
1529 This is used when we know that edge badnesses are only going to increase
1530 (we introduced a new call site) and thus all we need to do is insert the
1531 newly created edges into the heap. */
1533 static void
1534 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1535 struct cgraph_node *update_since,
1536 bitmap updated_nodes)
1538 struct cgraph_edge *e = node->callees;
1539 bool check_inlinability = update_since == node;
1541 if (!e)
1542 return;
1543 while (true)
1544 if (!e->inline_failed && e->callee->callees)
1546 if (e->callee == update_since)
1547 check_inlinability = true;
1548 e = e->callee->callees;
1550 else
1552 enum availability avail;
1553 struct cgraph_node *callee;
1554 if (!check_inlinability)
1556 if (e->aux
1557 && !bitmap_bit_p (updated_nodes,
1558 e->callee->ultimate_alias_target
1559 (&avail, e->caller)->get_uid ()))
1560 update_edge_key (heap, e);
1562 /* We do not reset the callee growth cache here. Since we added a new call,
1563 growth should have just increased and consequently the badness metric
1564 doesn't need updating. */
1565 else if (e->inline_failed
1566 && (callee = e->callee->ultimate_alias_target (&avail,
1567 e->caller))
1568 && avail >= AVAIL_AVAILABLE
1569 && ipa_fn_summaries->get (callee) != NULL
1570 && ipa_fn_summaries->get (callee)->inlinable
1571 && !bitmap_bit_p (updated_nodes, callee->get_uid ()))
1573 if (can_inline_edge_p (e, false)
1574 && want_inline_small_function_p (e, false)
1575 && can_inline_edge_by_limits_p (e, false))
1577 gcc_checking_assert (check_inlinability || can_inline_edge_p (e, false));
1578 gcc_checking_assert (check_inlinability || e->aux);
1579 update_edge_key (heap, e);
1581 else if (e->aux)
1583 report_inline_failed_reason (e);
1584 heap->delete_node ((edge_heap_node_t *) e->aux);
1585 e->aux = NULL;
1588 /* In case we redirected to an unreachable node, we only need to remove the
1589 fibheap entry. */
1590 else if (e->aux)
1592 heap->delete_node ((edge_heap_node_t *) e->aux);
1593 e->aux = NULL;
1595 if (e->next_callee)
1596 e = e->next_callee;
1597 else
1601 if (e->caller == node)
1602 return;
1603 if (e->caller == update_since)
1604 check_inlinability = false;
1605 e = e->caller->callers;
1607 while (!e->next_callee);
1608 e = e->next_callee;
1613 /* Enqueue all recursive calls from NODE into the priority queue, ordered by
1614 how much we want to recursively inline the call. */
1616 static void
1617 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1618 edge_heap_t *heap)
1620 struct cgraph_edge *e;
1621 enum availability avail;
1623 for (e = where->callees; e; e = e->next_callee)
1624 if (e->callee == node
1625 || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1626 && avail > AVAIL_INTERPOSABLE))
1627 heap->insert (-e->sreal_frequency (), e);
1628 for (e = where->callees; e; e = e->next_callee)
1629 if (!e->inline_failed)
1630 lookup_recursive_calls (node, e->callee, heap);
1633 /* Decide on recursive inlining: in case the function has recursive calls,
1634 inline until the body size reaches the given limit. If any new indirect
1635 edges are discovered in the process, add them to *NEW_EDGES, unless
1636 NEW_EDGES is NULL. */
1638 static bool
1639 recursive_inlining (struct cgraph_edge *edge,
1640 vec<cgraph_edge *> *new_edges)
1642 cgraph_node *to = (edge->caller->inlined_to
1643 ? edge->caller->inlined_to : edge->caller);
1644 int limit = opt_for_fn (to->decl,
1645 param_max_inline_insns_recursive_auto);
1646 edge_heap_t heap (sreal::min ());
1647 struct cgraph_node *node;
1648 struct cgraph_edge *e;
1649 struct cgraph_node *master_clone = NULL, *next;
1650 int depth = 0;
1651 int n = 0;
1653 node = edge->caller;
1654 if (node->inlined_to)
1655 node = node->inlined_to;
1657 if (DECL_DECLARED_INLINE_P (node->decl))
1658 limit = opt_for_fn (to->decl, param_max_inline_insns_recursive);
1660 /* Make sure that the function is small enough to be considered for inlining. */
1661 if (estimate_size_after_inlining (node, edge) >= limit)
1662 return false;
1663 lookup_recursive_calls (node, node, &heap);
1664 if (heap.empty ())
1665 return false;
1667 if (dump_file)
1668 fprintf (dump_file,
1669 " Performing recursive inlining on %s\n", node->dump_name ());
1671 /* Do the inlining and update the list of recursive calls during the process. */
1672 while (!heap.empty ())
1674 struct cgraph_edge *curr = heap.extract_min ();
1675 struct cgraph_node *cnode, *dest = curr->callee;
1677 if (!can_inline_edge_p (curr, true)
1678 || !can_inline_edge_by_limits_p (curr, true))
1679 continue;
1681 /* MASTER_CLONE is produced in the case we have already started modifying
1682 the function. Be sure to redirect the edge to the original body before
1683 estimating growth; otherwise we will be seeing growth after inlining
1684 the already modified body. */
1685 if (master_clone)
1687 curr->redirect_callee (master_clone);
1688 if (edge_growth_cache != NULL)
1689 edge_growth_cache->remove (curr);
1692 if (estimate_size_after_inlining (node, curr) > limit)
1694 curr->redirect_callee (dest);
1695 if (edge_growth_cache != NULL)
1696 edge_growth_cache->remove (curr);
1697 break;
1700 depth = 1;
1701 for (cnode = curr->caller;
1702 cnode->inlined_to; cnode = cnode->callers->caller)
1703 if (node->decl
1704 == curr->callee->ultimate_alias_target ()->decl)
1705 depth++;
1707 if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1709 curr->redirect_callee (dest);
1710 if (edge_growth_cache != NULL)
1711 edge_growth_cache->remove (curr);
1712 continue;
1715 if (dump_file)
1717 fprintf (dump_file,
1718 " Inlining call of depth %i", depth);
1719 if (node->count.nonzero_p () && curr->count.initialized_p ())
1721 fprintf (dump_file, " called approx. %.2f times per call",
1722 (double)curr->count.to_gcov_type ()
1723 / node->count.to_gcov_type ());
1725 fprintf (dump_file, "\n");
1727 if (!master_clone)
1729 /* We need original clone to copy around. */
1730 master_clone = node->create_clone (node->decl, node->count,
1731 false, vNULL, true, NULL, NULL);
1732 for (e = master_clone->callees; e; e = e->next_callee)
1733 if (!e->inline_failed)
1734 clone_inlined_nodes (e, true, false, NULL);
1735 curr->redirect_callee (master_clone);
1736 if (edge_growth_cache != NULL)
1737 edge_growth_cache->remove (curr);
1740 inline_call (curr, false, new_edges, &overall_size, true);
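/* The body of NODE has changed; drop its cached estimates and rescan the
   freshly inlined body for further recursive calls to enqueue. */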
1741 reset_node_cache (node);
1742 lookup_recursive_calls (node, curr->callee, &heap);
1743 n++;
1746 if (!heap.empty () && dump_file)
1747 fprintf (dump_file, " Recursive inlining growth limit met.\n");
1749 if (!master_clone)
1750 return false;
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_NOTE, edge->call_stmt,
1754 "\n Inlined %i times, "
1755 "body grown from size %i to %i, time %f to %f\n", n,
1756 ipa_size_summaries->get (master_clone)->size,
1757 ipa_size_summaries->get (node)->size,
1758 ipa_fn_summaries->get (master_clone)->time.to_double (),
1759 ipa_fn_summaries->get (node)->time.to_double ());
1761 /* Remove the master clone we used for inlining. We rely on the fact that
1762 clones inlined into the master clone get queued just before the master
1763 clone, so we don't need recursion. */
1764 for (node = symtab->first_function (); node != master_clone;
1765 node = next)
1767 next = symtab->next_function (node);
1768 if (node->inlined_to == master_clone)
1769 node->remove ();
1771 master_clone->remove ();
1772 return true;
1776 /* Given the whole-compilation-unit size estimate INSNS, compute how large we can
1777 allow the unit to grow. */
1779 static int64_t
1780 compute_max_insns (cgraph_node *node, int insns)
1782 int max_insns = insns;
1783 if (max_insns < opt_for_fn (node->decl, param_large_unit_insns))
1784 max_insns = opt_for_fn (node->decl, param_large_unit_insns);
1786 return ((int64_t) max_insns
1787 * (100 + opt_for_fn (node->decl, param_inline_unit_growth)) / 100);
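/* A worked example (the parameter values are illustrative, not necessarily
   the defaults): with --param inline-unit-growth=40 and a unit of 50000
   instructions (assumed to be above param_large_unit_insns), the unit may
   grow to 50000 * (100 + 40) / 100 = 70000 instructions; units smaller than
   param_large_unit_insns are treated as if they had that many instructions. */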
1791 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP. */
1793 static void
1794 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> &new_edges)
1796 while (new_edges.length () > 0)
1798 struct cgraph_edge *edge = new_edges.pop ();
1800 gcc_assert (!edge->aux);
1801 gcc_assert (edge->callee);
1802 if (edge->inline_failed
1803 && can_inline_edge_p (edge, true)
1804 && want_inline_small_function_p (edge, true)
1805 && can_inline_edge_by_limits_p (edge, true))
1806 edge->aux = heap->insert (edge_badness (edge, false), edge);
1810 /* Remove EDGE from the fibheap. */
1812 static void
1813 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1815 if (e->aux)
1817 ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1818 e->aux = NULL;
1822 /* Return true if speculation of edge E seems useful.
1823 If ANTICIPATE_INLINING is true, be conservative and hope that E
1824 may get inlined. */
1826 bool
1827 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1829 /* If we have already decided to inline the edge, it seems useful. */
1830 if (!e->inline_failed)
1831 return true;
1833 enum availability avail;
1834 struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1835 e->caller);
1837 gcc_assert (e->speculative && !e->indirect_unknown_callee);
1839 if (!e->maybe_hot_p ())
1840 return false;
1842 /* See if IP optimizations found something potentially useful about the
1843 function. For now we look only for CONST/PURE flags. Almost everything
1844 else we propagate is useless. */
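/* Knowing that the speculative target is const or pure, while the indirect
   call is not known to be, lets later optimizers CSE or move the direct
   call, so the speculation can pay off even if the call never gets
   inlined. */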
1845 if (avail >= AVAIL_AVAILABLE)
1847 int ecf_flags = flags_from_decl_or_type (target->decl);
1848 if (ecf_flags & ECF_CONST)
1850 if (!(e->speculative_call_indirect_edge ()->indirect_info
1851 ->ecf_flags & ECF_CONST))
1852 return true;
1854 else if (ecf_flags & ECF_PURE)
1856 if (!(e->speculative_call_indirect_edge ()->indirect_info
1857 ->ecf_flags & ECF_PURE))
1858 return true;
1861 /* If we did not manage to inline the function nor redirect
1862 to an ipa-cp clone (these are recognized by the local flag being set),
1863 it is probably pointless to keep the speculation unless the hardware is
1864 missing an indirect call predictor. */
1865 if (!anticipate_inlining && !target->local)
1866 return false;
1867 /* For overwritable targets there is not much to do. */
1868 if (!can_inline_edge_p (e, false)
1869 || !can_inline_edge_by_limits_p (e, false, true))
1870 return false;
1871 /* OK, speculation seems interesting. */
1872 return true;
1875 /* We know that EDGE is not going to be inlined.
1876 See if we can remove speculation. */
1878 static void
1879 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1881 if (edge->speculative && !speculation_useful_p (edge, false))
1883 struct cgraph_node *node = edge->caller;
1884 struct cgraph_node *where = node->inlined_to
1885 ? node->inlined_to : node;
1886 auto_bitmap updated_nodes;
1888 if (edge->count.ipa ().initialized_p ())
1889 spec_rem += edge->count.ipa ();
1890 cgraph_edge::resolve_speculation (edge);
1891 reset_edge_caches (where);
1892 ipa_update_overall_fn_summary (where);
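/* Removing the speculation changed WHERE's summary, so the badness of edges
   into and out of it may have changed; refresh their keys in the heap. */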
1893 update_caller_keys (edge_heap, where,
1894 updated_nodes, NULL);
1895 update_callee_keys (edge_heap, where, NULL,
1896 updated_nodes);
1900 /* Return true if NODE should be accounted in the overall size estimate.
1901 Skip all nodes optimized for size so we can measure the growth of the hot
1902 part of the program regardless of the padding. */
1904 bool
1905 inline_account_function_p (struct cgraph_node *node)
1907 return (!DECL_EXTERNAL (node->decl)
1908 && !opt_for_fn (node->decl, optimize_size)
1909 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1912 /* Count the number of callers of NODE and store it into DATA (which
1913 points to an int). Worker for cgraph_for_node_and_aliases. */
1915 static bool
1916 sum_callers (struct cgraph_node *node, void *data)
1918 struct cgraph_edge *e;
1919 int *num_calls = (int *)data;
1921 for (e = node->callers; e; e = e->next_caller)
1922 (*num_calls)++;
1923 return false;
1926 /* We only propagate across edges with non-interposable callee. */
1928 inline bool
1929 ignore_edge_p (struct cgraph_edge *e)
1931 enum availability avail;
1932 e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
1933 return (avail <= AVAIL_INTERPOSABLE);
1936 /* We use a greedy algorithm for inlining small functions:
1937 all inline candidates are put into a prioritized heap ordered by
1938 increasing badness.
1940 The inlining of small functions is bounded by the unit growth parameters. */
1942 static void
1943 inline_small_functions (void)
1945 struct cgraph_node *node;
1946 struct cgraph_edge *edge;
1947 edge_heap_t edge_heap (sreal::min ());
1948 auto_bitmap updated_nodes;
1949 int min_size;
1950 auto_vec<cgraph_edge *> new_indirect_edges;
1951 int initial_size = 0;
1952 struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1953 struct cgraph_edge_hook_list *edge_removal_hook_holder;
1954 new_indirect_edges.create (8);
1956 edge_removal_hook_holder
1957 = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1959 /* Compute overall unit size and other global parameters used by badness
1960 metrics. */
1962 max_count = profile_count::uninitialized ();
1963 ipa_reduced_postorder (order, true, ignore_edge_p);
1964 free (order);
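/* ipa_reduced_postorder is called only for its side effect of storing
   DFS/SCC information in node->aux (consumed below when numbering cycles);
   the order array itself is not needed and is freed immediately. */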
1966 FOR_EACH_DEFINED_FUNCTION (node)
1967 if (!node->inlined_to)
1969 if (!node->alias && node->analyzed
1970 && (node->has_gimple_body_p () || node->thunk)
1971 && opt_for_fn (node->decl, optimize))
1973 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
1974 struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1976 /* Do not account external functions; they will be optimized out
1977 if not inlined. Also only count the non-cold portion of the program. */
1978 if (inline_account_function_p (node))
1979 initial_size += ipa_size_summaries->get (node)->size;
1980 info->growth = estimate_growth (node);
1982 int num_calls = 0;
1983 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1984 true);
1985 if (num_calls == 1)
1986 info->single_caller = true;
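/* Give every member of a non-trivial strongly connected component (a cycle
   of mutually recursive functions) the same scc_no; the inlining heuristics
   use it elsewhere to recognize edges whose caller and callee belong to the
   same cycle. */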
1987 if (dfs && dfs->next_cycle)
1989 struct cgraph_node *n2;
1990 int id = dfs->scc_no + 1;
1991 for (n2 = node; n2;
1992 n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1993 if (opt_for_fn (n2->decl, optimize))
1995 ipa_fn_summary *info2 = ipa_fn_summaries->get
1996 (n2->inlined_to ? n2->inlined_to : n2);
1997 if (info2->scc_no)
1998 break;
1999 info2->scc_no = id;
2004 for (edge = node->callers; edge; edge = edge->next_caller)
2005 max_count = max_count.max (edge->count.ipa ());
2007 ipa_free_postorder_info ();
2008 initialize_growth_caches ();
2010 if (dump_file)
2011 fprintf (dump_file,
2012 "\nDeciding on inlining of small functions. Starting with size %i.\n",
2013 initial_size);
2015 overall_size = initial_size;
2016 min_size = overall_size;
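/* MIN_SIZE tracks the smallest overall unit size seen so far; the unit
   growth limit in compute_max_insns is derived from it, so total growth is
   bounded relative to the smallest size reached rather than the current
   one. */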
2018 /* Populate the heap with all edges we might inline. */
2020 FOR_EACH_DEFINED_FUNCTION (node)
2022 bool update = false;
2023 struct cgraph_edge *next = NULL;
2024 bool has_speculative = false;
2026 if (!opt_for_fn (node->decl, optimize))
2027 continue;
2029 if (dump_file)
2030 fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
2032 for (edge = node->callees; edge; edge = edge->next_callee)
2034 if (edge->inline_failed
2035 && !edge->aux
2036 && can_inline_edge_p (edge, true)
2037 && want_inline_small_function_p (edge, true)
2038 && can_inline_edge_by_limits_p (edge, true)
2039 && edge->inline_failed)
2041 gcc_assert (!edge->aux);
2042 update_edge_key (&edge_heap, edge);
2044 if (edge->speculative)
2045 has_speculative = true;
2047 if (has_speculative)
2048 for (edge = node->callees; edge; edge = next)
2050 next = edge->next_callee;
2051 if (edge->speculative
2052 && !speculation_useful_p (edge, edge->aux != NULL))
2054 cgraph_edge::resolve_speculation (edge);
2055 update = true;
2058 if (update)
2060 struct cgraph_node *where = node->inlined_to
2061 ? node->inlined_to : node;
2062 ipa_update_overall_fn_summary (where);
2063 reset_edge_caches (where);
2064 update_caller_keys (&edge_heap, where,
2065 updated_nodes, NULL);
2066 update_callee_keys (&edge_heap, where, NULL,
2067 updated_nodes);
2068 bitmap_clear (updated_nodes);
2072 gcc_assert (in_lto_p
2073 || !(max_count > 0)
2074 || (profile_info && flag_branch_probabilities));
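/* Main greedy loop: repeatedly take the edge with the best (smallest)
   badness, re-validate its possibly stale key, and inline it unless doing
   so would exceed the unit growth limit. */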
2076 while (!edge_heap.empty ())
2078 int old_size = overall_size;
2079 struct cgraph_node *where, *callee;
2080 sreal badness = edge_heap.min_key ();
2081 sreal current_badness;
2082 int growth;
2084 edge = edge_heap.extract_min ();
2085 gcc_assert (edge->aux);
2086 edge->aux = NULL;
2087 if (!edge->inline_failed || !edge->callee->analyzed)
2088 continue;
2090 /* Be sure that the caches are kept consistent.
2091 This check is affected by scaling roundoff errors when an IPA profile
2092 is in use, so we skip it in that case. */
2093 if (flag_checking && !edge->callee->count.ipa_p ()
2094 && (!max_count.initialized_p () || !max_count.nonzero_p ()))
2096 sreal cached_badness = edge_badness (edge, false);
2098 int old_size_est = estimate_edge_size (edge);
2099 sreal old_time_est = estimate_edge_time (edge);
2100 int old_hints_est = estimate_edge_hints (edge);
2102 if (edge_growth_cache != NULL)
2103 edge_growth_cache->remove (edge);
2104 reset_node_cache (edge->caller->inlined_to
2105 ? edge->caller->inlined_to
2106 : edge->caller);
2107 gcc_assert (old_size_est == estimate_edge_size (edge));
2108 gcc_assert (old_time_est == estimate_edge_time (edge));
2109 /* FIXME:
2111 gcc_assert (old_hints_est == estimate_edge_hints (edge));
2113 fails with profile feedback because some hints depend on the
2114 maybe_hot_edge_p predicate and, because the callee gets inlined into other
2115 calls, the edge may become cold.
2116 This ought to be fixed by computing relative probabilities
2117 for a given invocation, but that will be better done once the whole
2118 code is converted to sreals. Disable for now and revert to "wrong"
2119 value so enable/disable checking paths agree. */
2120 edge_growth_cache->get (edge)->hints = old_hints_est + 1;
2122 /* When updating the edge costs, we only decrease badness in the keys.
2123 Increases of badness are handled lazily; when we see a key with an
2124 out-of-date value on it, we re-insert the edge. */
2125 current_badness = edge_badness (edge, false);
2126 gcc_assert (cached_badness == current_badness);
2127 gcc_assert (current_badness >= badness);
2129 else
2130 current_badness = edge_badness (edge, false);
2131 if (current_badness != badness)
2133 if (edge_heap.min () && current_badness > edge_heap.min_key ())
2135 edge->aux = edge_heap.insert (current_badness, edge);
2136 continue;
2138 else
2139 badness = current_badness;
2142 if (!can_inline_edge_p (edge, true)
2143 || !can_inline_edge_by_limits_p (edge, true))
2145 resolve_noninline_speculation (&edge_heap, edge);
2146 continue;
2149 callee = edge->callee->ultimate_alias_target ();
2150 growth = estimate_edge_growth (edge);
2151 if (dump_file)
2153 fprintf (dump_file,
2154 "\nConsidering %s with %i size\n",
2155 callee->dump_name (),
2156 ipa_size_summaries->get (callee)->size);
2157 fprintf (dump_file,
2158 " to be inlined into %s in %s:%i\n"
2159 " Estimated badness is %f, frequency %.2f.\n",
2160 edge->caller->dump_name (),
2161 edge->call_stmt
2162 && (LOCATION_LOCUS (gimple_location ((const gimple *)
2163 edge->call_stmt))
2164 > BUILTINS_LOCATION)
2165 ? gimple_filename ((const gimple *) edge->call_stmt)
2166 : "unknown",
2167 edge->call_stmt
2168 ? gimple_lineno ((const gimple *) edge->call_stmt)
2169 : -1,
2170 badness.to_double (),
2171 edge->sreal_frequency ().to_double ());
2172 if (edge->count.ipa ().initialized_p ())
2174 fprintf (dump_file, " Called ");
2175 edge->count.ipa ().dump (dump_file);
2176 fprintf (dump_file, " times\n");
2178 if (dump_flags & TDF_DETAILS)
2179 edge_badness (edge, true);
2182 where = edge->caller;
2184 if (overall_size + growth > compute_max_insns (where, min_size)
2185 && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2187 edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
2188 report_inline_failed_reason (edge);
2189 resolve_noninline_speculation (&edge_heap, edge);
2190 continue;
2193 if (!want_inline_small_function_p (edge, true))
2195 resolve_noninline_speculation (&edge_heap, edge);
2196 continue;
2199 profile_count old_count = callee->count;
2201 /* Heuristics for inlining small functions work poorly for
2202 recursive calls where we achieve an effect similar to loop unrolling.
2203 When inlining such an edge seems profitable, leave the decision to the
2204 special-purpose recursive inliner. */
2205 if (edge->recursive_p ())
2207 if (where->inlined_to)
2208 where = where->inlined_to;
2209 if (!recursive_inlining (edge,
2210 opt_for_fn (edge->caller->decl,
2211 flag_indirect_inlining)
2212 ? &new_indirect_edges : NULL))
2214 edge->inline_failed = CIF_RECURSIVE_INLINING;
2215 resolve_noninline_speculation (&edge_heap, edge);
2216 continue;
2218 reset_edge_caches (where);
2219 /* Recursive inliner inlines all recursive calls of the function
2220 at once. Consequently we need to update all callee keys. */
2221 if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2222 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2223 update_callee_keys (&edge_heap, where, where, updated_nodes);
2224 bitmap_clear (updated_nodes);
2226 else
2228 struct cgraph_node *outer_node = NULL;
2229 int depth = 0;
2231 /* Consider the case where a self-recursive function A is inlined
2232 into B. This is a desired optimization in some cases, since it
2233 leads to an effect similar to loop peeling and we might completely
2234 optimize out the recursive call. However, we must be extra
2235 selective. */
2237 where = edge->caller;
2238 while (where->inlined_to)
2240 if (where->decl == callee->decl)
2241 outer_node = where, depth++;
2242 where = where->callers->caller;
2244 if (outer_node
2245 && !want_inline_self_recursive_call_p (edge, outer_node,
2246 true, depth))
2248 edge->inline_failed
2249 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2250 ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2251 resolve_noninline_speculation (&edge_heap, edge);
2252 continue;
2254 else if (depth && dump_file)
2255 fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2257 gcc_checking_assert (!callee->inlined_to);
2259 int old_size = ipa_size_summaries->get (where)->size;
2260 sreal old_time = ipa_fn_summaries->get (where)->time;
2262 inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2263 reset_edge_caches (edge->callee);
2264 add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2266 /* If caller's size and time increased we do not need to update
2267 all edges because badness is not going to decrease. */
2268 if (old_size <= ipa_size_summaries->get (where)->size
2269 && old_time <= ipa_fn_summaries->get (where)->time
2270 /* The wrapper penalty may be non-monotonic in this respect.
2271 Fortunately it only affects small functions. */
2272 && !wrapper_heuristics_may_apply (where, old_size))
2273 update_callee_keys (&edge_heap, edge->callee, edge->callee,
2274 updated_nodes);
2275 else
2276 update_callee_keys (&edge_heap, where,
2277 edge->callee,
2278 updated_nodes);
2280 where = edge->caller;
2281 if (where->inlined_to)
2282 where = where->inlined_to;
2284 /* Our profitability metric can depend on local properties
2285 such as the number of inlinable calls and the size of the function body.
2286 After inlining, these properties might change for the function we
2287 inlined into (since its body size changed) and for the functions
2288 called by the function we inlined (since the number of their inlinable
2289 callers might change). */
2290 update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2291 /* Offline copy count has possibly changed, recompute if profile is
2292 available. */
2293 struct cgraph_node *n
2294 = cgraph_node::get (edge->callee->decl)->ultimate_alias_target ();
2295 if (n != edge->callee && n->analyzed && !(n->count == old_count)
2296 && n->count.ipa_p ())
2297 update_callee_keys (&edge_heap, n, NULL, updated_nodes);
2298 bitmap_clear (updated_nodes);
2300 if (dump_enabled_p ())
2302 ipa_fn_summary *s = ipa_fn_summaries->get (where);
2304 /* dump_printf can't handle %+i. */
2305 char buf_net_change[100];
2306 snprintf (buf_net_change, sizeof buf_net_change, "%+i",
2307 overall_size - old_size);
2309 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, edge->call_stmt,
2310 " Inlined %C into %C which now has time %f and "
2311 "size %i, net change of %s%s.\n",
2312 edge->callee, edge->caller,
2313 s->time.to_double (),
2314 ipa_size_summaries->get (edge->caller)->size,
2315 buf_net_change,
2316 cross_module_call_p (edge) ? " (cross module)":"");
2318 if (min_size > overall_size)
2320 min_size = overall_size;
2322 if (dump_file)
2323 fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2327 free_growth_caches ();
2328 if (dump_enabled_p ())
2329 dump_printf (MSG_NOTE,
2330 "Unit growth for small function inlining: %i->%i (%i%%)\n",
2331 initial_size, overall_size,
2332 initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2333 symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2336 /* Flatten NODE. Performed both during early inlining and
2337 at IPA inlining time. */
2339 static void
2340 flatten_function (struct cgraph_node *node, bool early, bool update)
2342 struct cgraph_edge *e;
2344 /* We shouldn't be called recursively when we are being processed. */
2345 gcc_assert (node->aux == NULL);
2347 node->aux = (void *) node;
2349 for (e = node->callees; e; e = e->next_callee)
2351 struct cgraph_node *orig_callee;
2352 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2354 /* We've hit a cycle? It is time to give up. */
2355 if (callee->aux)
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2359 "Not inlining %C into %C to avoid cycle.\n",
2360 callee, e->caller);
2361 if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2362 e->inline_failed = CIF_RECURSIVE_INLINING;
2363 continue;
2366 /* When the edge is already inlined, we just need to recurse into
2367 it in order to fully flatten the leaves. */
2368 if (!e->inline_failed)
2370 flatten_function (callee, early, false);
2371 continue;
2374 /* The flatten attribute needs to be processed during late inlining. For
2375 extra code quality, however, we do flattening during early optimization,
2376 too. */
2377 if (!early
2378 ? !can_inline_edge_p (e, true)
2379 && !can_inline_edge_by_limits_p (e, true)
2380 : !can_early_inline_edge_p (e))
2381 continue;
2383 if (e->recursive_p ())
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2387 "Not inlining: recursive call.\n");
2388 continue;
2391 if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2392 != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2396 "Not inlining: SSA form does not match.\n");
2397 continue;
2400 /* Inline the edge and flatten the inline clone. Avoid
2401 recursing through the original node if the node was cloned. */
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2404 " Inlining %C into %C.\n",
2405 callee, e->caller);
2406 orig_callee = callee;
2407 inline_call (e, true, NULL, NULL, false);
2408 if (e->callee != orig_callee)
2409 orig_callee->aux = (void *) node;
2410 flatten_function (e->callee, early, false);
2411 if (e->callee != orig_callee)
2412 orig_callee->aux = NULL;
2415 node->aux = NULL;
2416 cgraph_node *where = node->inlined_to ? node->inlined_to : node;
2417 if (update && opt_for_fn (where->decl, optimize))
2418 ipa_update_overall_fn_summary (where);
2421 /* Inline NODE into all callers. Worker for cgraph_for_node_and_aliases.
2422 DATA points to the number of calls originally found, so we avoid infinite
2423 recursion. */
2425 static bool
2426 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2427 hash_set<cgraph_node *> *callers)
2429 int *num_calls = (int *)data;
2430 bool callee_removed = false;
2432 while (node->callers && !node->inlined_to)
2434 struct cgraph_node *caller = node->callers->caller;
2436 if (!can_inline_edge_p (node->callers, true)
2437 || !can_inline_edge_by_limits_p (node->callers, true)
2438 || node->callers->recursive_p ())
2440 if (dump_file)
2441 fprintf (dump_file, "Uninlinable call found; giving up.\n");
2442 *num_calls = 0;
2443 return false;
2446 if (dump_file)
2448 cgraph_node *ultimate = node->ultimate_alias_target ();
2449 fprintf (dump_file,
2450 "\nInlining %s size %i.\n",
2451 ultimate->dump_name (),
2452 ipa_size_summaries->get (ultimate)->size);
2453 fprintf (dump_file,
2454 " Called once from %s %i insns.\n",
2455 node->callers->caller->dump_name (),
2456 ipa_size_summaries->get (node->callers->caller)->size);
2459 /* Remember which callers we inlined to, delaying updating the
2460 overall summary. */
2461 callers->add (node->callers->caller);
2462 inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2463 if (dump_file)
2464 fprintf (dump_file,
2465 " Inlined into %s which now has %i size\n",
2466 caller->dump_name (),
2467 ipa_size_summaries->get (caller)->size);
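/* NUM_CALLS was the number of calls found before we started; once it is
   exhausted, inlining has exposed new calls, so stop to avoid iterating
   forever. */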
2468 if (!(*num_calls)--)
2470 if (dump_file)
2471 fprintf (dump_file, "New calls found; giving up.\n");
2472 return callee_removed;
2474 if (callee_removed)
2475 return true;
2477 return false;
2480 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2481 update. */
2483 static bool
2484 inline_to_all_callers (struct cgraph_node *node, void *data)
2486 hash_set<cgraph_node *> callers;
2487 bool res = inline_to_all_callers_1 (node, data, &callers);
2488 /* Perform the delayed update of the overall summary of all callers
2489 processed. This avoids quadratic behavior in the cases where
2490 we have a lot of calls to the same function. */
2491 for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2492 i != callers.end (); ++i)
2493 ipa_update_overall_fn_summary ((*i)->inlined_to ? (*i)->inlined_to : *i);
2494 return res;
2497 /* Output overall time estimate. */
2498 static void
2499 dump_overall_stats (void)
2501 sreal sum_weighted = 0, sum = 0;
2502 struct cgraph_node *node;
2504 FOR_EACH_DEFINED_FUNCTION (node)
2505 if (!node->inlined_to
2506 && !node->alias)
2508 ipa_fn_summary *s = ipa_fn_summaries->get (node);
2509 if (s != NULL)
2511 sum += s->time;
2512 if (node->count.ipa ().initialized_p ())
2513 sum_weighted += s->time * node->count.ipa ().to_gcov_type ();
2516 fprintf (dump_file, "Overall time estimate: "
2517 "%f weighted by profile: "
2518 "%f\n", sum.to_double (), sum_weighted.to_double ());
2521 /* Output some useful stats about inlining. */
2523 static void
2524 dump_inline_stats (void)
2526 int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2527 int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2528 int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2529 int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2530 int64_t inlined_speculative = 0, inlined_speculative_ply = 0;
2531 int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2532 int64_t reason[CIF_N_REASONS][2];
2533 sreal reason_freq[CIF_N_REASONS];
2534 int i;
2535 struct cgraph_node *node;
2537 memset (reason, 0, sizeof (reason));
2538 for (i=0; i < CIF_N_REASONS; i++)
2539 reason_freq[i] = 0;
2540 FOR_EACH_DEFINED_FUNCTION (node)
2542 struct cgraph_edge *e;
2543 for (e = node->callees; e; e = e->next_callee)
2545 if (e->inline_failed)
2547 if (e->count.ipa ().initialized_p ())
2548 reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2549 reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2550 reason[(int) e->inline_failed][1] ++;
2551 if (DECL_VIRTUAL_P (e->callee->decl)
2552 && e->count.ipa ().initialized_p ())
2554 if (e->indirect_inlining_edge)
2555 noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2556 else
2557 noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2559 else if (e->count.ipa ().initialized_p ())
2561 if (e->indirect_inlining_edge)
2562 noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2563 else
2564 noninlined_cnt += e->count.ipa ().to_gcov_type ();
2567 else if (e->count.ipa ().initialized_p ())
2569 if (e->speculative)
2571 if (DECL_VIRTUAL_P (e->callee->decl))
2572 inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2573 else
2574 inlined_speculative += e->count.ipa ().to_gcov_type ();
2576 else if (DECL_VIRTUAL_P (e->callee->decl))
2578 if (e->indirect_inlining_edge)
2579 inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2580 else
2581 inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2583 else
2585 if (e->indirect_inlining_edge)
2586 inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2587 else
2588 inlined_cnt += e->count.ipa ().to_gcov_type ();
2592 for (e = node->indirect_calls; e; e = e->next_callee)
2593 if (e->indirect_info->polymorphic
2594 && e->count.ipa ().initialized_p ())
2595 indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2596 else if (e->count.ipa ().initialized_p ())
2597 indirect_cnt += e->count.ipa ().to_gcov_type ();
2599 if (max_count.initialized_p ())
2601 fprintf (dump_file,
2602 "Inlined %" PRId64 " + speculative "
2603 "%" PRId64 " + speculative polymorphic "
2604 "%" PRId64 " + previously indirect "
2605 "%" PRId64 " + virtual "
2606 "%" PRId64 " + virtual and previously indirect "
2607 "%" PRId64 "\n" "Not inlined "
2608 "%" PRId64 " + previously indirect "
2609 "%" PRId64 " + virtual "
2610 "%" PRId64 " + virtual and previously indirect "
2611 "%" PRId64 " + still indirect "
2612 "%" PRId64 " + still indirect polymorphic "
2613 "%" PRId64 "\n", inlined_cnt,
2614 inlined_speculative, inlined_speculative_ply,
2615 inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2616 noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2617 noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2618 fprintf (dump_file, "Removed speculations ");
2619 spec_rem.dump (dump_file);
2620 fprintf (dump_file, "\n");
2622 dump_overall_stats ();
2623 fprintf (dump_file, "\nWhy inlining failed?\n");
2624 for (i = 0; i < CIF_N_REASONS; i++)
2625 if (reason[i][1])
2626 fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2627 cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2628 (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2631 /* Called when node is removed. */
2633 static void
2634 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2636 if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2637 return;
2639 hash_set<struct cgraph_node *> *removed
2640 = (hash_set<struct cgraph_node *> *) data;
2641 removed->add (node);
2644 /* Decide on the inlining. We do so in the topological order to avoid
2645 the expense of updating data structures. */
2647 static unsigned int
2648 ipa_inline (void)
2650 struct cgraph_node *node;
2651 int nnodes;
2652 struct cgraph_node **order;
2653 int i, j;
2654 int cold;
2655 bool remove_functions = false;
2657 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2659 if (dump_file)
2660 ipa_dump_fn_summaries (dump_file);
2662 nnodes = ipa_reverse_postorder (order);
2663 spec_rem = profile_count::zero ();
2665 FOR_EACH_FUNCTION (node)
2667 node->aux = 0;
2669 /* Recompute the default reasons for inlining because they may have
2670 changed during merging. */
2671 if (in_lto_p)
2673 for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2675 gcc_assert (e->inline_failed);
2676 initialize_inline_failed (e);
2678 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2679 initialize_inline_failed (e);
2683 if (dump_file)
2684 fprintf (dump_file, "\nFlattening functions:\n");
2686 /* First shrink the order array so that it only contains nodes with
2687 the flatten attribute. */
2688 for (i = nnodes - 1, j = i; i >= 0; i--)
2690 node = order[i];
2691 if (node->definition
2692 /* Do not try to flatten aliases. These may happen for example when
2693 creating local aliases. */
2694 && !node->alias
2695 && lookup_attribute ("flatten",
2696 DECL_ATTRIBUTES (node->decl)) != NULL)
2697 order[j--] = order[i];
2700 /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2701 nodes with flatten attribute. If there is more than one such
2702 node, we need to register a node removal hook, as flatten_function
2703 could remove other nodes with flatten attribute. See PR82801. */
2704 struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2705 hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2706 if (j < nnodes - 2)
2708 flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2709 node_removal_hook_holder
2710 = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2711 flatten_removed_nodes);
2714 /* In the first pass handle functions to be flattened. Do this with
2715 a priority so none of our later choices will make this impossible. */
2716 for (i = nnodes - 1; i > j; i--)
2718 node = order[i];
2719 if (flatten_removed_nodes
2720 && flatten_removed_nodes->contains (node))
2721 continue;
2723 /* Handle nodes to be flattened.
2724 Ideally, when processing callees, we would stop inlining at the
2725 entry of cycles, possibly cloning that entry point and
2726 trying to flatten it, turning it into a self-recursive
2727 function. */
2728 if (dump_file)
2729 fprintf (dump_file, "Flattening %s\n", node->dump_name ());
2730 flatten_function (node, false, true);
2733 if (j < nnodes - 2)
2735 symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2736 delete flatten_removed_nodes;
2738 free (order);
2740 if (dump_file)
2741 dump_overall_stats ();
2743 inline_small_functions ();
2745 gcc_assert (symtab->state == IPA_SSA);
2746 symtab->state = IPA_SSA_AFTER_INLINING;
2747 /* Do first after-inlining removal. We want to remove all "stale" extern
2748 inline functions and virtual functions so we really know what is called
2749 once. */
2750 symtab->remove_unreachable_nodes (dump_file);
2752 /* Inline functions with the property that after inlining into all callers the
2753 code size will shrink because the out-of-line copy is eliminated.
2754 We do this regardless of the callee size as long as the function growth limits
2755 are met. */
2756 if (dump_file)
2757 fprintf (dump_file,
2758 "\nDeciding on functions to be inlined into all callers and "
2759 "removing useless speculations:\n");
2761 /* Inlining one function called once has a good chance of preventing
2762 the inlining of another function into the same caller. Ideally we should
2763 work in priority order, but probably inlining hot functions first
2764 is a good cut without the extra pain of maintaining the queue.
2766 ??? This is not really fitting the bill perfectly: inlining a callee
2767 into its caller often leads to better optimization of the callee due to
2768 the increased context for optimization.
2769 For example, if the main() function calls a function that outputs help
2770 and then a function that does the main optimization, we should inline
2771 the second with priority even if both calls are cold by themselves.
2773 We probably want to implement a new predicate replacing our use of
2774 maybe_hot_edge, interpreted as maybe_hot_edge || callee is known
2775 to be hot. */
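/* The loop below runs twice: first with cold == 0 so that candidates with
   hot calls are handled first, then with cold == 1 to pick up the remaining
   cold candidates. */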
2776 for (cold = 0; cold <= 1; cold ++)
2778 FOR_EACH_DEFINED_FUNCTION (node)
2780 struct cgraph_edge *edge, *next;
2781 bool update=false;
2783 if (!opt_for_fn (node->decl, optimize)
2784 || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2785 continue;
2787 for (edge = node->callees; edge; edge = next)
2789 next = edge->next_callee;
2790 if (edge->speculative && !speculation_useful_p (edge, false))
2792 if (edge->count.ipa ().initialized_p ())
2793 spec_rem += edge->count.ipa ();
2794 cgraph_edge::resolve_speculation (edge);
2795 update = true;
2796 remove_functions = true;
2799 if (update)
2801 struct cgraph_node *where = node->inlined_to
2802 ? node->inlined_to : node;
2803 reset_edge_caches (where);
2804 ipa_update_overall_fn_summary (where);
2806 if (want_inline_function_to_all_callers_p (node, cold))
2808 int num_calls = 0;
2809 node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2810 true);
2811 while (node->call_for_symbol_and_aliases
2812 (inline_to_all_callers, &num_calls, true))
2814 remove_functions = true;
2819 if (dump_enabled_p ())
2820 dump_printf (MSG_NOTE,
2821 "\nInlined %i calls, eliminated %i functions\n\n",
2822 ncalls_inlined, nfunctions_inlined);
2823 if (dump_file)
2824 dump_inline_stats ();
2826 if (dump_file)
2827 ipa_dump_fn_summaries (dump_file);
2828 return remove_functions ? TODO_remove_functions : 0;
2831 /* Inline always-inline function calls in NODE. */
2833 static bool
2834 inline_always_inline_functions (struct cgraph_node *node)
2836 struct cgraph_edge *e;
2837 bool inlined = false;
2839 for (e = node->callees; e; e = e->next_callee)
2841 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2842 if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2843 continue;
2845 if (e->recursive_p ())
2847 if (dump_enabled_p ())
2848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2849 " Not inlining recursive call to %C.\n",
2850 e->callee);
2851 e->inline_failed = CIF_RECURSIVE_INLINING;
2852 continue;
2855 if (!can_early_inline_edge_p (e))
2857 /* Set inlined to true if the callee is marked "always_inline" but
2858 is not inlinable. This will allow flagging an error later in
2859 expand_call_inline in tree-inline.c. */
2860 if (lookup_attribute ("always_inline",
2861 DECL_ATTRIBUTES (callee->decl)) != NULL)
2862 inlined = true;
2863 continue;
2866 if (dump_enabled_p ())
2867 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2868 " Inlining %C into %C (always_inline).\n",
2869 e->callee, e->caller);
2870 inline_call (e, true, NULL, NULL, false);
2871 inlined = true;
2873 if (inlined)
2874 ipa_update_overall_fn_summary (node);
2876 return inlined;
2879 /* Decide on early inlining of small functions called from NODE. Return
2880 true if anything was inlined. */
2882 static bool
2883 early_inline_small_functions (struct cgraph_node *node)
2885 struct cgraph_edge *e;
2886 bool inlined = false;
2888 for (e = node->callees; e; e = e->next_callee)
2890 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2892 /* We can encounter not-yet-analyzed function during
2893 early inlining on callgraphs with strongly
2894 connected components. */
2895 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
2896 if (s == NULL || !s->inlinable || !e->inline_failed)
2897 continue;
2899 /* Do not consider functions not declared inline, unless inlining of small functions or -finline-functions is enabled for the caller. */
2900 if (!DECL_DECLARED_INLINE_P (callee->decl)
2901 && !opt_for_fn (node->decl, flag_inline_small_functions)
2902 && !opt_for_fn (node->decl, flag_inline_functions))
2903 continue;
2905 if (dump_enabled_p ())
2906 dump_printf_loc (MSG_NOTE, e->call_stmt,
2907 "Considering inline candidate %C.\n",
2908 callee);
2910 if (!can_early_inline_edge_p (e))
2911 continue;
2913 if (e->recursive_p ())
2915 if (dump_enabled_p ())
2916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt,
2917 " Not inlining: recursive call.\n");
2918 continue;
2921 if (!want_early_inline_function_p (e))
2922 continue;
2924 if (dump_enabled_p ())
2925 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, e->call_stmt,
2926 " Inlining %C into %C.\n",
2927 callee, e->caller);
2928 inline_call (e, true, NULL, NULL, false);
2929 inlined = true;
2932 if (inlined)
2933 ipa_update_overall_fn_summary (node);
2935 return inlined;
2938 unsigned int
2939 early_inliner (function *fun)
2941 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2942 struct cgraph_edge *edge;
2943 unsigned int todo = 0;
2944 int iterations = 0;
2945 bool inlined = false;
2947 if (seen_error ())
2948 return 0;
2950 /* Do nothing if data structures for the ipa-inliner are already computed. This
2951 happens when some pass decides to construct a new function and
2952 cgraph_add_new_function calls lowering passes and early optimization on
2953 it. This may confuse us when the early inliner decides to inline a call to
2954 a function clone, because function clones don't have a parameter list in
2955 ipa-prop matching their signature. */
2956 if (ipa_node_params_sum)
2957 return 0;
2959 if (flag_checking)
2960 node->verify ();
2961 node->remove_all_references ();
2963 /* Even when not optimizing or not inlining, inline always-inline
2964 functions. */
2965 inlined = inline_always_inline_functions (node);
2967 if (!optimize
2968 || flag_no_inline
2969 || !flag_early_inlining
2970 /* Never inline regular functions into always-inline functions
2971 during incremental inlining. This is unfortunate, as functions calling
2972 always-inline functions will get less optimized, but at the
2973 same time inlining a function that calls an always-inline
2974 function into an always-inline function might introduce
2975 cycles of edges to be always inlined in the callgraph.
2977 We might want to be smarter and just avoid this type of inlining. */
2978 || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2979 && lookup_attribute ("always_inline",
2980 DECL_ATTRIBUTES (node->decl))))
2982 else if (lookup_attribute ("flatten",
2983 DECL_ATTRIBUTES (node->decl)) != NULL)
2985 /* When the function is marked to be flattened, recursively inline
2986 all calls in it. */
2987 if (dump_enabled_p ())
2988 dump_printf (MSG_OPTIMIZED_LOCATIONS,
2989 "Flattening %C\n", node);
2990 flatten_function (node, true, true);
2991 inlined = true;
2993 else
2995 /* If some always_inline functions were inlined, apply the changes.
2996 This way we will not account always-inline functions toward growth limits;
2997 moreover, we will inline calls from always-inline functions that we skipped
2998 previously because of the conditional above. */
2999 if (inlined)
3001 timevar_push (TV_INTEGRATION);
3002 todo |= optimize_inline_calls (current_function_decl);
3003 /* optimize_inline_calls call above might have introduced new
3004 statements that don't have inline parameters computed. */
3005 for (edge = node->callees; edge; edge = edge->next_callee)
3007 /* We can encounter a not-yet-analyzed function during
3008 early inlining on callgraphs with strongly
3009 connected components. */
3010 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3011 es->call_stmt_size
3012 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3013 es->call_stmt_time
3014 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3016 ipa_update_overall_fn_summary (node);
3017 inlined = false;
3018 timevar_pop (TV_INTEGRATION);
3020 /* We iterate incremental inlining to get trivial cases of indirect
3021 inlining. */
3022 while (iterations < opt_for_fn (node->decl,
3023 param_early_inliner_max_iterations)
3024 && early_inline_small_functions (node))
3026 timevar_push (TV_INTEGRATION);
3027 todo |= optimize_inline_calls (current_function_decl);
3029 /* Technically we ought to recompute inline parameters so the new
3030 iteration of the early inliner works as expected. However, the
3031 values are approximately right, so we only need to update edge
3032 info that might be cleared out for newly discovered edges. */
3033 for (edge = node->callees; edge; edge = edge->next_callee)
3035 /* We have no summary for new bound store calls yet. */
3036 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
3037 es->call_stmt_size
3038 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
3039 es->call_stmt_time
3040 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
3042 if (iterations < opt_for_fn (node->decl,
3043 param_early_inliner_max_iterations) - 1)
3044 ipa_update_overall_fn_summary (node);
3045 timevar_pop (TV_INTEGRATION);
3046 iterations++;
3047 inlined = false;
3049 if (dump_file)
3050 fprintf (dump_file, "Iterations: %i\n", iterations);
3053 if (inlined)
3055 timevar_push (TV_INTEGRATION);
3056 todo |= optimize_inline_calls (current_function_decl);
3057 timevar_pop (TV_INTEGRATION);
3060 fun->always_inline_functions_inlined = true;
3062 return todo;
3065 /* Do inlining of small functions. Doing so early helps profiling and other
3066 passes to be somewhat more effective and avoids some code duplication in
3067 later real inlining pass for testcases with very many function calls. */
3069 namespace {
3071 const pass_data pass_data_early_inline =
3073 GIMPLE_PASS, /* type */
3074 "einline", /* name */
3075 OPTGROUP_INLINE, /* optinfo_flags */
3076 TV_EARLY_INLINING, /* tv_id */
3077 PROP_ssa, /* properties_required */
3078 0, /* properties_provided */
3079 0, /* properties_destroyed */
3080 0, /* todo_flags_start */
3081 0, /* todo_flags_finish */
3084 class pass_early_inline : public gimple_opt_pass
3086 public:
3087 pass_early_inline (gcc::context *ctxt)
3088 : gimple_opt_pass (pass_data_early_inline, ctxt)
3091 /* opt_pass methods: */
3092 virtual unsigned int execute (function *);
3094 }; // class pass_early_inline
3096 unsigned int
3097 pass_early_inline::execute (function *fun)
3099 return early_inliner (fun);
3102 } // anon namespace
3104 gimple_opt_pass *
3105 make_pass_early_inline (gcc::context *ctxt)
3107 return new pass_early_inline (ctxt);
3110 namespace {
3112 const pass_data pass_data_ipa_inline =
3114 IPA_PASS, /* type */
3115 "inline", /* name */
3116 OPTGROUP_INLINE, /* optinfo_flags */
3117 TV_IPA_INLINING, /* tv_id */
3118 0, /* properties_required */
3119 0, /* properties_provided */
3120 0, /* properties_destroyed */
3121 0, /* todo_flags_start */
3122 ( TODO_dump_symtab ), /* todo_flags_finish */
3125 class pass_ipa_inline : public ipa_opt_pass_d
3127 public:
3128 pass_ipa_inline (gcc::context *ctxt)
3129 : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
3130 NULL, /* generate_summary */
3131 NULL, /* write_summary */
3132 NULL, /* read_summary */
3133 NULL, /* write_optimization_summary */
3134 NULL, /* read_optimization_summary */
3135 NULL, /* stmt_fixup */
3136 0, /* function_transform_todo_flags_start */
3137 inline_transform, /* function_transform */
3138 NULL) /* variable_transform */
3141 /* opt_pass methods: */
3142 virtual unsigned int execute (function *) { return ipa_inline (); }
3144 }; // class pass_ipa_inline
3146 } // anon namespace
3148 ipa_opt_pass_d *
3149 make_pass_ipa_inline (gcc::context *ctxt)
3151 return new pass_ipa_inline (ctxt);