gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p allow to check that particular inlining is allowed
  28       by the limits specified by user (allowed function growth, growth and so
  29       on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner was already optimized
  50          and results of early inlining adds a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handle the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allows doing so.
  76
  77          This heuristics is not very good on inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "params.h"
 109 #include "profile.h"
 110 #include "symbol-summary.h"
 111 #include "tree-vrp.h"
 112 #include "ipa-prop.h"
 113 #include "ipa-fnsummary.h"
 114 #include "ipa-inline.h"
 115 #include "ipa-utils.h"
 116 #include "sreal.h"
 117 #include "auto-profile.h"
 118 #include "builtins.h"
 119 #include "fibonacci_heap.h"
 120 #include "stringpool.h"
 121 #include "attribs.h"
 122 #include "asan.h"
 123
 124 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 125 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 126
 127 /* Statistics we collect about inlining algorithm.  */
 128 static int overall_size;
 129 static profile_count max_count;
 130 static profile_count spec_rem;
 131
 132 /* Return false when inlining edge E would lead to violating
 133    limits on function unit growth or stack usage growth.
 134
 135    The relative function body growth limit is present generally
 136    to avoid problems with non-linear behavior of the compiler.
 137    To allow inlining huge functions into tiny wrapper, the limit
 138    is always based on the bigger of the two functions considered.
 139
 140    For stack growth limits we always base the growth in stack usage
 141    of the callers.  We want to prevent applications from segfaulting
 142    on stack overflow when functions with huge stack frames gets
 143    inlined. */
 144
 145 static bool
 146 caller_growth_limits (struct cgraph_edge *e)
 147 {
 148   struct cgraph_node *to = e->caller;
 149   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 150   int newsize;
 151   int limit = 0;
 152   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 153   ipa_fn_summary *info, *what_info, *outer_info = ipa_fn_summaries->get (to);
 154
 155   /* Look for function e->caller is inlined to.  While doing
 156      so work out the largest function body on the way.  As
 157      described above, we want to base our function growth
 158      limits based on that.  Not on the self size of the
 159      outer function, not on the self size of inline code
 160      we immediately inline to.  This is the most relaxed
 161      interpretation of the rule "do not grow large functions
 162      too much in order to prevent compiler from exploding".  */
 163   while (true)
 164     {
 165       info = ipa_fn_summaries->get (to);
 166       if (limit < info->self_size)
 167         limit = info->self_size;
 168       if (stack_size_limit < info->estimated_self_stack_size)
 169         stack_size_limit = info->estimated_self_stack_size;
 170       if (to->global.inlined_to)
 171         to = to->callers->caller;
 172       else
 173         break;
 174     }
 175
 176   what_info = ipa_fn_summaries->get (what);
 177
 178   if (limit < what_info->self_size)
 179     limit = what_info->self_size;
 180
 181   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 182
 183   /* Check the size after inlining against the function limits.  But allow
 184      the function to shrink if it went over the limits by forced inlining.  */
 185   newsize = estimate_size_after_inlining (to, e);
 186   if (newsize >= info->size
 187       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 188       && newsize > limit)
 189     {
 190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 191       return false;
 192     }
 193
 194   if (!what_info->estimated_stack_size)
 195     return true;
 196
 197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 198      due to large i/o datastructures used by the Fortran front-end.
 199      We ought to ignore this limit when we know that the edge is executed
 200      on every invocation of the caller (i.e. its call statement dominates
 201      exit block).  We do not track this information, yet.  */
 202   stack_size_limit += ((gcov_type)stack_size_limit
 203                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 204
 205   inlined_stack = (outer_info->stack_frame_offset
 206                    + outer_info->estimated_self_stack_size
 207                    + what_info->estimated_stack_size);
 208   /* Check new stack consumption with stack consumption at the place
 209      stack is used.  */
 210   if (inlined_stack > stack_size_limit
 211       /* If function already has large stack usage from sibling
 212          inline call, we can inline, too.
 213          This bit overoptimistically assume that we are good at stack
 214          packing.  */
 215       && inlined_stack > info->estimated_stack_size
 216       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 217     {
 218       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 219       return false;
 220     }
 221   return true;
 222 }
 223
 224 /* Dump info about why inlining has failed.  */
 225
 226 static void
 227 report_inline_failed_reason (struct cgraph_edge *e)
 228 {
 229   if (dump_file)
 230     {
 231       fprintf (dump_file, "  not inlinable: %s -> %s, %s\n",
 232                e->caller->dump_name (),
 233                e->callee->dump_name (),
 234                cgraph_inline_failed_string (e->inline_failed));
 235       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 236            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 237           && e->caller->lto_file_data
 238           && e->callee->ultimate_alias_target ()->lto_file_data)
 239         {
 240           fprintf (dump_file, "  LTO objects: %s, %s\n",
 241                    e->caller->lto_file_data->file_name,
 242                    e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 243         }
 244       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 245         cl_target_option_print_diff
 246          (dump_file, 2, target_opts_for_fn (e->caller->decl),
 247           target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 248       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 249         cl_optimization_print_diff
 250           (dump_file, 2, opts_for_fn (e->caller->decl),
 251            opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 252     }
 253 }
 254
 255  /* Decide whether sanitizer-related attributes allow inlining. */
 256
 257 static bool
 258 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 259 {
 260   if (!caller || !callee)
 261     return true;
 262
 263   return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
 264            == sanitize_flags_p (SANITIZE_ADDRESS, callee))
 265           && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
 266               == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
 267           && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
 268               == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
 269 }
 270
 271 /* Used for flags where it is safe to inline when caller's value is
 272    grater than callee's.  */
 273 #define check_maybe_up(flag) \
 274       (opts_for_fn (caller->decl)->x_##flag             \
 275        != opts_for_fn (callee->decl)->x_##flag          \
 276        && (!always_inline                               \
 277            || opts_for_fn (caller->decl)->x_##flag      \
 278               < opts_for_fn (callee->decl)->x_##flag))
 279 /* Used for flags where it is safe to inline when caller's value is
 280    smaller than callee's.  */
 281 #define check_maybe_down(flag) \
 282       (opts_for_fn (caller->decl)->x_##flag             \
 283        != opts_for_fn (callee->decl)->x_##flag          \
 284        && (!always_inline                               \
 285            || opts_for_fn (caller->decl)->x_##flag      \
 286               > opts_for_fn (callee->decl)->x_##flag))
 287 /* Used for flags where exact match is needed for correctness.  */
 288 #define check_match(flag) \
 289       (opts_for_fn (caller->decl)->x_##flag             \
 290        != opts_for_fn (callee->decl)->x_##flag)
 291
 292 /* Decide if we can inline the edge and possibly update
 293    inline_failed reason.
 294    We check whether inlining is possible at all and whether
 295    caller growth limits allow doing so.
 296
 297    if REPORT is true, output reason to the dump file. */
 298
 299 static bool
 300 can_inline_edge_p (struct cgraph_edge *e, bool report,
 301                    bool early = false)
 302 {
 303   gcc_checking_assert (e->inline_failed);
 304
 305   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 306     {
 307       if (report)
 308         report_inline_failed_reason (e);
 309       return false;
 310     }
 311
 312   bool inlinable = true;
 313   enum availability avail;
 314   cgraph_node *caller = e->caller->global.inlined_to
 315                         ? e->caller->global.inlined_to : e->caller;
 316   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 317
 318   if (!callee->definition)
 319     {
 320       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 321       inlinable = false;
 322     }
 323   if (!early && (!opt_for_fn (callee->decl, optimize)
 324                  || !opt_for_fn (caller->decl, optimize)))
 325     {
 326       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
 327       inlinable = false;
 328     }
 329   else if (callee->calls_comdat_local)
 330     {
 331       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 332       inlinable = false;
 333     }
 334   else if (avail <= AVAIL_INTERPOSABLE)
 335     {
 336       e->inline_failed = CIF_OVERWRITABLE;
 337       inlinable = false;
 338     }
 339   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 340      initialized to one of FINAL_ERROR reasons.  */
 341   else if (e->call_stmt_cannot_inline_p)
 342     gcc_unreachable ();
 343   /* Don't inline if the functions have different EH personalities.  */
 344   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 345            && DECL_FUNCTION_PERSONALITY (callee->decl)
 346            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 347                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 348     {
 349       e->inline_failed = CIF_EH_PERSONALITY;
 350       inlinable = false;
 351     }
 352   /* TM pure functions should not be inlined into non-TM_pure
 353      functions.  */
 354   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 355     {
 356       e->inline_failed = CIF_UNSPECIFIED;
 357       inlinable = false;
 358     }
 359   /* Check compatibility of target optimization options.  */
 360   else if (!targetm.target_option.can_inline_p (caller->decl,
 361                                                 callee->decl))
 362     {
 363       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 364       inlinable = false;
 365     }
 366   else if (!ipa_fn_summaries->get (callee)->inlinable)
 367     {
 368       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 369       inlinable = false;
 370     }
 371   /* Don't inline a function with mismatched sanitization attributes. */
 372   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 373     {
 374       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 375       inlinable = false;
 376     }
 377   if (!inlinable && report)
 378     report_inline_failed_reason (e);
 379   return inlinable;
 380 }
 381
 382 /* Decide if we can inline the edge and possibly update
 383    inline_failed reason.
 384    We check whether inlining is possible at all and whether
 385    caller growth limits allow doing so.
 386
 387    if REPORT is true, output reason to the dump file.
 388
 389    if DISREGARD_LIMITS is true, ignore size limits.  */
 390
 391 static bool
 392 can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report,
 393                              bool disregard_limits = false, bool early = false)
 394 {
 395   gcc_checking_assert (e->inline_failed);
 396
 397   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 398     {
 399       if (report)
 400         report_inline_failed_reason (e);
 401       return false;
 402     }
 403
 404   bool inlinable = true;
 405   enum availability avail;
 406   cgraph_node *caller = e->caller->global.inlined_to
 407                         ? e->caller->global.inlined_to : e->caller;
 408   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 409   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
 410   tree callee_tree
 411     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 412   /* Check if caller growth allows the inlining.  */
 413   if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 414       && !disregard_limits
 415       && !lookup_attribute ("flatten",
 416                  DECL_ATTRIBUTES (caller->decl))
 417       && !caller_growth_limits (e))
 418     inlinable = false;
 419   /* Don't inline a function with a higher optimization level than the
 420      caller.  FIXME: this is really just tip of iceberg of handling
 421      optimization attribute.  */
 422   else if (caller_tree != callee_tree)
 423     {
 424       bool always_inline =
 425              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 426               && lookup_attribute ("always_inline",
 427                                    DECL_ATTRIBUTES (callee->decl)));
 428       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
 429       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 430
 431      /* Until GCC 4.9 we did not check the semantics alterning flags
 432         bellow and inline across optimization boundry.
 433         Enabling checks bellow breaks several packages by refusing
 434         to inline library always_inline functions. See PR65873.
 435         Disable the check for early inlining for now until better solution
 436         is found.  */
 437      if (always_inline && early)
 438         ;
 439       /* There are some options that change IL semantics which means
 440          we cannot inline in these cases for correctness reason.
 441          Not even for always_inline declared functions.  */
 442      else if (check_match (flag_wrapv)
 443               || check_match (flag_trapv)
 444               || check_match (flag_pcc_struct_return)
 445               /* When caller or callee does FP math, be sure FP codegen flags
 446                  compatible.  */
 447               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 448                   && (check_maybe_up (flag_rounding_math)
 449                       || check_maybe_up (flag_trapping_math)
 450                       || check_maybe_down (flag_unsafe_math_optimizations)
 451                       || check_maybe_down (flag_finite_math_only)
 452                       || check_maybe_up (flag_signaling_nans)
 453                       || check_maybe_down (flag_cx_limited_range)
 454                       || check_maybe_up (flag_signed_zeros)
 455                       || check_maybe_down (flag_associative_math)
 456                       || check_maybe_down (flag_reciprocal_math)
 457                       || check_maybe_down (flag_fp_int_builtin_inexact)
 458                       /* Strictly speaking only when the callee contains function
 459                          calls that may end up setting errno.  */
 460                       || check_maybe_up (flag_errno_math)))
 461               /* We do not want to make code compiled with exceptions to be
 462                  brought into a non-EH function unless we know that the callee
 463                  does not throw.
 464                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 465               || (check_maybe_up (flag_non_call_exceptions)
 466                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 467               || (check_maybe_up (flag_exceptions)
 468                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 469               /* When devirtualization is diabled for callee, it is not safe
 470                  to inline it as we possibly mangled the type info.
 471                  Allow early inlining of always inlines.  */
 472               || (!early && check_maybe_down (flag_devirtualize)))
 473         {
 474           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 475           inlinable = false;
 476         }
 477       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 478       else if (always_inline)
 479         ;
 480       /* When user added an attribute to the callee honor it.  */
 481       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 482                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 483         {
 484           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 485           inlinable = false;
 486         }
 487       /* If explicit optimize attribute are not used, the mismatch is caused
 488          by different command line options used to build different units.
 489          Do not care about COMDAT functions - those are intended to be
 490          optimized with the optimization flags of module they are used in.
 491          Also do not care about mixing up size/speed optimization when
 492          DECL_DISREGARD_INLINE_LIMITS is set.  */
 493       else if ((callee->merged_comdat
 494                 && !lookup_attribute ("optimize",
 495                                       DECL_ATTRIBUTES (caller->decl)))
 496                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 497         ;
 498       /* If mismatch is caused by merging two LTO units with different
 499          optimizationflags we want to be bit nicer.  However never inline
 500          if one of functions is not optimized at all.  */
 501       else if (!opt_for_fn (callee->decl, optimize)
 502                || !opt_for_fn (caller->decl, optimize))
 503         {
 504           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 505           inlinable = false;
 506         }
 507       /* If callee is optimized for size and caller is not, allow inlining if
 508          code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
 509          is inline (and thus likely an unified comdat).  This will allow caller
 510          to run faster.  */
 511       else if (opt_for_fn (callee->decl, optimize_size)
 512                > opt_for_fn (caller->decl, optimize_size))
 513         {
 514           int growth = estimate_edge_growth (e);
 515           if (growth > 0
 516               && (!DECL_DECLARED_INLINE_P (callee->decl)
 517                   && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
 518                                     MAX_INLINE_INSNS_AUTO)))
 519             {
 520               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 521               inlinable = false;
 522             }
 523         }
 524       /* If callee is more aggressively optimized for performance than caller,
 525          we generally want to inline only cheap (runtime wise) functions.  */
 526       else if (opt_for_fn (callee->decl, optimize_size)
 527                < opt_for_fn (caller->decl, optimize_size)
 528                || (opt_for_fn (callee->decl, optimize)
 529                    > opt_for_fn (caller->decl, optimize)))
 530         {
 531           if (estimate_edge_time (e)
 532               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
 533             {
 534               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 535               inlinable = false;
 536             }
 537         }
 538
 539     }
 540
 541   if (!inlinable && report)
 542     report_inline_failed_reason (e);
 543   return inlinable;
 544 }
 545
 546
 547 /* Return true if the edge E is inlinable during early inlining.  */
 548
 549 static bool
 550 can_early_inline_edge_p (struct cgraph_edge *e)
 551 {
 552   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 553   /* Early inliner might get called at WPA stage when IPA pass adds new
 554      function.  In this case we can not really do any of early inlining
 555      because function bodies are missing.  */
 556   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 557     return false;
 558   if (!gimple_has_body_p (callee->decl))
 559     {
 560       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 561       return false;
 562     }
 563   /* In early inliner some of callees may not be in SSA form yet
 564      (i.e. the callgraph is cyclic and we did not process
 565      the callee by early inliner, yet).  We don't have CIF code for this
 566      case; later we will re-do the decision in the real inliner.  */
 567   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 568       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 569     {
 570       if (dump_file)
 571         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 572       return false;
 573     }
 574   if (!can_inline_edge_p (e, true, true)
 575       || !can_inline_edge_by_limits_p (e, true, false, true))
 576     return false;
 577   return true;
 578 }
 579
 580
 581 /* Return number of calls in N.  Ignore cheap builtins.  */
 582
 583 static int
 584 num_calls (struct cgraph_node *n)
 585 {
 586   struct cgraph_edge *e;
 587   int num = 0;
 588
 589   for (e = n->callees; e; e = e->next_callee)
 590     if (!is_inexpensive_builtin (e->callee->decl))
 591       num++;
 592   return num;
 593 }
 594
 595
 596 /* Return true if we are interested in inlining small function.  */
 597
 598 static bool
 599 want_early_inline_function_p (struct cgraph_edge *e)
 600 {
 601   bool want_inline = true;
 602   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 603
 604   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 605     ;
 606   /* For AutoFDO, we need to make sure that before profile summary, all
 607      hot paths' IR look exactly the same as profiled binary. As a result,
 608      in einliner, we will disregard size limit and inline those callsites
 609      that are:
 610        * inlined in the profiled binary, and
 611        * the cloned callee has enough samples to be considered "hot".  */
 612   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 613     ;
 614   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 615            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 616     {
 617       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 618       report_inline_failed_reason (e);
 619       want_inline = false;
 620     }
 621   else
 622     {
 623       int growth = estimate_edge_growth (e);
 624       int n;
 625
 626       if (growth <= 0)
 627         ;
 628       else if (!e->maybe_hot_p ()
 629                && growth > 0)
 630         {
 631           if (dump_file)
 632             fprintf (dump_file, "  will not early inline: %s->%s, "
 633                      "call is cold and code would grow by %i\n",
 634                      e->caller->dump_name (),
 635                      callee->dump_name (),
 636                      growth);
 637           want_inline = false;
 638         }
 639       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 640         {
 641           if (dump_file)
 642             fprintf (dump_file, "  will not early inline: %s->%s, "
 643                      "growth %i exceeds --param early-inlining-insns\n",
 644                      e->caller->dump_name (),
 645                      callee->dump_name (),
 646                      growth);
 647           want_inline = false;
 648         }
 649       else if ((n = num_calls (callee)) != 0
 650                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 651         {
 652           if (dump_file)
 653             fprintf (dump_file, "  will not early inline: %s->%s, "
 654                      "growth %i exceeds --param early-inlining-insns "
 655                      "divided by number of calls\n",
 656                      e->caller->dump_name (),
 657                      callee->dump_name (),
 658                      growth);
 659           want_inline = false;
 660         }
 661     }
 662   return want_inline;
 663 }
 664
 665 /* Compute time of the edge->caller + edge->callee execution when inlining
 666    does not happen.  */
 667
 668 inline sreal
 669 compute_uninlined_call_time (struct cgraph_edge *edge,
 670                              sreal uninlined_call_time)
 671 {
 672   cgraph_node *caller = (edge->caller->global.inlined_to
 673                          ? edge->caller->global.inlined_to
 674                          : edge->caller);
 675
 676   sreal freq = edge->sreal_frequency ();
 677   if (freq > 0)
 678     uninlined_call_time *= freq;
 679   else
 680     uninlined_call_time = uninlined_call_time >> 11;
 681
 682   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 683   return uninlined_call_time + caller_time;
 684 }
 685
 686 /* Same as compute_uinlined_call_time but compute time when inlining
 687    does happen.  */
 688
 689 inline sreal
 690 compute_inlined_call_time (struct cgraph_edge *edge,
 691                            sreal time)
 692 {
 693   cgraph_node *caller = (edge->caller->global.inlined_to
 694                          ? edge->caller->global.inlined_to
 695                          : edge->caller);
 696   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 697
 698   sreal freq = edge->sreal_frequency ();
 699   if (freq > 0)
 700     time *= freq;
 701   else
 702     time = time >> 11;
 703
 704   /* This calculation should match one in ipa-inline-analysis.c
 705      (estimate_edge_size_and_time).  */
 706   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 707   time += caller_time;
 708   if (time <= 0)
 709     time = ((sreal) 1) >> 8;
 710   gcc_checking_assert (time >= 0);
 711   return time;
 712 }
 713
 714 /* Return true if the speedup for inlining E is bigger than
 715    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 716
 717 static bool
 718 big_speedup_p (struct cgraph_edge *e)
 719 {
 720   sreal unspec_time;
 721   sreal spec_time = estimate_edge_time (e, &unspec_time);
 722   sreal time = compute_uninlined_call_time (e, unspec_time);
 723   sreal inlined_time = compute_inlined_call_time (e, spec_time);
 724
 725   if ((time - inlined_time) * 100
 726       > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)))
 727     return true;
 728   return false;
 729 }
 730
 731 /* Return true if we are interested in inlining small function.
 732    When REPORT is true, report reason to dump file.  */
 733
 734 static bool
 735 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 736 {
 737   bool want_inline = true;
 738   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 739
 740   /* Allow this function to be called before can_inline_edge_p,
 741      since it's usually cheaper.  */
 742   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 743     want_inline = false;
 744   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 745     ;
 746   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 747            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 748     {
 749       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 750       want_inline = false;
 751     }
 752   /* Do fast and conservative check if the function can be good
 753      inline candidate.  At the moment we allow inline hints to
 754      promote non-inline functions to inline and we increase
 755      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 756   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 757            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 758            && ipa_fn_summaries->get (callee)->min_size
 759                 - ipa_call_summaries->get (e)->call_stmt_size
 760               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 761     {
 762       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 763       want_inline = false;
 764     }
 765   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 766             || e->count.ipa ().nonzero_p ())
 767            && ipa_fn_summaries->get (callee)->min_size
 768                 - ipa_call_summaries->get (e)->call_stmt_size
 769               > 16 * MAX_INLINE_INSNS_SINGLE)
 770     {
 771       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 772                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 773                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 774       want_inline = false;
 775     }
 776   else
 777     {
 778       int growth = estimate_edge_growth (e);
 779       ipa_hints hints = estimate_edge_hints (e);
 780       bool big_speedup = big_speedup_p (e);
 781
 782       if (growth <= 0)
 783         ;
 784       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 785          hints suggests that inlining given function is very profitable.  */
 786       else if (DECL_DECLARED_INLINE_P (callee->decl)
 787                && growth >= MAX_INLINE_INSNS_SINGLE
 788                && ((!big_speedup
 789                     && !(hints & (INLINE_HINT_indirect_call
 790                                   | INLINE_HINT_known_hot
 791                                   | INLINE_HINT_loop_iterations
 792                                   | INLINE_HINT_array_index
 793                                   | INLINE_HINT_loop_stride)))
 794                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 795         {
 796           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 797           want_inline = false;
 798         }
 799       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 800                && !opt_for_fn (e->caller->decl, flag_inline_functions))
 801         {
 802           /* growth_likely_positive is expensive, always test it last.  */
 803           if (growth >= MAX_INLINE_INSNS_SINGLE
 804               || growth_likely_positive (callee, growth))
 805             {
 806               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 807               want_inline = false;
 808             }
 809         }
 810       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 811          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 812          inlining given function is very profitable.  */
 813       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 814                && !big_speedup
 815                && !(hints & INLINE_HINT_known_hot)
 816                && growth >= ((hints & (INLINE_HINT_indirect_call
 817                                        | INLINE_HINT_loop_iterations
 818                                        | INLINE_HINT_array_index
 819                                        | INLINE_HINT_loop_stride))
 820                              ? MAX (MAX_INLINE_INSNS_AUTO,
 821                                     MAX_INLINE_INSNS_SINGLE)
 822                              : MAX_INLINE_INSNS_AUTO))
 823         {
 824           /* growth_likely_positive is expensive, always test it last.  */
 825           if (growth >= MAX_INLINE_INSNS_SINGLE
 826               || growth_likely_positive (callee, growth))
 827             {
 828               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 829               want_inline = false;
 830             }
 831         }
 832       /* If call is cold, do not inline when function body would grow. */
 833       else if (!e->maybe_hot_p ()
 834                && (growth >= MAX_INLINE_INSNS_SINGLE
 835                    || growth_likely_positive (callee, growth)))
 836         {
 837           e->inline_failed = CIF_UNLIKELY_CALL;
 838           want_inline = false;
 839         }
 840     }
 841   if (!want_inline && report)
 842     report_inline_failed_reason (e);
 843   return want_inline;
 844 }
 845
 846 /* EDGE is self recursive edge.
 847    We hand two cases - when function A is inlining into itself
 848    or when function A is being inlined into another inliner copy of function
 849    A within function B.
 850
 851    In first case OUTER_NODE points to the toplevel copy of A, while
 852    in the second case OUTER_NODE points to the outermost copy of A in B.
 853
 854    In both cases we want to be extra selective since
 855    inlining the call will just introduce new recursive calls to appear.  */
 856
 857 static bool
 858 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 859                                    struct cgraph_node *outer_node,
 860                                    bool peeling,
 861                                    int depth)
 862 {
 863   char const *reason = NULL;
 864   bool want_inline = true;
 865   sreal caller_freq = 1;
 866   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 867
 868   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 869     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 870
 871   if (!edge->maybe_hot_p ())
 872     {
 873       reason = "recursive call is cold";
 874       want_inline = false;
 875     }
 876   else if (depth > max_depth)
 877     {
 878       reason = "--param max-inline-recursive-depth exceeded.";
 879       want_inline = false;
 880     }
 881   else if (outer_node->global.inlined_to
 882            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 883     {
 884       reason = "caller frequency is 0";
 885       want_inline = false;
 886     }
 887
 888   if (!want_inline)
 889     ;
 890   /* Inlining of self recursive function into copy of itself within other
 891      function is transformation similar to loop peeling.
 892
 893      Peeling is profitable if we can inline enough copies to make probability
 894      of actual call to the self recursive function very small.  Be sure that
 895      the probability of recursion is small.
 896
 897      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 898      This way the expected number of recursion is at most max_depth.  */
 899   else if (peeling)
 900     {
 901       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
 902       int i;
 903       for (i = 1; i < depth; i++)
 904         max_prob = max_prob * max_prob;
 905       if (edge->sreal_frequency () >= max_prob * caller_freq)
 906         {
 907           reason = "frequency of recursive call is too large";
 908           want_inline = false;
 909         }
 910     }
 911   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
 912      recursion depth is large.  We reduce function call overhead and increase
 913      chances that things fit in hardware return predictor.
 914
 915      Recursive inlining might however increase cost of stack frame setup
 916      actually slowing down functions whose recursion tree is wide rather than
 917      deep.
 918
 919      Deciding reliably on when to do recursive inlining without profile feedback
 920      is tricky.  For now we disable recursive inlining when probability of self
 921      recursion is low.
 922
 923      Recursive inlining of self recursive call within loop also results in
 924      large loop depths that generally optimize badly.  We may want to throttle
 925      down inlining in those cases.  In particular this seems to happen in one
 926      of libstdc++ rb tree methods.  */
 927   else
 928     {
 929       if (edge->sreal_frequency () * 100
 930           <= caller_freq
 931              * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
 932         {
 933           reason = "frequency of recursive call is too small";
 934           want_inline = false;
 935         }
 936     }
 937   if (!want_inline && dump_file)
 938     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 939   return want_inline;
 940 }
 941
 942 /* Return true when NODE has uninlinable caller;
 943    set HAS_HOT_CALL if it has hot call.
 944    Worker for cgraph_for_node_and_aliases.  */
 945
 946 static bool
 947 check_callers (struct cgraph_node *node, void *has_hot_call)
 948 {
 949   struct cgraph_edge *e;
 950    for (e = node->callers; e; e = e->next_caller)
 951      {
 952        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
 953            || !opt_for_fn (e->caller->decl, optimize))
 954          return true;
 955        if (!can_inline_edge_p (e, true))
 956          return true;
 957        if (e->recursive_p ())
 958          return true;
 959        if (!can_inline_edge_by_limits_p (e, true))
 960          return true;
 961        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 962          *(bool *)has_hot_call = true;
 963      }
 964   return false;
 965 }
 966
 967 /* If NODE has a caller, return true.  */
 968
 969 static bool
 970 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 971 {
 972   if (node->callers)
 973     return true;
 974   return false;
 975 }
 976
 977 /* Decide if inlining NODE would reduce unit size by eliminating
 978    the offline copy of function.
 979    When COLD is true the cold calls are considered, too.  */
 980
 981 static bool
 982 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 983 {
 984   bool has_hot_call = false;
 985
 986   /* Aliases gets inlined along with the function they alias.  */
 987   if (node->alias)
 988     return false;
 989   /* Already inlined?  */
 990   if (node->global.inlined_to)
 991     return false;
 992   /* Does it have callers?  */
 993   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
 994     return false;
 995   /* Inlining into all callers would increase size?  */
 996   if (estimate_growth (node) > 0)
 997     return false;
 998   /* All inlines must be possible.  */
 999   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
1000                                          true))
1001     return false;
1002   if (!cold && !has_hot_call)
1003     return false;
1004   return true;
1005 }
1006
1007 /* A cost model driving the inlining heuristics in a way so the edges with
1008    smallest badness are inlined first.  After each inlining is performed
1009    the costs of all caller edges of nodes affected are recomputed so the
1010    metrics may accurately depend on values such as number of inlinable callers
1011    of the function or function body size.  */
1012
1013 static sreal
1014 edge_badness (struct cgraph_edge *edge, bool dump)
1015 {
1016   sreal badness;
1017   int growth;
1018   sreal edge_time, unspec_edge_time;
1019   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1020   struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1021   ipa_hints hints;
1022   cgraph_node *caller = (edge->caller->global.inlined_to
1023                          ? edge->caller->global.inlined_to
1024                          : edge->caller);
1025
1026   growth = estimate_edge_growth (edge);
1027   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1028   hints = estimate_edge_hints (edge);
1029   gcc_checking_assert (edge_time >= 0);
1030   /* Check that inlined time is better, but tolerate some roundoff issues.
1031      FIXME: When callee profile drops to 0 we account calls more.  This
1032      should be fixed by never doing that.  */
1033   gcc_checking_assert ((edge_time * 100
1034                         - callee_info->time * 101).to_int () <= 0
1035                         || callee->count.ipa ().initialized_p ());
1036   gcc_checking_assert (growth <= callee_info->size);
1037
1038   if (dump)
1039     {
1040       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1041                edge->caller->dump_name (),
1042                edge->callee->dump_name ());
1043       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1044                growth,
1045                edge_time.to_double (),
1046                unspec_edge_time.to_double ());
1047       ipa_dump_hints (dump_file, hints);
1048       if (big_speedup_p (edge))
1049         fprintf (dump_file, " big_speedup");
1050       fprintf (dump_file, "\n");
1051     }
1052
1053   /* Always prefer inlining saving code size.  */
1054   if (growth <= 0)
1055     {
1056       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1057       if (dump)
1058         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1059                  growth);
1060     }
1061    /* Inlining into EXTERNAL functions is not going to change anything unless
1062       they are themselves inlined.  */
1063    else if (DECL_EXTERNAL (caller->decl))
1064     {
1065       if (dump)
1066         fprintf (dump_file, "      max: function is external\n");
1067       return sreal::max ();
1068     }
1069   /* When profile is available. Compute badness as:
1070
1071                  time_saved * caller_count
1072      goodness =  -------------------------------------------------
1073                  growth_of_caller * overall_growth * combined_size
1074
1075      badness = - goodness
1076
1077      Again use negative value to make calls with profile appear hotter
1078      then calls without.
1079   */
1080   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1081            || caller->count.ipa ().nonzero_p ())
1082     {
1083       sreal numerator, denominator;
1084       int overall_growth;
1085       sreal inlined_time = compute_inlined_call_time (edge, edge_time);
1086
1087       numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
1088                    - inlined_time);
1089       if (numerator <= 0)
1090         numerator = ((sreal) 1 >> 8);
1091       if (caller->count.ipa ().nonzero_p ())
1092         numerator *= caller->count.ipa ().to_gcov_type ();
1093       else if (caller->count.ipa ().initialized_p ())
1094         numerator = numerator >> 11;
1095       denominator = growth;
1096
1097       overall_growth = callee_info->growth;
1098
1099       /* Look for inliner wrappers of the form:
1100
1101          inline_caller ()
1102            {
1103              do_fast_job...
1104              if (need_more_work)
1105                noninline_callee ();
1106            }
1107          Withhout panilizing this case, we usually inline noninline_callee
1108          into the inline_caller because overall_growth is small preventing
1109          further inlining of inline_caller.
1110
1111          Penalize only callgraph edges to functions with small overall
1112          growth ...
1113         */
1114       if (growth > overall_growth
1115           /* ... and having only one caller which is not inlined ... */
1116           && callee_info->single_caller
1117           && !edge->caller->global.inlined_to
1118           /* ... and edges executed only conditionally ... */
1119           && edge->sreal_frequency () < 1
1120           /* ... consider case where callee is not inline but caller is ... */
1121           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1122                && DECL_DECLARED_INLINE_P (caller->decl))
1123               /* ... or when early optimizers decided to split and edge
1124                  frequency still indicates splitting is a win ... */
1125               || (callee->split_part && !caller->split_part
1126                   && edge->sreal_frequency () * 100
1127                      < PARAM_VALUE
1128                           (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
1129                   /* ... and do not overwrite user specified hints.   */
1130                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1131                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1132         {
1133           struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1134           int caller_growth = caller_info->growth;
1135
1136           /* Only apply the penalty when caller looks like inline candidate,
1137              and it is not called once and.  */
1138           if (!caller_info->single_caller && overall_growth < caller_growth
1139               && caller_info->inlinable
1140               && caller_info->size
1141                  < (DECL_DECLARED_INLINE_P (caller->decl)
1142                     ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1143             {
1144               if (dump)
1145                 fprintf (dump_file,
1146                          "     Wrapper penalty. Increasing growth %i to %i\n",
1147                          overall_growth, caller_growth);
1148               overall_growth = caller_growth;
1149             }
1150         }
1151       if (overall_growth > 0)
1152         {
1153           /* Strongly preffer functions with few callers that can be inlined
1154              fully.  The square root here leads to smaller binaries at average.
1155              Watch however for extreme cases and return to linear function
1156              when growth is large.  */
1157           if (overall_growth < 256)
1158             overall_growth *= overall_growth;
1159           else
1160             overall_growth += 256 * 256 - 256;
1161           denominator *= overall_growth;
1162         }
1163       denominator *= inlined_time;
1164
1165       badness = - numerator / denominator;
1166
1167       if (dump)
1168         {
1169           fprintf (dump_file,
1170                    "      %f: guessed profile. frequency %f, count %" PRId64
1171                    " caller count %" PRId64
1172                    " time w/o inlining %f, time with inlining %f"
1173                    " overall growth %i (current) %i (original)"
1174                    " %i (compensated)\n",
1175                    badness.to_double (),
1176                    edge->sreal_frequency ().to_double (),
1177                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1178                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1179                    compute_uninlined_call_time (edge,
1180                                                 unspec_edge_time).to_double (),
1181                    inlined_time.to_double (),
1182                    estimate_growth (callee),
1183                    callee_info->growth, overall_growth);
1184         }
1185     }
1186   /* When function local profile is not available or it does not give
1187      useful information (ie frequency is zero), base the cost on
1188      loop nest and overall size growth, so we optimize for overall number
1189      of functions fully inlined in program.  */
1190   else
1191     {
1192       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1193       badness = growth;
1194
1195       /* Decrease badness if call is nested.  */
1196       if (badness > 0)
1197         badness = badness >> nest;
1198       else
1199         badness = badness << nest;
1200       if (dump)
1201         fprintf (dump_file, "      %f: no profile. nest %i\n",
1202                  badness.to_double (), nest);
1203     }
1204   gcc_checking_assert (badness != 0);
1205
1206   if (edge->recursive_p ())
1207     badness = badness.shift (badness > 0 ? 4 : -4);
1208   if ((hints & (INLINE_HINT_indirect_call
1209                 | INLINE_HINT_loop_iterations
1210                 | INLINE_HINT_array_index
1211                 | INLINE_HINT_loop_stride))
1212       || callee_info->growth <= 0)
1213     badness = badness.shift (badness > 0 ? -2 : 2);
1214   if (hints & (INLINE_HINT_same_scc))
1215     badness = badness.shift (badness > 0 ? 3 : -3);
1216   else if (hints & (INLINE_HINT_in_scc))
1217     badness = badness.shift (badness > 0 ? 2 : -2);
1218   else if (hints & (INLINE_HINT_cross_module))
1219     badness = badness.shift (badness > 0 ? 1 : -1);
1220   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1221     badness = badness.shift (badness > 0 ? -4 : 4);
1222   else if ((hints & INLINE_HINT_declared_inline))
1223     badness = badness.shift (badness > 0 ? -3 : 3);
1224   if (dump)
1225     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1226   return badness;
1227 }
1228
1229 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1230 static inline void
1231 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1232 {
1233   sreal badness = edge_badness (edge, false);
1234   if (edge->aux)
1235     {
1236       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1237       gcc_checking_assert (n->get_data () == edge);
1238
1239       /* fibonacci_heap::replace_key does busy updating of the
1240          heap that is unnecesarily expensive.
1241          We do lazy increases: after extracting minimum if the key
1242          turns out to be out of date, it is re-inserted into heap
1243          with correct value.  */
1244       if (badness < n->get_key ())
1245         {
1246           if (dump_file && (dump_flags & TDF_DETAILS))
1247             {
1248               fprintf (dump_file,
1249                        "  decreasing badness %s -> %s, %f to %f\n",
1250                        edge->caller->dump_name (),
1251                        edge->callee->dump_name (),
1252                        n->get_key ().to_double (),
1253                        badness.to_double ());
1254             }
1255           heap->decrease_key (n, badness);
1256         }
1257     }
1258   else
1259     {
1260        if (dump_file && (dump_flags & TDF_DETAILS))
1261          {
1262            fprintf (dump_file,
1263                     "  enqueuing call %s -> %s, badness %f\n",
1264                     edge->caller->dump_name (),
1265                     edge->callee->dump_name (),
1266                     badness.to_double ());
1267          }
1268       edge->aux = heap->insert (badness, edge);
1269     }
1270 }
1271
1272
1273 /* NODE was inlined.
1274    All caller edges needs to be resetted because
1275    size estimates change. Similarly callees needs reset
1276    because better context may be known.  */
1277
1278 static void
1279 reset_edge_caches (struct cgraph_node *node)
1280 {
1281   struct cgraph_edge *edge;
1282   struct cgraph_edge *e = node->callees;
1283   struct cgraph_node *where = node;
1284   struct ipa_ref *ref;
1285
1286   if (where->global.inlined_to)
1287     where = where->global.inlined_to;
1288
1289   for (edge = where->callers; edge; edge = edge->next_caller)
1290     if (edge->inline_failed)
1291       reset_edge_growth_cache (edge);
1292
1293   FOR_EACH_ALIAS (where, ref)
1294     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1295
1296   if (!e)
1297     return;
1298
1299   while (true)
1300     if (!e->inline_failed && e->callee->callees)
1301       e = e->callee->callees;
1302     else
1303       {
1304         if (e->inline_failed)
1305           reset_edge_growth_cache (e);
1306         if (e->next_callee)
1307           e = e->next_callee;
1308         else
1309           {
1310             do
1311               {
1312                 if (e->caller == node)
1313                   return;
1314                 e = e->caller->callers;
1315               }
1316             while (!e->next_callee);
1317             e = e->next_callee;
1318           }
1319       }
1320 }
1321
1322 /* Recompute HEAP nodes for each of caller of NODE.
1323    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1324    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1325    it is inlinable. Otherwise check all edges.  */
1326
1327 static void
1328 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1329                     bitmap updated_nodes,
1330                     struct cgraph_edge *check_inlinablity_for)
1331 {
1332   struct cgraph_edge *edge;
1333   struct ipa_ref *ref;
1334
1335   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1336       || node->global.inlined_to)
1337     return;
1338   if (!bitmap_set_bit (updated_nodes, node->uid))
1339     return;
1340
1341   FOR_EACH_ALIAS (node, ref)
1342     {
1343       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1344       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1345     }
1346
1347   for (edge = node->callers; edge; edge = edge->next_caller)
1348     if (edge->inline_failed)
1349       {
1350         if (!check_inlinablity_for
1351             || check_inlinablity_for == edge)
1352           {
1353             if (can_inline_edge_p (edge, false)
1354                 && want_inline_small_function_p (edge, false)
1355                 && can_inline_edge_by_limits_p (edge, false))
1356               update_edge_key (heap, edge);
1357             else if (edge->aux)
1358               {
1359                 report_inline_failed_reason (edge);
1360                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1361                 edge->aux = NULL;
1362               }
1363           }
1364         else if (edge->aux)
1365           update_edge_key (heap, edge);
1366       }
1367 }
1368
1369 /* Recompute HEAP nodes for each uninlined call in NODE.
1370    This is used when we know that edge badnesses are going only to increase
1371    (we introduced new call site) and thus all we need is to insert newly
1372    created edges into heap.  */
1373
1374 static void
1375 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1376                     bitmap updated_nodes)
1377 {
1378   struct cgraph_edge *e = node->callees;
1379
1380   if (!e)
1381     return;
1382   while (true)
1383     if (!e->inline_failed && e->callee->callees)
1384       e = e->callee->callees;
1385     else
1386       {
1387         enum availability avail;
1388         struct cgraph_node *callee;
1389         /* We do not reset callee growth cache here.  Since we added a new call,
1390            growth chould have just increased and consequentely badness metric
1391            don't need updating.  */
1392         if (e->inline_failed
1393             && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1394             && ipa_fn_summaries->get (callee)->inlinable
1395             && avail >= AVAIL_AVAILABLE
1396             && !bitmap_bit_p (updated_nodes, callee->uid))
1397           {
1398             if (can_inline_edge_p (e, false)
1399                 && want_inline_small_function_p (e, false)
1400                 && can_inline_edge_by_limits_p (e, false))
1401               update_edge_key (heap, e);
1402             else if (e->aux)
1403               {
1404                 report_inline_failed_reason (e);
1405                 heap->delete_node ((edge_heap_node_t *) e->aux);
1406                 e->aux = NULL;
1407               }
1408           }
1409         if (e->next_callee)
1410           e = e->next_callee;
1411         else
1412           {
1413             do
1414               {
1415                 if (e->caller == node)
1416                   return;
1417                 e = e->caller->callers;
1418               }
1419             while (!e->next_callee);
1420             e = e->next_callee;
1421           }
1422       }
1423 }
1424
1425 /* Enqueue all recursive calls from NODE into priority queue depending on
1426    how likely we want to recursively inline the call.  */
1427
1428 static void
1429 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1430                         edge_heap_t *heap)
1431 {
1432   struct cgraph_edge *e;
1433   enum availability avail;
1434
1435   for (e = where->callees; e; e = e->next_callee)
1436     if (e->callee == node
1437         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1438             && avail > AVAIL_INTERPOSABLE))
1439       heap->insert (-e->sreal_frequency (), e);
1440   for (e = where->callees; e; e = e->next_callee)
1441     if (!e->inline_failed)
1442       lookup_recursive_calls (node, e->callee, heap);
1443 }
1444
1445 /* Decide on recursive inlining: in the case function has recursive calls,
1446    inline until body size reaches given argument.  If any new indirect edges
1447    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1448    is NULL.  */
1449
1450 static bool
1451 recursive_inlining (struct cgraph_edge *edge,
1452                     vec<cgraph_edge *> *new_edges)
1453 {
1454   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1455   edge_heap_t heap (sreal::min ());
1456   struct cgraph_node *node;
1457   struct cgraph_edge *e;
1458   struct cgraph_node *master_clone = NULL, *next;
1459   int depth = 0;
1460   int n = 0;
1461
1462   node = edge->caller;
1463   if (node->global.inlined_to)
1464     node = node->global.inlined_to;
1465
1466   if (DECL_DECLARED_INLINE_P (node->decl))
1467     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1468
1469   /* Make sure that function is small enough to be considered for inlining.  */
1470   if (estimate_size_after_inlining (node, edge)  >= limit)
1471     return false;
1472   lookup_recursive_calls (node, node, &heap);
1473   if (heap.empty ())
1474     return false;
1475
1476   if (dump_file)
1477     fprintf (dump_file,
1478              "  Performing recursive inlining on %s\n",
1479              node->name ());
1480
1481   /* Do the inlining and update list of recursive call during process.  */
1482   while (!heap.empty ())
1483     {
1484       struct cgraph_edge *curr = heap.extract_min ();
1485       struct cgraph_node *cnode, *dest = curr->callee;
1486
1487       if (!can_inline_edge_p (curr, true)
1488           || can_inline_edge_by_limits_p (curr, true))
1489         continue;
1490
1491       /* MASTER_CLONE is produced in the case we already started modified
1492          the function. Be sure to redirect edge to the original body before
1493          estimating growths otherwise we will be seeing growths after inlining
1494          the already modified body.  */
1495       if (master_clone)
1496         {
1497           curr->redirect_callee (master_clone);
1498           reset_edge_growth_cache (curr);
1499         }
1500
1501       if (estimate_size_after_inlining (node, curr) > limit)
1502         {
1503           curr->redirect_callee (dest);
1504           reset_edge_growth_cache (curr);
1505           break;
1506         }
1507
1508       depth = 1;
1509       for (cnode = curr->caller;
1510            cnode->global.inlined_to; cnode = cnode->callers->caller)
1511         if (node->decl
1512             == curr->callee->ultimate_alias_target ()->decl)
1513           depth++;
1514
1515       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1516         {
1517           curr->redirect_callee (dest);
1518           reset_edge_growth_cache (curr);
1519           continue;
1520         }
1521
1522       if (dump_file)
1523         {
1524           fprintf (dump_file,
1525                    "   Inlining call of depth %i", depth);
1526           if (node->count.nonzero_p ())
1527             {
1528               fprintf (dump_file, " called approx. %.2f times per call",
1529                        (double)curr->count.to_gcov_type ()
1530                        / node->count.to_gcov_type ());
1531             }
1532           fprintf (dump_file, "\n");
1533         }
1534       if (!master_clone)
1535         {
1536           /* We need original clone to copy around.  */
1537           master_clone = node->create_clone (node->decl, node->count,
1538             false, vNULL, true, NULL, NULL);
1539           for (e = master_clone->callees; e; e = e->next_callee)
1540             if (!e->inline_failed)
1541               clone_inlined_nodes (e, true, false, NULL);
1542           curr->redirect_callee (master_clone);
1543           reset_edge_growth_cache (curr);
1544         }
1545
1546       inline_call (curr, false, new_edges, &overall_size, true);
1547       lookup_recursive_calls (node, curr->callee, &heap);
1548       n++;
1549     }
1550
1551   if (!heap.empty () && dump_file)
1552     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1553
1554   if (!master_clone)
1555     return false;
1556
1557   if (dump_file)
1558     fprintf (dump_file,
1559              "\n   Inlined %i times, "
1560              "body grown from size %i to %i, time %f to %f\n", n,
1561              ipa_fn_summaries->get (master_clone)->size,
1562              ipa_fn_summaries->get (node)->size,
1563              ipa_fn_summaries->get (master_clone)->time.to_double (),
1564              ipa_fn_summaries->get (node)->time.to_double ());
1565
1566   /* Remove master clone we used for inlining.  We rely that clones inlined
1567      into master clone gets queued just before master clone so we don't
1568      need recursion.  */
1569   for (node = symtab->first_function (); node != master_clone;
1570        node = next)
1571     {
1572       next = symtab->next_function (node);
1573       if (node->global.inlined_to == master_clone)
1574         node->remove ();
1575     }
1576   master_clone->remove ();
1577   return true;
1578 }
1579
1580
1581 /* Given whole compilation unit estimate of INSNS, compute how large we can
1582    allow the unit to grow.  */
1583
1584 static int
1585 compute_max_insns (int insns)
1586 {
1587   int max_insns = insns;
1588   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1589     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1590
1591   return ((int64_t) max_insns
1592           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1593 }
1594
1595
1596 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1597
1598 static void
1599 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1600 {
1601   while (new_edges.length () > 0)
1602     {
1603       struct cgraph_edge *edge = new_edges.pop ();
1604
1605       gcc_assert (!edge->aux);
1606       if (edge->inline_failed
1607           && can_inline_edge_p (edge, true)
1608           && want_inline_small_function_p (edge, true)
1609           && can_inline_edge_by_limits_p (edge, true))
1610         edge->aux = heap->insert (edge_badness (edge, false), edge);
1611     }
1612 }
1613
1614 /* Remove EDGE from the fibheap.  */
1615
1616 static void
1617 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1618 {
1619   if (e->aux)
1620     {
1621       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1622       e->aux = NULL;
1623     }
1624 }
1625
1626 /* Return true if speculation of edge E seems useful.
1627    If ANTICIPATE_INLINING is true, be conservative and hope that E
1628    may get inlined.  */
1629
1630 bool
1631 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1632 {
1633   enum availability avail;
1634   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1635                                                                  e->caller);
1636   struct cgraph_edge *direct, *indirect;
1637   struct ipa_ref *ref;
1638
1639   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1640
1641   if (!e->maybe_hot_p ())
1642     return false;
1643
1644   /* See if IP optimizations found something potentially useful about the
1645      function.  For now we look only for CONST/PURE flags.  Almost everything
1646      else we propagate is useless.  */
1647   if (avail >= AVAIL_AVAILABLE)
1648     {
1649       int ecf_flags = flags_from_decl_or_type (target->decl);
1650       if (ecf_flags & ECF_CONST)
1651         {
1652           e->speculative_call_info (direct, indirect, ref);
1653           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1654             return true;
1655         }
1656       else if (ecf_flags & ECF_PURE)
1657         {
1658           e->speculative_call_info (direct, indirect, ref);
1659           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1660             return true;
1661         }
1662     }
1663   /* If we did not managed to inline the function nor redirect
1664      to an ipa-cp clone (that are seen by having local flag set),
1665      it is probably pointless to inline it unless hardware is missing
1666      indirect call predictor.  */
1667   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1668     return false;
1669   /* For overwritable targets there is not much to do.  */
1670   if (e->inline_failed
1671       && (!can_inline_edge_p (e, false)
1672           || !can_inline_edge_by_limits_p (e, false, true)))
1673     return false;
1674   /* OK, speculation seems interesting.  */
1675   return true;
1676 }
1677
1678 /* We know that EDGE is not going to be inlined.
1679    See if we can remove speculation.  */
1680
1681 static void
1682 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1683 {
1684   if (edge->speculative && !speculation_useful_p (edge, false))
1685     {
1686       struct cgraph_node *node = edge->caller;
1687       struct cgraph_node *where = node->global.inlined_to
1688                                   ? node->global.inlined_to : node;
1689       auto_bitmap updated_nodes;
1690
1691       if (edge->count.ipa ().initialized_p ())
1692         spec_rem += edge->count.ipa ();
1693       edge->resolve_speculation ();
1694       reset_edge_caches (where);
1695       ipa_update_overall_fn_summary (where);
1696       update_caller_keys (edge_heap, where,
1697                           updated_nodes, NULL);
1698       update_callee_keys (edge_heap, where,
1699                           updated_nodes);
1700     }
1701 }
1702
1703 /* Return true if NODE should be accounted for overall size estimate.
1704    Skip all nodes optimized for size so we can measure the growth of hot
1705    part of program no matter of the padding.  */
1706
1707 bool
1708 inline_account_function_p (struct cgraph_node *node)
1709 {
1710    return (!DECL_EXTERNAL (node->decl)
1711            && !opt_for_fn (node->decl, optimize_size)
1712            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1713 }
1714
1715 /* Count number of callers of NODE and store it into DATA (that
1716    points to int.  Worker for cgraph_for_node_and_aliases.  */
1717
1718 static bool
1719 sum_callers (struct cgraph_node *node, void *data)
1720 {
1721   struct cgraph_edge *e;
1722   int *num_calls = (int *)data;
1723
1724   for (e = node->callers; e; e = e->next_caller)
1725     (*num_calls)++;
1726   return false;
1727 }
1728
1729 /* We use greedy algorithm for inlining of small functions:
1730    All inline candidates are put into prioritized heap ordered in
1731    increasing badness.
1732
1733    The inlining of small functions is bounded by unit growth parameters.  */
1734
1735 static void
1736 inline_small_functions (void)
1737 {
1738   struct cgraph_node *node;
1739   struct cgraph_edge *edge;
1740   edge_heap_t edge_heap (sreal::min ());
1741   auto_bitmap updated_nodes;
1742   int min_size, max_size;
1743   auto_vec<cgraph_edge *> new_indirect_edges;
1744   int initial_size = 0;
1745   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1746   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1747   new_indirect_edges.create (8);
1748
1749   edge_removal_hook_holder
1750     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1751
1752   /* Compute overall unit size and other global parameters used by badness
1753      metrics.  */
1754
1755   max_count = profile_count::uninitialized ();
1756   ipa_reduced_postorder (order, true, true, NULL);
1757   free (order);
1758
1759   FOR_EACH_DEFINED_FUNCTION (node)
1760     if (!node->global.inlined_to)
1761       {
1762         if (!node->alias && node->analyzed
1763             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1764             && opt_for_fn (node->decl, optimize))
1765           {
1766             struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1767             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1768
1769             /* Do not account external functions, they will be optimized out
1770                if not inlined.  Also only count the non-cold portion of program.  */
1771             if (inline_account_function_p (node))
1772               initial_size += info->size;
1773             info->growth = estimate_growth (node);
1774
1775             int num_calls = 0;
1776             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1777                                                true);
1778             if (num_calls == 1)
1779               info->single_caller = true;
1780             if (dfs && dfs->next_cycle)
1781               {
1782                 struct cgraph_node *n2;
1783                 int id = dfs->scc_no + 1;
1784                 for (n2 = node; n2;
1785                      n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle)
1786                   if (opt_for_fn (n2->decl, optimize))
1787                     {
1788                       struct ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
1789                       if (info2->scc_no)
1790                         break;
1791                       info2->scc_no = id;
1792                     }
1793               }
1794           }
1795
1796         for (edge = node->callers; edge; edge = edge->next_caller)
1797           max_count = max_count.max (edge->count.ipa ());
1798       }
1799   ipa_free_postorder_info ();
1800   initialize_growth_caches ();
1801
1802   if (dump_file)
1803     fprintf (dump_file,
1804              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1805              initial_size);
1806
1807   overall_size = initial_size;
1808   max_size = compute_max_insns (overall_size);
1809   min_size = overall_size;
1810
1811   /* Populate the heap with all edges we might inline.  */
1812
1813   FOR_EACH_DEFINED_FUNCTION (node)
1814     {
1815       bool update = false;
1816       struct cgraph_edge *next = NULL;
1817       bool has_speculative = false;
1818
1819       if (!opt_for_fn (node->decl, optimize))
1820         continue;
1821
1822       if (dump_file)
1823         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1824
1825       for (edge = node->callees; edge; edge = next)
1826         {
1827           next = edge->next_callee;
1828           if (edge->inline_failed
1829               && !edge->aux
1830               && can_inline_edge_p (edge, true)
1831               && want_inline_small_function_p (edge, true)
1832               && can_inline_edge_by_limits_p (edge, true)
1833               && edge->inline_failed)
1834             {
1835               gcc_assert (!edge->aux);
1836               update_edge_key (&edge_heap, edge);
1837             }
1838           if (edge->speculative)
1839             has_speculative = true;
1840         }
1841       if (has_speculative)
1842         for (edge = node->callees; edge; edge = next)
1843           if (edge->speculative && !speculation_useful_p (edge,
1844                                                           edge->aux != NULL))
1845             {
1846               edge->resolve_speculation ();
1847               update = true;
1848             }
1849       if (update)
1850         {
1851           struct cgraph_node *where = node->global.inlined_to
1852                                       ? node->global.inlined_to : node;
1853           ipa_update_overall_fn_summary (where);
1854           reset_edge_caches (where);
1855           update_caller_keys (&edge_heap, where,
1856                               updated_nodes, NULL);
1857           update_callee_keys (&edge_heap, where,
1858                               updated_nodes);
1859           bitmap_clear (updated_nodes);
1860         }
1861     }
1862
1863   gcc_assert (in_lto_p
1864               || !(max_count > 0)
1865               || (profile_info && flag_branch_probabilities));
1866
1867   while (!edge_heap.empty ())
1868     {
1869       int old_size = overall_size;
1870       struct cgraph_node *where, *callee;
1871       sreal badness = edge_heap.min_key ();
1872       sreal current_badness;
1873       int growth;
1874
1875       edge = edge_heap.extract_min ();
1876       gcc_assert (edge->aux);
1877       edge->aux = NULL;
1878       if (!edge->inline_failed || !edge->callee->analyzed)
1879         continue;
1880
1881 #if CHECKING_P
1882       /* Be sure that caches are maintained consistent.
1883          This check is affected by scaling roundoff errors when compiling for
1884          IPA this we skip it in that case.  */
1885       if (!edge->callee->count.ipa_p ()
1886           && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1887         {
1888           sreal cached_badness = edge_badness (edge, false);
1889
1890           int old_size_est = estimate_edge_size (edge);
1891           sreal old_time_est = estimate_edge_time (edge);
1892           int old_hints_est = estimate_edge_hints (edge);
1893
1894           reset_edge_growth_cache (edge);
1895           gcc_assert (old_size_est == estimate_edge_size (edge));
1896           gcc_assert (old_time_est == estimate_edge_time (edge));
1897           /* FIXME:
1898
1899              gcc_assert (old_hints_est == estimate_edge_hints (edge));
1900
1901              fails with profile feedback because some hints depends on
1902              maybe_hot_edge_p predicate and because callee gets inlined to other
1903              calls, the edge may become cold.
1904              This ought to be fixed by computing relative probabilities
1905              for given invocation but that will be better done once whole
1906              code is converted to sreals.  Disable for now and revert to "wrong"
1907              value so enable/disable checking paths agree.  */
1908           edge_growth_cache[edge->uid].hints = old_hints_est + 1;
1909
1910           /* When updating the edge costs, we only decrease badness in the keys.
1911              Increases of badness are handled lazilly; when we see key with out
1912              of date value on it, we re-insert it now.  */
1913           current_badness = edge_badness (edge, false);
1914           gcc_assert (cached_badness == current_badness);
1915           gcc_assert (current_badness >= badness);
1916         }
1917       else
1918         current_badness = edge_badness (edge, false);
1919 #else
1920       current_badness = edge_badness (edge, false);
1921 #endif
1922       if (current_badness != badness)
1923         {
1924           if (edge_heap.min () && current_badness > edge_heap.min_key ())
1925             {
1926               edge->aux = edge_heap.insert (current_badness, edge);
1927               continue;
1928             }
1929           else
1930             badness = current_badness;
1931         }
1932
1933       if (!can_inline_edge_p (edge, true)
1934           || !can_inline_edge_by_limits_p (edge, true))
1935         {
1936           resolve_noninline_speculation (&edge_heap, edge);
1937           continue;
1938         }
1939
1940       callee = edge->callee->ultimate_alias_target ();
1941       growth = estimate_edge_growth (edge);
1942       if (dump_file)
1943         {
1944           fprintf (dump_file,
1945                    "\nConsidering %s with %i size\n",
1946                    callee->dump_name (),
1947                    ipa_fn_summaries->get (callee)->size);
1948           fprintf (dump_file,
1949                    " to be inlined into %s in %s:%i\n"
1950                    " Estimated badness is %f, frequency %.2f.\n",
1951                    edge->caller->dump_name (),
1952                    edge->call_stmt
1953                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
1954                                                         edge->call_stmt))
1955                        > BUILTINS_LOCATION)
1956                    ? gimple_filename ((const gimple *) edge->call_stmt)
1957                    : "unknown",
1958                    edge->call_stmt
1959                    ? gimple_lineno ((const gimple *) edge->call_stmt)
1960                    : -1,
1961                    badness.to_double (),
1962                    edge->sreal_frequency ().to_double ());
1963           if (edge->count.ipa ().initialized_p ())
1964             {
1965               fprintf (dump_file, " Called ");
1966               edge->count.ipa ().dump (dump_file);
1967               fprintf (dump_file, " times\n");
1968             }
1969           if (dump_flags & TDF_DETAILS)
1970             edge_badness (edge, true);
1971         }
1972
1973       if (overall_size + growth > max_size
1974           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1975         {
1976           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1977           report_inline_failed_reason (edge);
1978           resolve_noninline_speculation (&edge_heap, edge);
1979           continue;
1980         }
1981
1982       if (!want_inline_small_function_p (edge, true))
1983         {
1984           resolve_noninline_speculation (&edge_heap, edge);
1985           continue;
1986         }
1987
1988       /* Heuristics for inlining small functions work poorly for
1989          recursive calls where we do effects similar to loop unrolling.
1990          When inlining such edge seems profitable, leave decision on
1991          specific inliner.  */
1992       if (edge->recursive_p ())
1993         {
1994           where = edge->caller;
1995           if (where->global.inlined_to)
1996             where = where->global.inlined_to;
1997           if (!recursive_inlining (edge,
1998                                    opt_for_fn (edge->caller->decl,
1999                                                flag_indirect_inlining)
2000                                    ? &new_indirect_edges : NULL))
2001             {
2002               edge->inline_failed = CIF_RECURSIVE_INLINING;
2003               resolve_noninline_speculation (&edge_heap, edge);
2004               continue;
2005             }
2006           reset_edge_caches (where);
2007           /* Recursive inliner inlines all recursive calls of the function
2008              at once. Consequently we need to update all callee keys.  */
2009           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2010             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2011           update_callee_keys (&edge_heap, where, updated_nodes);
2012           bitmap_clear (updated_nodes);
2013         }
2014       else
2015         {
2016           struct cgraph_node *outer_node = NULL;
2017           int depth = 0;
2018
2019           /* Consider the case where self recursive function A is inlined
2020              into B.  This is desired optimization in some cases, since it
2021              leads to effect similar of loop peeling and we might completely
2022              optimize out the recursive call.  However we must be extra
2023              selective.  */
2024
2025           where = edge->caller;
2026           while (where->global.inlined_to)
2027             {
2028               if (where->decl == callee->decl)
2029                 outer_node = where, depth++;
2030               where = where->callers->caller;
2031             }
2032           if (outer_node
2033               && !want_inline_self_recursive_call_p (edge, outer_node,
2034                                                      true, depth))
2035             {
2036               edge->inline_failed
2037                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2038                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2039               resolve_noninline_speculation (&edge_heap, edge);
2040               continue;
2041             }
2042           else if (depth && dump_file)
2043             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2044
2045           gcc_checking_assert (!callee->global.inlined_to);
2046           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2047           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2048
2049           reset_edge_caches (edge->callee);
2050
2051           update_callee_keys (&edge_heap, where, updated_nodes);
2052         }
2053       where = edge->caller;
2054       if (where->global.inlined_to)
2055         where = where->global.inlined_to;
2056
2057       /* Our profitability metric can depend on local properties
2058          such as number of inlinable calls and size of the function body.
2059          After inlining these properties might change for the function we
2060          inlined into (since it's body size changed) and for the functions
2061          called by function we inlined (since number of it inlinable callers
2062          might change).  */
2063       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2064       /* Offline copy count has possibly changed, recompute if profile is
2065          available.  */
2066       struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2067       if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2068         update_callee_keys (&edge_heap, n, updated_nodes);
2069       bitmap_clear (updated_nodes);
2070
2071       if (dump_file)
2072         {
2073           fprintf (dump_file,
2074                    " Inlined %s into %s which now has time %f and size %i, "
2075                    "net change of %+i.\n",
2076                    xstrdup_for_dump (edge->callee->name ()),
2077                    xstrdup_for_dump (edge->caller->name ()),
2078                    ipa_fn_summaries->get (edge->caller)->time.to_double (),
2079                    ipa_fn_summaries->get (edge->caller)->size,
2080                    overall_size - old_size);
2081         }
2082       if (min_size > overall_size)
2083         {
2084           min_size = overall_size;
2085           max_size = compute_max_insns (min_size);
2086
2087           if (dump_file)
2088             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2089         }
2090     }
2091
2092   free_growth_caches ();
2093   if (dump_file)
2094     fprintf (dump_file,
2095              "Unit growth for small function inlining: %i->%i (%i%%)\n",
2096              initial_size, overall_size,
2097              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2098   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2099 }
2100
2101 /* Flatten NODE.  Performed both during early inlining and
2102    at IPA inlining time.  */
2103
2104 static void
2105 flatten_function (struct cgraph_node *node, bool early)
2106 {
2107   struct cgraph_edge *e;
2108
2109   /* We shouldn't be called recursively when we are being processed.  */
2110   gcc_assert (node->aux == NULL);
2111
2112   node->aux = (void *) node;
2113
2114   for (e = node->callees; e; e = e->next_callee)
2115     {
2116       struct cgraph_node *orig_callee;
2117       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2118
2119       /* We've hit cycle?  It is time to give up.  */
2120       if (callee->aux)
2121         {
2122           if (dump_file)
2123             fprintf (dump_file,
2124                      "Not inlining %s into %s to avoid cycle.\n",
2125                      xstrdup_for_dump (callee->name ()),
2126                      xstrdup_for_dump (e->caller->name ()));
2127           if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2128             e->inline_failed = CIF_RECURSIVE_INLINING;
2129           continue;
2130         }
2131
2132       /* When the edge is already inlined, we just need to recurse into
2133          it in order to fully flatten the leaves.  */
2134       if (!e->inline_failed)
2135         {
2136           flatten_function (callee, early);
2137           continue;
2138         }
2139
2140       /* Flatten attribute needs to be processed during late inlining. For
2141          extra code quality we however do flattening during early optimization,
2142          too.  */
2143       if (!early
2144           ? !can_inline_edge_p (e, true)
2145             && !can_inline_edge_by_limits_p (e, true)
2146           : !can_early_inline_edge_p (e))
2147         continue;
2148
2149       if (e->recursive_p ())
2150         {
2151           if (dump_file)
2152             fprintf (dump_file, "Not inlining: recursive call.\n");
2153           continue;
2154         }
2155
2156       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2157           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2158         {
2159           if (dump_file)
2160             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2161           continue;
2162         }
2163
2164       /* Inline the edge and flatten the inline clone.  Avoid
2165          recursing through the original node if the node was cloned.  */
2166       if (dump_file)
2167         fprintf (dump_file, " Inlining %s into %s.\n",
2168                  xstrdup_for_dump (callee->name ()),
2169                  xstrdup_for_dump (e->caller->name ()));
2170       orig_callee = callee;
2171       inline_call (e, true, NULL, NULL, false);
2172       if (e->callee != orig_callee)
2173         orig_callee->aux = (void *) node;
2174       flatten_function (e->callee, early);
2175       if (e->callee != orig_callee)
2176         orig_callee->aux = NULL;
2177     }
2178
2179   node->aux = NULL;
2180   if (!node->global.inlined_to)
2181     ipa_update_overall_fn_summary (node);
2182 }
2183
2184 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2185    DATA points to number of calls originally found so we avoid infinite
2186    recursion.  */
2187
2188 static bool
2189 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2190                          hash_set<cgraph_node *> *callers)
2191 {
2192   int *num_calls = (int *)data;
2193   bool callee_removed = false;
2194
2195   while (node->callers && !node->global.inlined_to)
2196     {
2197       struct cgraph_node *caller = node->callers->caller;
2198
2199       if (!can_inline_edge_p (node->callers, true)
2200           || !can_inline_edge_by_limits_p (node->callers, true)
2201           || node->callers->recursive_p ())
2202         {
2203           if (dump_file)
2204             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2205           *num_calls = 0;
2206           return false;
2207         }
2208
2209       if (dump_file)
2210         {
2211           fprintf (dump_file,
2212                    "\nInlining %s size %i.\n",
2213                    node->name (),
2214                    ipa_fn_summaries->get (node)->size);
2215           fprintf (dump_file,
2216                    " Called once from %s %i insns.\n",
2217                    node->callers->caller->name (),
2218                    ipa_fn_summaries->get (node->callers->caller)->size);
2219         }
2220
2221       /* Remember which callers we inlined to, delaying updating the
2222          overall summary.  */
2223       callers->add (node->callers->caller);
2224       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2225       if (dump_file)
2226         fprintf (dump_file,
2227                  " Inlined into %s which now has %i size\n",
2228                  caller->name (),
2229                  ipa_fn_summaries->get (caller)->size);
2230       if (!(*num_calls)--)
2231         {
2232           if (dump_file)
2233             fprintf (dump_file, "New calls found; giving up.\n");
2234           return callee_removed;
2235         }
2236       if (callee_removed)
2237         return true;
2238     }
2239   return false;
2240 }
2241
2242 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2243    update.  */
2244
2245 static bool
2246 inline_to_all_callers (struct cgraph_node *node, void *data)
2247 {
2248   hash_set<cgraph_node *> callers;
2249   bool res = inline_to_all_callers_1 (node, data, &callers);
2250   /* Perform the delayed update of the overall summary of all callers
2251      processed.  This avoids quadratic behavior in the cases where
2252      we have a lot of calls to the same function.  */
2253   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2254        i != callers.end (); ++i)
2255     ipa_update_overall_fn_summary (*i);
2256   return res;
2257 }
2258
2259 /* Output overall time estimate.  */
2260 static void
2261 dump_overall_stats (void)
2262 {
2263   sreal sum_weighted = 0, sum = 0;
2264   struct cgraph_node *node;
2265
2266   FOR_EACH_DEFINED_FUNCTION (node)
2267     if (!node->global.inlined_to
2268         && !node->alias)
2269       {
2270         sreal time = ipa_fn_summaries->get (node)->time;
2271         sum += time;
2272         if (node->count.ipa ().initialized_p ())
2273           sum_weighted += time * node->count.ipa ().to_gcov_type ();
2274       }
2275   fprintf (dump_file, "Overall time estimate: "
2276            "%f weighted by profile: "
2277            "%f\n", sum.to_double (), sum_weighted.to_double ());
2278 }
2279
2280 /* Output some useful stats about inlining.  */
2281
2282 static void
2283 dump_inline_stats (void)
2284 {
2285   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2286   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2287   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2288   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2289   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2290   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2291   int64_t reason[CIF_N_REASONS][2];
2292   sreal reason_freq[CIF_N_REASONS];
2293   int i;
2294   struct cgraph_node *node;
2295
2296   memset (reason, 0, sizeof (reason));
2297   for (i=0; i < CIF_N_REASONS; i++)
2298     reason_freq[i] = 0;
2299   FOR_EACH_DEFINED_FUNCTION (node)
2300   {
2301     struct cgraph_edge *e;
2302     for (e = node->callees; e; e = e->next_callee)
2303       {
2304         if (e->inline_failed)
2305           {
2306             if (e->count.ipa ().initialized_p ())
2307               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2308             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2309             reason[(int) e->inline_failed][1] ++;
2310             if (DECL_VIRTUAL_P (e->callee->decl)
2311                 && e->count.ipa ().initialized_p ())
2312               {
2313                 if (e->indirect_inlining_edge)
2314                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2315                 else
2316                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2317               }
2318             else if (e->count.ipa ().initialized_p ())
2319               {
2320                 if (e->indirect_inlining_edge)
2321                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2322                 else
2323                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2324               }
2325           }
2326         else if (e->count.ipa ().initialized_p ())
2327           {
2328             if (e->speculative)
2329               {
2330                 if (DECL_VIRTUAL_P (e->callee->decl))
2331                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2332                 else
2333                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2334               }
2335             else if (DECL_VIRTUAL_P (e->callee->decl))
2336               {
2337                 if (e->indirect_inlining_edge)
2338                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2339                 else
2340                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2341               }
2342             else
2343               {
2344                 if (e->indirect_inlining_edge)
2345                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2346                 else
2347                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2348               }
2349           }
2350       }
2351     for (e = node->indirect_calls; e; e = e->next_callee)
2352       if (e->indirect_info->polymorphic
2353           & e->count.ipa ().initialized_p ())
2354         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2355       else if (e->count.ipa ().initialized_p ())
2356         indirect_cnt += e->count.ipa ().to_gcov_type ();
2357   }
2358   if (max_count.initialized_p ())
2359     {
2360       fprintf (dump_file,
2361                "Inlined %" PRId64 " + speculative "
2362                "%" PRId64 " + speculative polymorphic "
2363                "%" PRId64 " + previously indirect "
2364                "%" PRId64 " + virtual "
2365                "%" PRId64 " + virtual and previously indirect "
2366                "%" PRId64 "\n" "Not inlined "
2367                "%" PRId64 " + previously indirect "
2368                "%" PRId64 " + virtual "
2369                "%" PRId64 " + virtual and previously indirect "
2370                "%" PRId64 " + stil indirect "
2371                "%" PRId64 " + still indirect polymorphic "
2372                "%" PRId64 "\n", inlined_cnt,
2373                inlined_speculative, inlined_speculative_ply,
2374                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2375                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2376                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2377       fprintf (dump_file, "Removed speculations ");
2378       spec_rem.dump (dump_file);
2379       fprintf (dump_file, "\n");
2380     }
2381   dump_overall_stats ();
2382   fprintf (dump_file, "\nWhy inlining failed?\n");
2383   for (i = 0; i < CIF_N_REASONS; i++)
2384     if (reason[i][1])
2385       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2386                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2387                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2388 }
2389
2390 /* Called when node is removed.  */
2391
2392 static void
2393 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2394 {
2395   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2396     return;
2397
2398   hash_set<struct cgraph_node *> *removed
2399     = (hash_set<struct cgraph_node *> *) data;
2400   removed->add (node);
2401 }
2402
2403 /* Decide on the inlining.  We do so in the topological order to avoid
2404    expenses on updating data structures.  */
2405
2406 static unsigned int
2407 ipa_inline (void)
2408 {
2409   struct cgraph_node *node;
2410   int nnodes;
2411   struct cgraph_node **order;
2412   int i, j;
2413   int cold;
2414   bool remove_functions = false;
2415
2416   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2417
2418   if (dump_file)
2419     ipa_dump_fn_summaries (dump_file);
2420
2421   nnodes = ipa_reverse_postorder (order);
2422   spec_rem = profile_count::zero ();
2423
2424   FOR_EACH_FUNCTION (node)
2425     {
2426       node->aux = 0;
2427
2428       /* Recompute the default reasons for inlining because they may have
2429          changed during merging.  */
2430       if (in_lto_p)
2431         {
2432           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2433             {
2434               gcc_assert (e->inline_failed);
2435               initialize_inline_failed (e);
2436             }
2437           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2438             initialize_inline_failed (e);
2439         }
2440     }
2441
2442   if (dump_file)
2443     fprintf (dump_file, "\nFlattening functions:\n");
2444
2445   /* First shrink order array, so that it only contains nodes with
2446      flatten attribute.  */
2447   for (i = nnodes - 1, j = i; i >= 0; i--)
2448     {
2449       node = order[i];
2450       if (lookup_attribute ("flatten",
2451                             DECL_ATTRIBUTES (node->decl)) != NULL)
2452         order[j--] = order[i];
2453     }
2454
2455   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2456      nodes with flatten attribute.  If there is more than one such
2457      node, we need to register a node removal hook, as flatten_function
2458      could remove other nodes with flatten attribute.  See PR82801.  */
2459   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2460   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2461   if (j < nnodes - 2)
2462     {
2463       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2464       node_removal_hook_holder
2465         = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2466                                            flatten_removed_nodes);
2467     }
2468
2469   /* In the first pass handle functions to be flattened.  Do this with
2470      a priority so none of our later choices will make this impossible.  */
2471   for (i = nnodes - 1; i > j; i--)
2472     {
2473       node = order[i];
2474       if (flatten_removed_nodes
2475           && flatten_removed_nodes->contains (node))
2476         continue;
2477
2478       /* Handle nodes to be flattened.
2479          Ideally when processing callees we stop inlining at the
2480          entry of cycles, possibly cloning that entry point and
2481          try to flatten itself turning it into a self-recursive
2482          function.  */
2483       if (dump_file)
2484         fprintf (dump_file, "Flattening %s\n", node->name ());
2485       flatten_function (node, false);
2486     }
2487
2488   if (j < nnodes - 2)
2489     {
2490       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2491       delete flatten_removed_nodes;
2492     }
2493   free (order);
2494
2495   if (dump_file)
2496     dump_overall_stats ();
2497
2498   inline_small_functions ();
2499
2500   gcc_assert (symtab->state == IPA_SSA);
2501   symtab->state = IPA_SSA_AFTER_INLINING;
2502   /* Do first after-inlining removal.  We want to remove all "stale" extern
2503      inline functions and virtual functions so we really know what is called
2504      once.  */
2505   symtab->remove_unreachable_nodes (dump_file);
2506
2507   /* Inline functions with a property that after inlining into all callers the
2508      code size will shrink because the out-of-line copy is eliminated.
2509      We do this regardless on the callee size as long as function growth limits
2510      are met.  */
2511   if (dump_file)
2512     fprintf (dump_file,
2513              "\nDeciding on functions to be inlined into all callers and "
2514              "removing useless speculations:\n");
2515
2516   /* Inlining one function called once has good chance of preventing
2517      inlining other function into the same callee.  Ideally we should
2518      work in priority order, but probably inlining hot functions first
2519      is good cut without the extra pain of maintaining the queue.
2520
2521      ??? this is not really fitting the bill perfectly: inlining function
2522      into callee often leads to better optimization of callee due to
2523      increased context for optimization.
2524      For example if main() function calls a function that outputs help
2525      and then function that does the main optmization, we should inline
2526      the second with priority even if both calls are cold by themselves.
2527
2528      We probably want to implement new predicate replacing our use of
2529      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2530      to be hot.  */
2531   for (cold = 0; cold <= 1; cold ++)
2532     {
2533       FOR_EACH_DEFINED_FUNCTION (node)
2534         {
2535           struct cgraph_edge *edge, *next;
2536           bool update=false;
2537
2538           if (!opt_for_fn (node->decl, optimize)
2539               || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2540             continue;
2541
2542           for (edge = node->callees; edge; edge = next)
2543             {
2544               next = edge->next_callee;
2545               if (edge->speculative && !speculation_useful_p (edge, false))
2546                 {
2547                   if (edge->count.ipa ().initialized_p ())
2548                     spec_rem += edge->count.ipa ();
2549                   edge->resolve_speculation ();
2550                   update = true;
2551                   remove_functions = true;
2552                 }
2553             }
2554           if (update)
2555             {
2556               struct cgraph_node *where = node->global.inlined_to
2557                                           ? node->global.inlined_to : node;
2558               reset_edge_caches (where);
2559               ipa_update_overall_fn_summary (where);
2560             }
2561           if (want_inline_function_to_all_callers_p (node, cold))
2562             {
2563               int num_calls = 0;
2564               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2565                                                  true);
2566               while (node->call_for_symbol_and_aliases
2567                        (inline_to_all_callers, &num_calls, true))
2568                 ;
2569               remove_functions = true;
2570             }
2571         }
2572     }
2573
2574   /* Free ipa-prop structures if they are no longer needed.  */
2575   ipa_free_all_structures_after_iinln ();
2576
2577   if (dump_file)
2578     {
2579       fprintf (dump_file,
2580                "\nInlined %i calls, eliminated %i functions\n\n",
2581                ncalls_inlined, nfunctions_inlined);
2582       dump_inline_stats ();
2583     }
2584
2585   if (dump_file)
2586     ipa_dump_fn_summaries (dump_file);
2587   return remove_functions ? TODO_remove_functions : 0;
2588 }
2589
2590 /* Inline always-inline function calls in NODE.  */
2591
2592 static bool
2593 inline_always_inline_functions (struct cgraph_node *node)
2594 {
2595   struct cgraph_edge *e;
2596   bool inlined = false;
2597
2598   for (e = node->callees; e; e = e->next_callee)
2599     {
2600       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2601       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2602         continue;
2603
2604       if (e->recursive_p ())
2605         {
2606           if (dump_file)
2607             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2608                      e->callee->name ());
2609           e->inline_failed = CIF_RECURSIVE_INLINING;
2610           continue;
2611         }
2612
2613       if (!can_early_inline_edge_p (e))
2614         {
2615           /* Set inlined to true if the callee is marked "always_inline" but
2616              is not inlinable.  This will allow flagging an error later in
2617              expand_call_inline in tree-inline.c.  */
2618           if (lookup_attribute ("always_inline",
2619                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2620             inlined = true;
2621           continue;
2622         }
2623
2624       if (dump_file)
2625         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2626                  xstrdup_for_dump (e->callee->name ()),
2627                  xstrdup_for_dump (e->caller->name ()));
2628       inline_call (e, true, NULL, NULL, false);
2629       inlined = true;
2630     }
2631   if (inlined)
2632     ipa_update_overall_fn_summary (node);
2633
2634   return inlined;
2635 }
2636
2637 /* Decide on the inlining.  We do so in the topological order to avoid
2638    expenses on updating data structures.  */
2639
2640 static bool
2641 early_inline_small_functions (struct cgraph_node *node)
2642 {
2643   struct cgraph_edge *e;
2644   bool inlined = false;
2645
2646   for (e = node->callees; e; e = e->next_callee)
2647     {
2648       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2649       if (!ipa_fn_summaries->get (callee)->inlinable
2650           || !e->inline_failed)
2651         continue;
2652
2653       /* Do not consider functions not declared inline.  */
2654       if (!DECL_DECLARED_INLINE_P (callee->decl)
2655           && !opt_for_fn (node->decl, flag_inline_small_functions)
2656           && !opt_for_fn (node->decl, flag_inline_functions))
2657         continue;
2658
2659       if (dump_file)
2660         fprintf (dump_file, "Considering inline candidate %s.\n",
2661                  callee->name ());
2662
2663       if (!can_early_inline_edge_p (e))
2664         continue;
2665
2666       if (e->recursive_p ())
2667         {
2668           if (dump_file)
2669             fprintf (dump_file, "  Not inlining: recursive call.\n");
2670           continue;
2671         }
2672
2673       if (!want_early_inline_function_p (e))
2674         continue;
2675
2676       if (dump_file)
2677         fprintf (dump_file, " Inlining %s into %s.\n",
2678                  xstrdup_for_dump (callee->name ()),
2679                  xstrdup_for_dump (e->caller->name ()));
2680       inline_call (e, true, NULL, NULL, false);
2681       inlined = true;
2682     }
2683
2684   if (inlined)
2685     ipa_update_overall_fn_summary (node);
2686
2687   return inlined;
2688 }
2689
2690 unsigned int
2691 early_inliner (function *fun)
2692 {
2693   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2694   struct cgraph_edge *edge;
2695   unsigned int todo = 0;
2696   int iterations = 0;
2697   bool inlined = false;
2698
2699   if (seen_error ())
2700     return 0;
2701
2702   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2703      happens when some pass decides to construct new function and
2704      cgraph_add_new_function calls lowering passes and early optimization on
2705      it.  This may confuse ourself when early inliner decide to inline call to
2706      function clone, because function clones don't have parameter list in
2707      ipa-prop matching their signature.  */
2708   if (ipa_node_params_sum)
2709     return 0;
2710
2711   if (flag_checking)
2712     node->verify ();
2713   node->remove_all_references ();
2714
2715   /* Rebuild this reference because it dosn't depend on
2716      function's body and it's required to pass cgraph_node
2717      verification.  */
2718   if (node->instrumented_version
2719       && !node->instrumentation_clone)
2720     node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL);
2721
2722   /* Even when not optimizing or not inlining inline always-inline
2723      functions.  */
2724   inlined = inline_always_inline_functions (node);
2725
2726   if (!optimize
2727       || flag_no_inline
2728       || !flag_early_inlining
2729       /* Never inline regular functions into always-inline functions
2730          during incremental inlining.  This sucks as functions calling
2731          always inline functions will get less optimized, but at the
2732          same time inlining of functions calling always inline
2733          function into an always inline function might introduce
2734          cycles of edges to be always inlined in the callgraph.
2735
2736          We might want to be smarter and just avoid this type of inlining.  */
2737       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2738           && lookup_attribute ("always_inline",
2739                                DECL_ATTRIBUTES (node->decl))))
2740     ;
2741   else if (lookup_attribute ("flatten",
2742                              DECL_ATTRIBUTES (node->decl)) != NULL)
2743     {
2744       /* When the function is marked to be flattened, recursively inline
2745          all calls in it.  */
2746       if (dump_file)
2747         fprintf (dump_file,
2748                  "Flattening %s\n", node->name ());
2749       flatten_function (node, true);
2750       inlined = true;
2751     }
2752   else
2753     {
2754       /* If some always_inline functions was inlined, apply the changes.
2755          This way we will not account always inline into growth limits and
2756          moreover we will inline calls from always inlines that we skipped
2757          previously because of conditional above.  */
2758       if (inlined)
2759         {
2760           timevar_push (TV_INTEGRATION);
2761           todo |= optimize_inline_calls (current_function_decl);
2762           /* optimize_inline_calls call above might have introduced new
2763              statements that don't have inline parameters computed.  */
2764           for (edge = node->callees; edge; edge = edge->next_callee)
2765             {
2766               struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2767               es->call_stmt_size
2768                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2769               es->call_stmt_time
2770                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2771             }
2772           ipa_update_overall_fn_summary (node);
2773           inlined = false;
2774           timevar_pop (TV_INTEGRATION);
2775         }
2776       /* We iterate incremental inlining to get trivial cases of indirect
2777          inlining.  */
2778       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2779              && early_inline_small_functions (node))
2780         {
2781           timevar_push (TV_INTEGRATION);
2782           todo |= optimize_inline_calls (current_function_decl);
2783
2784           /* Technically we ought to recompute inline parameters so the new
2785              iteration of early inliner works as expected.  We however have
2786              values approximately right and thus we only need to update edge
2787              info that might be cleared out for newly discovered edges.  */
2788           for (edge = node->callees; edge; edge = edge->next_callee)
2789             {
2790               /* We have no summary for new bound store calls yet.  */
2791               struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2792               es->call_stmt_size
2793                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2794               es->call_stmt_time
2795                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2796
2797               if (edge->callee->decl
2798                   && !gimple_check_call_matching_types (
2799                       edge->call_stmt, edge->callee->decl, false))
2800                 {
2801                   edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2802                   edge->call_stmt_cannot_inline_p = true;
2803                 }
2804             }
2805           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2806             ipa_update_overall_fn_summary (node);
2807           timevar_pop (TV_INTEGRATION);
2808           iterations++;
2809           inlined = false;
2810         }
2811       if (dump_file)
2812         fprintf (dump_file, "Iterations: %i\n", iterations);
2813     }
2814
2815   if (inlined)
2816     {
2817       timevar_push (TV_INTEGRATION);
2818       todo |= optimize_inline_calls (current_function_decl);
2819       timevar_pop (TV_INTEGRATION);
2820     }
2821
2822   fun->always_inline_functions_inlined = true;
2823
2824   return todo;
2825 }
2826
2827 /* Do inlining of small functions.  Doing so early helps profiling and other
2828    passes to be somewhat more effective and avoids some code duplication in
2829    later real inlining pass for testcases with very many function calls.  */
2830
2831 namespace {
2832
2833 const pass_data pass_data_early_inline =
2834 {
2835   GIMPLE_PASS, /* type */
2836   "einline", /* name */
2837   OPTGROUP_INLINE, /* optinfo_flags */
2838   TV_EARLY_INLINING, /* tv_id */
2839   PROP_ssa, /* properties_required */
2840   0, /* properties_provided */
2841   0, /* properties_destroyed */
2842   0, /* todo_flags_start */
2843   0, /* todo_flags_finish */
2844 };
2845
2846 class pass_early_inline : public gimple_opt_pass
2847 {
2848 public:
2849   pass_early_inline (gcc::context *ctxt)
2850     : gimple_opt_pass (pass_data_early_inline, ctxt)
2851   {}
2852
2853   /* opt_pass methods: */
2854   virtual unsigned int execute (function *);
2855
2856 }; // class pass_early_inline
2857
2858 unsigned int
2859 pass_early_inline::execute (function *fun)
2860 {
2861   return early_inliner (fun);
2862 }
2863
2864 } // anon namespace
2865
2866 gimple_opt_pass *
2867 make_pass_early_inline (gcc::context *ctxt)
2868 {
2869   return new pass_early_inline (ctxt);
2870 }
2871
2872 namespace {
2873
2874 const pass_data pass_data_ipa_inline =
2875 {
2876   IPA_PASS, /* type */
2877   "inline", /* name */
2878   OPTGROUP_INLINE, /* optinfo_flags */
2879   TV_IPA_INLINING, /* tv_id */
2880   0, /* properties_required */
2881   0, /* properties_provided */
2882   0, /* properties_destroyed */
2883   0, /* todo_flags_start */
2884   ( TODO_dump_symtab ), /* todo_flags_finish */
2885 };
2886
2887 class pass_ipa_inline : public ipa_opt_pass_d
2888 {
2889 public:
2890   pass_ipa_inline (gcc::context *ctxt)
2891     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2892                       NULL, /* generate_summary */
2893                       NULL, /* write_summary */
2894                       NULL, /* read_summary */
2895                       NULL, /* write_optimization_summary */
2896                       NULL, /* read_optimization_summary */
2897                       NULL, /* stmt_fixup */
2898                       0, /* function_transform_todo_flags_start */
2899                       inline_transform, /* function_transform */
2900                       NULL) /* variable_transform */
2901   {}
2902
2903   /* opt_pass methods: */
2904   virtual unsigned int execute (function *) { return ipa_inline (); }
2905
2906 }; // class pass_ipa_inline
2907
2908 } // anon namespace
2909
2910 ipa_opt_pass_d *
2911 make_pass_ipa_inline (gcc::context *ctxt)
2912 {
2913   return new pass_ipa_inline (ctxt);
2914 }