gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p allow to check that particular inlining is allowed
  28       by the limits specified by user (allowed function growth, growth and so
  29       on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner was already optimized
  50          and results of early inlining adds a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handle the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allows doing so.
  76
  77          This heuristics is not very good on inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "params.h"
 109 #include "profile.h"
 110 #include "symbol-summary.h"
 111 #include "tree-vrp.h"
 112 #include "ipa-prop.h"
 113 #include "ipa-fnsummary.h"
 114 #include "ipa-inline.h"
 115 #include "ipa-utils.h"
 116 #include "sreal.h"
 117 #include "auto-profile.h"
 118 #include "builtins.h"
 119 #include "fibonacci_heap.h"
 120 #include "stringpool.h"
 121 #include "attribs.h"
 122 #include "asan.h"
 123
 124 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 125 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 126
 127 /* Statistics we collect about inlining algorithm.  */
 128 static int overall_size;
 129 static profile_count max_count;
 130 static profile_count spec_rem;
 131
 132 /* Return false when inlining edge E would lead to violating
 133    limits on function unit growth or stack usage growth.
 134
 135    The relative function body growth limit is present generally
 136    to avoid problems with non-linear behavior of the compiler.
 137    To allow inlining huge functions into tiny wrapper, the limit
 138    is always based on the bigger of the two functions considered.
 139
 140    For stack growth limits we always base the growth in stack usage
 141    of the callers.  We want to prevent applications from segfaulting
 142    on stack overflow when functions with huge stack frames gets
 143    inlined. */
 144
 145 static bool
 146 caller_growth_limits (struct cgraph_edge *e)
 147 {
 148   struct cgraph_node *to = e->caller;
 149   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 150   int newsize;
 151   int limit = 0;
 152   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 153   ipa_fn_summary *info, *what_info, *outer_info = ipa_fn_summaries->get (to);
 154
 155   /* Look for function e->caller is inlined to.  While doing
 156      so work out the largest function body on the way.  As
 157      described above, we want to base our function growth
 158      limits based on that.  Not on the self size of the
 159      outer function, not on the self size of inline code
 160      we immediately inline to.  This is the most relaxed
 161      interpretation of the rule "do not grow large functions
 162      too much in order to prevent compiler from exploding".  */
 163   while (true)
 164     {
 165       info = ipa_fn_summaries->get (to);
 166       if (limit < info->self_size)
 167         limit = info->self_size;
 168       if (stack_size_limit < info->estimated_self_stack_size)
 169         stack_size_limit = info->estimated_self_stack_size;
 170       if (to->global.inlined_to)
 171         to = to->callers->caller;
 172       else
 173         break;
 174     }
 175
 176   what_info = ipa_fn_summaries->get (what);
 177
 178   if (limit < what_info->self_size)
 179     limit = what_info->self_size;
 180
 181   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 182
 183   /* Check the size after inlining against the function limits.  But allow
 184      the function to shrink if it went over the limits by forced inlining.  */
 185   newsize = estimate_size_after_inlining (to, e);
 186   if (newsize >= info->size
 187       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 188       && newsize > limit)
 189     {
 190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 191       return false;
 192     }
 193
 194   if (!what_info->estimated_stack_size)
 195     return true;
 196
 197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 198      due to large i/o datastructures used by the Fortran front-end.
 199      We ought to ignore this limit when we know that the edge is executed
 200      on every invocation of the caller (i.e. its call statement dominates
 201      exit block).  We do not track this information, yet.  */
 202   stack_size_limit += ((gcov_type)stack_size_limit
 203                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 204
 205   inlined_stack = (outer_info->stack_frame_offset
 206                    + outer_info->estimated_self_stack_size
 207                    + what_info->estimated_stack_size);
 208   /* Check new stack consumption with stack consumption at the place
 209      stack is used.  */
 210   if (inlined_stack > stack_size_limit
 211       /* If function already has large stack usage from sibling
 212          inline call, we can inline, too.
 213          This bit overoptimistically assume that we are good at stack
 214          packing.  */
 215       && inlined_stack > info->estimated_stack_size
 216       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 217     {
 218       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 219       return false;
 220     }
 221   return true;
 222 }
 223
 224 /* Dump info about why inlining has failed.  */
 225
 226 static void
 227 report_inline_failed_reason (struct cgraph_edge *e)
 228 {
 229   if (dump_file)
 230     {
 231       fprintf (dump_file, "  not inlinable: %s -> %s, %s\n",
 232                e->caller->dump_name (),
 233                e->callee->dump_name (),
 234                cgraph_inline_failed_string (e->inline_failed));
 235       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 236            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 237           && e->caller->lto_file_data
 238           && e->callee->ultimate_alias_target ()->lto_file_data)
 239         {
 240           fprintf (dump_file, "  LTO objects: %s, %s\n",
 241                    e->caller->lto_file_data->file_name,
 242                    e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 243         }
 244       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 245         cl_target_option_print_diff
 246          (dump_file, 2, target_opts_for_fn (e->caller->decl),
 247           target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 248       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 249         cl_optimization_print_diff
 250           (dump_file, 2, opts_for_fn (e->caller->decl),
 251            opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 252     }
 253 }
 254
 255  /* Decide whether sanitizer-related attributes allow inlining. */
 256
 257 static bool
 258 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 259 {
 260   if (!caller || !callee)
 261     return true;
 262
 263   return ((sanitize_flags_p (SANITIZE_ADDRESS, caller)
 264            == sanitize_flags_p (SANITIZE_ADDRESS, callee))
 265           && (sanitize_flags_p (SANITIZE_POINTER_COMPARE, caller)
 266               == sanitize_flags_p (SANITIZE_POINTER_COMPARE, callee))
 267           && (sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, caller)
 268               == sanitize_flags_p (SANITIZE_POINTER_SUBTRACT, callee)));
 269 }
 270
 271 /* Used for flags where it is safe to inline when caller's value is
 272    grater than callee's.  */
 273 #define check_maybe_up(flag) \
 274       (opts_for_fn (caller->decl)->x_##flag             \
 275        != opts_for_fn (callee->decl)->x_##flag          \
 276        && (!always_inline                               \
 277            || opts_for_fn (caller->decl)->x_##flag      \
 278               < opts_for_fn (callee->decl)->x_##flag))
 279 /* Used for flags where it is safe to inline when caller's value is
 280    smaller than callee's.  */
 281 #define check_maybe_down(flag) \
 282       (opts_for_fn (caller->decl)->x_##flag             \
 283        != opts_for_fn (callee->decl)->x_##flag          \
 284        && (!always_inline                               \
 285            || opts_for_fn (caller->decl)->x_##flag      \
 286               > opts_for_fn (callee->decl)->x_##flag))
 287 /* Used for flags where exact match is needed for correctness.  */
 288 #define check_match(flag) \
 289       (opts_for_fn (caller->decl)->x_##flag             \
 290        != opts_for_fn (callee->decl)->x_##flag)
 291
 292  /* Decide if we can inline the edge and possibly update
 293    inline_failed reason.
 294    We check whether inlining is possible at all and whether
 295    caller growth limits allow doing so.
 296
 297    if REPORT is true, output reason to the dump file.
 298
 299    if DISREGARD_LIMITS is true, ignore size limits.*/
 300
 301 static bool
 302 can_inline_edge_p (struct cgraph_edge *e, bool report,
 303                    bool disregard_limits = false, bool early = false)
 304 {
 305   gcc_checking_assert (e->inline_failed);
 306
 307   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 308     {
 309       if (report)
 310         report_inline_failed_reason (e);
 311       return false;
 312     }
 313
 314   bool inlinable = true;
 315   enum availability avail;
 316   cgraph_node *caller = e->caller->global.inlined_to
 317                         ? e->caller->global.inlined_to : e->caller;
 318   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 319   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
 320   tree callee_tree
 321     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 322
 323   if (!callee->definition)
 324     {
 325       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 326       inlinable = false;
 327     }
 328   if (!early && (!opt_for_fn (callee->decl, optimize)
 329                  || !opt_for_fn (caller->decl, optimize)))
 330     {
 331       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
 332       inlinable = false;
 333     }
 334   else if (callee->calls_comdat_local)
 335     {
 336       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 337       inlinable = false;
 338     }
 339   else if (avail <= AVAIL_INTERPOSABLE)
 340     {
 341       e->inline_failed = CIF_OVERWRITABLE;
 342       inlinable = false;
 343     }
 344   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 345      initialized to one of FINAL_ERROR reasons.  */
 346   else if (e->call_stmt_cannot_inline_p)
 347     gcc_unreachable ();
 348   /* Don't inline if the functions have different EH personalities.  */
 349   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 350            && DECL_FUNCTION_PERSONALITY (callee->decl)
 351            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 352                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 353     {
 354       e->inline_failed = CIF_EH_PERSONALITY;
 355       inlinable = false;
 356     }
 357   /* TM pure functions should not be inlined into non-TM_pure
 358      functions.  */
 359   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 360     {
 361       e->inline_failed = CIF_UNSPECIFIED;
 362       inlinable = false;
 363     }
 364   /* Check compatibility of target optimization options.  */
 365   else if (!targetm.target_option.can_inline_p (caller->decl,
 366                                                 callee->decl))
 367     {
 368       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 369       inlinable = false;
 370     }
 371   else if (!ipa_fn_summaries->get (callee)->inlinable)
 372     {
 373       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 374       inlinable = false;
 375     }
 376   /* Don't inline a function with mismatched sanitization attributes. */
 377   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 378     {
 379       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 380       inlinable = false;
 381     }
 382   /* Check if caller growth allows the inlining.  */
 383   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 384            && !disregard_limits
 385            && !lookup_attribute ("flatten",
 386                                  DECL_ATTRIBUTES (caller->decl))
 387            && !caller_growth_limits (e))
 388     inlinable = false;
 389   /* Don't inline a function with a higher optimization level than the
 390      caller.  FIXME: this is really just tip of iceberg of handling
 391      optimization attribute.  */
 392   else if (caller_tree != callee_tree)
 393     {
 394       bool always_inline =
 395              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 396               && lookup_attribute ("always_inline",
 397                                    DECL_ATTRIBUTES (callee->decl)));
 398       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
 399       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 400
 401      /* Until GCC 4.9 we did not check the semantics alterning flags
 402         bellow and inline across optimization boundry.
 403         Enabling checks bellow breaks several packages by refusing
 404         to inline library always_inline functions. See PR65873.
 405         Disable the check for early inlining for now until better solution
 406         is found.  */
 407      if (always_inline && early)
 408         ;
 409       /* There are some options that change IL semantics which means
 410          we cannot inline in these cases for correctness reason.
 411          Not even for always_inline declared functions.  */
 412      else if (check_match (flag_wrapv)
 413               || check_match (flag_trapv)
 414               || check_match (flag_pcc_struct_return)
 415               /* When caller or callee does FP math, be sure FP codegen flags
 416                  compatible.  */
 417               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 418                   && (check_maybe_up (flag_rounding_math)
 419                       || check_maybe_up (flag_trapping_math)
 420                       || check_maybe_down (flag_unsafe_math_optimizations)
 421                       || check_maybe_down (flag_finite_math_only)
 422                       || check_maybe_up (flag_signaling_nans)
 423                       || check_maybe_down (flag_cx_limited_range)
 424                       || check_maybe_up (flag_signed_zeros)
 425                       || check_maybe_down (flag_associative_math)
 426                       || check_maybe_down (flag_reciprocal_math)
 427                       || check_maybe_down (flag_fp_int_builtin_inexact)
 428                       /* Strictly speaking only when the callee contains function
 429                          calls that may end up setting errno.  */
 430                       || check_maybe_up (flag_errno_math)))
 431               /* We do not want to make code compiled with exceptions to be
 432                  brought into a non-EH function unless we know that the callee
 433                  does not throw.
 434                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 435               || (check_maybe_up (flag_non_call_exceptions)
 436                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 437               || (check_maybe_up (flag_exceptions)
 438                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 439               /* When devirtualization is diabled for callee, it is not safe
 440                  to inline it as we possibly mangled the type info.
 441                  Allow early inlining of always inlines.  */
 442               || (!early && check_maybe_down (flag_devirtualize)))
 443         {
 444           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 445           inlinable = false;
 446         }
 447       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 448       else if (always_inline)
 449         ;
 450       /* When user added an attribute to the callee honor it.  */
 451       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 452                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 453         {
 454           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 455           inlinable = false;
 456         }
 457       /* If explicit optimize attribute are not used, the mismatch is caused
 458          by different command line options used to build different units.
 459          Do not care about COMDAT functions - those are intended to be
 460          optimized with the optimization flags of module they are used in.
 461          Also do not care about mixing up size/speed optimization when
 462          DECL_DISREGARD_INLINE_LIMITS is set.  */
 463       else if ((callee->merged_comdat
 464                 && !lookup_attribute ("optimize",
 465                                       DECL_ATTRIBUTES (caller->decl)))
 466                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 467         ;
 468       /* If mismatch is caused by merging two LTO units with different
 469          optimizationflags we want to be bit nicer.  However never inline
 470          if one of functions is not optimized at all.  */
 471       else if (!opt_for_fn (callee->decl, optimize)
 472                || !opt_for_fn (caller->decl, optimize))
 473         {
 474           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 475           inlinable = false;
 476         }
 477       /* If callee is optimized for size and caller is not, allow inlining if
 478          code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
 479          is inline (and thus likely an unified comdat).  This will allow caller
 480          to run faster.  */
 481       else if (opt_for_fn (callee->decl, optimize_size)
 482                > opt_for_fn (caller->decl, optimize_size))
 483         {
 484           int growth = estimate_edge_growth (e);
 485           if (growth > 0
 486               && (!DECL_DECLARED_INLINE_P (callee->decl)
 487                   && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
 488                                     MAX_INLINE_INSNS_AUTO)))
 489             {
 490               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 491               inlinable = false;
 492             }
 493         }
 494       /* If callee is more aggressively optimized for performance than caller,
 495          we generally want to inline only cheap (runtime wise) functions.  */
 496       else if (opt_for_fn (callee->decl, optimize_size)
 497                < opt_for_fn (caller->decl, optimize_size)
 498                || (opt_for_fn (callee->decl, optimize)
 499                    > opt_for_fn (caller->decl, optimize)))
 500         {
 501           if (estimate_edge_time (e)
 502               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
 503             {
 504               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 505               inlinable = false;
 506             }
 507         }
 508
 509     }
 510
 511   if (!inlinable && report)
 512     report_inline_failed_reason (e);
 513   return inlinable;
 514 }
 515
 516
 517 /* Return true if the edge E is inlinable during early inlining.  */
 518
 519 static bool
 520 can_early_inline_edge_p (struct cgraph_edge *e)
 521 {
 522   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 523   /* Early inliner might get called at WPA stage when IPA pass adds new
 524      function.  In this case we can not really do any of early inlining
 525      because function bodies are missing.  */
 526   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 527     return false;
 528   if (!gimple_has_body_p (callee->decl))
 529     {
 530       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 531       return false;
 532     }
 533   /* In early inliner some of callees may not be in SSA form yet
 534      (i.e. the callgraph is cyclic and we did not process
 535      the callee by early inliner, yet).  We don't have CIF code for this
 536      case; later we will re-do the decision in the real inliner.  */
 537   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 538       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 539     {
 540       if (dump_file)
 541         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 542       return false;
 543     }
 544   if (!can_inline_edge_p (e, true, false, true))
 545     return false;
 546   return true;
 547 }
 548
 549
 550 /* Return number of calls in N.  Ignore cheap builtins.  */
 551
 552 static int
 553 num_calls (struct cgraph_node *n)
 554 {
 555   struct cgraph_edge *e;
 556   int num = 0;
 557
 558   for (e = n->callees; e; e = e->next_callee)
 559     if (!is_inexpensive_builtin (e->callee->decl))
 560       num++;
 561   return num;
 562 }
 563
 564
 565 /* Return true if we are interested in inlining small function.  */
 566
 567 static bool
 568 want_early_inline_function_p (struct cgraph_edge *e)
 569 {
 570   bool want_inline = true;
 571   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 572
 573   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 574     ;
 575   /* For AutoFDO, we need to make sure that before profile summary, all
 576      hot paths' IR look exactly the same as profiled binary. As a result,
 577      in einliner, we will disregard size limit and inline those callsites
 578      that are:
 579        * inlined in the profiled binary, and
 580        * the cloned callee has enough samples to be considered "hot".  */
 581   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 582     ;
 583   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 584            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 585     {
 586       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 587       report_inline_failed_reason (e);
 588       want_inline = false;
 589     }
 590   else
 591     {
 592       int growth = estimate_edge_growth (e);
 593       int n;
 594
 595       if (growth <= 0)
 596         ;
 597       else if (!e->maybe_hot_p ()
 598                && growth > 0)
 599         {
 600           if (dump_file)
 601             fprintf (dump_file, "  will not early inline: %s->%s, "
 602                      "call is cold and code would grow by %i\n",
 603                      e->caller->dump_name (),
 604                      callee->dump_name (),
 605                      growth);
 606           want_inline = false;
 607         }
 608       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 609         {
 610           if (dump_file)
 611             fprintf (dump_file, "  will not early inline: %s->%s, "
 612                      "growth %i exceeds --param early-inlining-insns\n",
 613                      e->caller->dump_name (),
 614                      callee->dump_name (),
 615                      growth);
 616           want_inline = false;
 617         }
 618       else if ((n = num_calls (callee)) != 0
 619                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 620         {
 621           if (dump_file)
 622             fprintf (dump_file, "  will not early inline: %s->%s, "
 623                      "growth %i exceeds --param early-inlining-insns "
 624                      "divided by number of calls\n",
 625                      e->caller->dump_name (),
 626                      callee->dump_name (),
 627                      growth);
 628           want_inline = false;
 629         }
 630     }
 631   return want_inline;
 632 }
 633
 634 /* Compute time of the edge->caller + edge->callee execution when inlining
 635    does not happen.  */
 636
 637 inline sreal
 638 compute_uninlined_call_time (struct cgraph_edge *edge,
 639                              sreal uninlined_call_time)
 640 {
 641   cgraph_node *caller = (edge->caller->global.inlined_to
 642                          ? edge->caller->global.inlined_to
 643                          : edge->caller);
 644
 645   sreal freq = edge->sreal_frequency ();
 646   if (freq > 0)
 647     uninlined_call_time *= freq;
 648   else
 649     uninlined_call_time = uninlined_call_time >> 11;
 650
 651   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 652   return uninlined_call_time + caller_time;
 653 }
 654
 655 /* Same as compute_uinlined_call_time but compute time when inlining
 656    does happen.  */
 657
 658 inline sreal
 659 compute_inlined_call_time (struct cgraph_edge *edge,
 660                            sreal time)
 661 {
 662   cgraph_node *caller = (edge->caller->global.inlined_to
 663                          ? edge->caller->global.inlined_to
 664                          : edge->caller);
 665   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 666
 667   sreal freq = edge->sreal_frequency ();
 668   if (freq > 0)
 669     time *= freq;
 670   else
 671     time = time >> 11;
 672
 673   /* This calculation should match one in ipa-inline-analysis.c
 674      (estimate_edge_size_and_time).  */
 675   time -= (sreal)ipa_call_summaries->get (edge)->call_stmt_time * freq;
 676   time += caller_time;
 677   if (time <= 0)
 678     time = ((sreal) 1) >> 8;
 679   gcc_checking_assert (time >= 0);
 680   return time;
 681 }
 682
 683 /* Return true if the speedup for inlining E is bigger than
 684    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 685
 686 static bool
 687 big_speedup_p (struct cgraph_edge *e)
 688 {
 689   sreal unspec_time;
 690   sreal spec_time = estimate_edge_time (e, &unspec_time);
 691   sreal time = compute_uninlined_call_time (e, unspec_time);
 692   sreal inlined_time = compute_inlined_call_time (e, spec_time);
 693
 694   if ((time - inlined_time) * 100
 695       > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)))
 696     return true;
 697   return false;
 698 }
 699
 700 /* Return true if we are interested in inlining small function.
 701    When REPORT is true, report reason to dump file.  */
 702
 703 static bool
 704 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 705 {
 706   bool want_inline = true;
 707   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 708
 709   /* Allow this function to be called before can_inline_edge_p,
 710      since it's usually cheaper.  */
 711   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 712     want_inline = false;
 713   else if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 714     ;
 715   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 716            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 717     {
 718       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 719       want_inline = false;
 720     }
 721   /* Do fast and conservative check if the function can be good
 722      inline candidate.  At the moment we allow inline hints to
 723      promote non-inline functions to inline and we increase
 724      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 725   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 726            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 727            && ipa_fn_summaries->get (callee)->min_size
 728                 - ipa_call_summaries->get (e)->call_stmt_size
 729               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 730     {
 731       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 732       want_inline = false;
 733     }
 734   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 735             || e->count.ipa ().nonzero_p ())
 736            && ipa_fn_summaries->get (callee)->min_size
 737                 - ipa_call_summaries->get (e)->call_stmt_size
 738               > 16 * MAX_INLINE_INSNS_SINGLE)
 739     {
 740       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 741                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 742                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 743       want_inline = false;
 744     }
 745   else
 746     {
 747       int growth = estimate_edge_growth (e);
 748       ipa_hints hints = estimate_edge_hints (e);
 749       bool big_speedup = big_speedup_p (e);
 750
 751       if (growth <= 0)
 752         ;
 753       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 754          hints suggests that inlining given function is very profitable.  */
 755       else if (DECL_DECLARED_INLINE_P (callee->decl)
 756                && growth >= MAX_INLINE_INSNS_SINGLE
 757                && ((!big_speedup
 758                     && !(hints & (INLINE_HINT_indirect_call
 759                                   | INLINE_HINT_known_hot
 760                                   | INLINE_HINT_loop_iterations
 761                                   | INLINE_HINT_array_index
 762                                   | INLINE_HINT_loop_stride)))
 763                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 764         {
 765           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 766           want_inline = false;
 767         }
 768       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 769                && !opt_for_fn (e->caller->decl, flag_inline_functions))
 770         {
 771           /* growth_likely_positive is expensive, always test it last.  */
 772           if (growth >= MAX_INLINE_INSNS_SINGLE
 773               || growth_likely_positive (callee, growth))
 774             {
 775               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 776               want_inline = false;
 777             }
 778         }
 779       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 780          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 781          inlining given function is very profitable.  */
 782       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 783                && !big_speedup
 784                && !(hints & INLINE_HINT_known_hot)
 785                && growth >= ((hints & (INLINE_HINT_indirect_call
 786                                        | INLINE_HINT_loop_iterations
 787                                        | INLINE_HINT_array_index
 788                                        | INLINE_HINT_loop_stride))
 789                              ? MAX (MAX_INLINE_INSNS_AUTO,
 790                                     MAX_INLINE_INSNS_SINGLE)
 791                              : MAX_INLINE_INSNS_AUTO))
 792         {
 793           /* growth_likely_positive is expensive, always test it last.  */
 794           if (growth >= MAX_INLINE_INSNS_SINGLE
 795               || growth_likely_positive (callee, growth))
 796             {
 797               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 798               want_inline = false;
 799             }
 800         }
 801       /* If call is cold, do not inline when function body would grow. */
 802       else if (!e->maybe_hot_p ()
 803                && (growth >= MAX_INLINE_INSNS_SINGLE
 804                    || growth_likely_positive (callee, growth)))
 805         {
 806           e->inline_failed = CIF_UNLIKELY_CALL;
 807           want_inline = false;
 808         }
 809     }
 810   if (!want_inline && report)
 811     report_inline_failed_reason (e);
 812   return want_inline;
 813 }
 814
 815 /* EDGE is self recursive edge.
 816    We hand two cases - when function A is inlining into itself
 817    or when function A is being inlined into another inliner copy of function
 818    A within function B.
 819
 820    In first case OUTER_NODE points to the toplevel copy of A, while
 821    in the second case OUTER_NODE points to the outermost copy of A in B.
 822
 823    In both cases we want to be extra selective since
 824    inlining the call will just introduce new recursive calls to appear.  */
 825
 826 static bool
 827 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 828                                    struct cgraph_node *outer_node,
 829                                    bool peeling,
 830                                    int depth)
 831 {
 832   char const *reason = NULL;
 833   bool want_inline = true;
 834   sreal caller_freq = 1;
 835   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 836
 837   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 838     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 839
 840   if (!edge->maybe_hot_p ())
 841     {
 842       reason = "recursive call is cold";
 843       want_inline = false;
 844     }
 845   else if (depth > max_depth)
 846     {
 847       reason = "--param max-inline-recursive-depth exceeded.";
 848       want_inline = false;
 849     }
 850   else if (outer_node->global.inlined_to
 851            && (caller_freq = outer_node->callers->sreal_frequency ()) == 0)
 852     {
 853       reason = "caller frequency is 0";
 854       want_inline = false;
 855     }
 856
 857   if (!want_inline)
 858     ;
 859   /* Inlining of self recursive function into copy of itself within other
 860      function is transformation similar to loop peeling.
 861
 862      Peeling is profitable if we can inline enough copies to make probability
 863      of actual call to the self recursive function very small.  Be sure that
 864      the probability of recursion is small.
 865
 866      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 867      This way the expected number of recursion is at most max_depth.  */
 868   else if (peeling)
 869     {
 870       sreal max_prob = (sreal)1 - ((sreal)1 / (sreal)max_depth);
 871       int i;
 872       for (i = 1; i < depth; i++)
 873         max_prob = max_prob * max_prob;
 874       if (edge->sreal_frequency () >= max_prob * caller_freq)
 875         {
 876           reason = "frequency of recursive call is too large";
 877           want_inline = false;
 878         }
 879     }
 880   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if
 881      recursion depth is large.  We reduce function call overhead and increase
 882      chances that things fit in hardware return predictor.
 883
 884      Recursive inlining might however increase cost of stack frame setup
 885      actually slowing down functions whose recursion tree is wide rather than
 886      deep.
 887
 888      Deciding reliably on when to do recursive inlining without profile feedback
 889      is tricky.  For now we disable recursive inlining when probability of self
 890      recursion is low.
 891
 892      Recursive inlining of self recursive call within loop also results in
 893      large loop depths that generally optimize badly.  We may want to throttle
 894      down inlining in those cases.  In particular this seems to happen in one
 895      of libstdc++ rb tree methods.  */
 896   else
 897     {
 898       if (edge->sreal_frequency () * 100
 899           <= caller_freq
 900              * PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY))
 901         {
 902           reason = "frequency of recursive call is too small";
 903           want_inline = false;
 904         }
 905     }
 906   if (!want_inline && dump_file)
 907     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 908   return want_inline;
 909 }
 910
 911 /* Return true when NODE has uninlinable caller;
 912    set HAS_HOT_CALL if it has hot call.
 913    Worker for cgraph_for_node_and_aliases.  */
 914
 915 static bool
 916 check_callers (struct cgraph_node *node, void *has_hot_call)
 917 {
 918   struct cgraph_edge *e;
 919    for (e = node->callers; e; e = e->next_caller)
 920      {
 921        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
 922            || !opt_for_fn (e->caller->decl, optimize))
 923          return true;
 924        if (!can_inline_edge_p (e, true))
 925          return true;
 926        if (e->recursive_p ())
 927          return true;
 928        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 929          *(bool *)has_hot_call = true;
 930      }
 931   return false;
 932 }
 933
 934 /* If NODE has a caller, return true.  */
 935
 936 static bool
 937 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 938 {
 939   if (node->callers)
 940     return true;
 941   return false;
 942 }
 943
 944 /* Decide if inlining NODE would reduce unit size by eliminating
 945    the offline copy of function.
 946    When COLD is true the cold calls are considered, too.  */
 947
 948 static bool
 949 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 950 {
 951   bool has_hot_call = false;
 952
 953   /* Aliases gets inlined along with the function they alias.  */
 954   if (node->alias)
 955     return false;
 956   /* Already inlined?  */
 957   if (node->global.inlined_to)
 958     return false;
 959   /* Does it have callers?  */
 960   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
 961     return false;
 962   /* Inlining into all callers would increase size?  */
 963   if (estimate_growth (node) > 0)
 964     return false;
 965   /* All inlines must be possible.  */
 966   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
 967                                          true))
 968     return false;
 969   if (!cold && !has_hot_call)
 970     return false;
 971   return true;
 972 }
 973
 974 /* A cost model driving the inlining heuristics in a way so the edges with
 975    smallest badness are inlined first.  After each inlining is performed
 976    the costs of all caller edges of nodes affected are recomputed so the
 977    metrics may accurately depend on values such as number of inlinable callers
 978    of the function or function body size.  */
 979
 980 static sreal
 981 edge_badness (struct cgraph_edge *edge, bool dump)
 982 {
 983   sreal badness;
 984   int growth;
 985   sreal edge_time, unspec_edge_time;
 986   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
 987   struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 988   ipa_hints hints;
 989   cgraph_node *caller = (edge->caller->global.inlined_to
 990                          ? edge->caller->global.inlined_to
 991                          : edge->caller);
 992
 993   growth = estimate_edge_growth (edge);
 994   edge_time = estimate_edge_time (edge, &unspec_edge_time);
 995   hints = estimate_edge_hints (edge);
 996   gcc_checking_assert (edge_time >= 0);
 997   /* Check that inlined time is better, but tolerate some roundoff issues.
 998      FIXME: When callee profile drops to 0 we account calls more.  This
 999      should be fixed by never doing that.  */
1000   gcc_checking_assert ((edge_time * 100
1001                         - callee_info->time * 101).to_int () <= 0
1002                         || callee->count.ipa ().initialized_p ());
1003   gcc_checking_assert (growth <= callee_info->size);
1004
1005   if (dump)
1006     {
1007       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1008                edge->caller->dump_name (),
1009                edge->callee->dump_name ());
1010       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1011                growth,
1012                edge_time.to_double (),
1013                unspec_edge_time.to_double ());
1014       ipa_dump_hints (dump_file, hints);
1015       if (big_speedup_p (edge))
1016         fprintf (dump_file, " big_speedup");
1017       fprintf (dump_file, "\n");
1018     }
1019
1020   /* Always prefer inlining saving code size.  */
1021   if (growth <= 0)
1022     {
1023       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1024       if (dump)
1025         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1026                  growth);
1027     }
1028    /* Inlining into EXTERNAL functions is not going to change anything unless
1029       they are themselves inlined.  */
1030    else if (DECL_EXTERNAL (caller->decl))
1031     {
1032       if (dump)
1033         fprintf (dump_file, "      max: function is external\n");
1034       return sreal::max ();
1035     }
1036   /* When profile is available. Compute badness as:
1037
1038                  time_saved * caller_count
1039      goodness =  -------------------------------------------------
1040                  growth_of_caller * overall_growth * combined_size
1041
1042      badness = - goodness
1043
1044      Again use negative value to make calls with profile appear hotter
1045      then calls without.
1046   */
1047   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1048            || caller->count.ipa ().nonzero_p ())
1049     {
1050       sreal numerator, denominator;
1051       int overall_growth;
1052       sreal inlined_time = compute_inlined_call_time (edge, edge_time);
1053
1054       numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
1055                    - inlined_time);
1056       if (numerator <= 0)
1057         numerator = ((sreal) 1 >> 8);
1058       if (caller->count.ipa ().nonzero_p ())
1059         numerator *= caller->count.ipa ().to_gcov_type ();
1060       else if (caller->count.ipa ().initialized_p ())
1061         numerator = numerator >> 11;
1062       denominator = growth;
1063
1064       overall_growth = callee_info->growth;
1065
1066       /* Look for inliner wrappers of the form:
1067
1068          inline_caller ()
1069            {
1070              do_fast_job...
1071              if (need_more_work)
1072                noninline_callee ();
1073            }
1074          Withhout panilizing this case, we usually inline noninline_callee
1075          into the inline_caller because overall_growth is small preventing
1076          further inlining of inline_caller.
1077
1078          Penalize only callgraph edges to functions with small overall
1079          growth ...
1080         */
1081       if (growth > overall_growth
1082           /* ... and having only one caller which is not inlined ... */
1083           && callee_info->single_caller
1084           && !edge->caller->global.inlined_to
1085           /* ... and edges executed only conditionally ... */
1086           && edge->sreal_frequency () < 1
1087           /* ... consider case where callee is not inline but caller is ... */
1088           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1089                && DECL_DECLARED_INLINE_P (caller->decl))
1090               /* ... or when early optimizers decided to split and edge
1091                  frequency still indicates splitting is a win ... */
1092               || (callee->split_part && !caller->split_part
1093                   && edge->sreal_frequency () * 100
1094                      < PARAM_VALUE
1095                           (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY)
1096                   /* ... and do not overwrite user specified hints.   */
1097                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1098                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1099         {
1100           struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1101           int caller_growth = caller_info->growth;
1102
1103           /* Only apply the penalty when caller looks like inline candidate,
1104              and it is not called once and.  */
1105           if (!caller_info->single_caller && overall_growth < caller_growth
1106               && caller_info->inlinable
1107               && caller_info->size
1108                  < (DECL_DECLARED_INLINE_P (caller->decl)
1109                     ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1110             {
1111               if (dump)
1112                 fprintf (dump_file,
1113                          "     Wrapper penalty. Increasing growth %i to %i\n",
1114                          overall_growth, caller_growth);
1115               overall_growth = caller_growth;
1116             }
1117         }
1118       if (overall_growth > 0)
1119         {
1120           /* Strongly preffer functions with few callers that can be inlined
1121              fully.  The square root here leads to smaller binaries at average.
1122              Watch however for extreme cases and return to linear function
1123              when growth is large.  */
1124           if (overall_growth < 256)
1125             overall_growth *= overall_growth;
1126           else
1127             overall_growth += 256 * 256 - 256;
1128           denominator *= overall_growth;
1129         }
1130       denominator *= inlined_time;
1131
1132       badness = - numerator / denominator;
1133
1134       if (dump)
1135         {
1136           fprintf (dump_file,
1137                    "      %f: guessed profile. frequency %f, count %" PRId64
1138                    " caller count %" PRId64
1139                    " time w/o inlining %f, time with inlining %f"
1140                    " overall growth %i (current) %i (original)"
1141                    " %i (compensated)\n",
1142                    badness.to_double (),
1143                    edge->sreal_frequency ().to_double (),
1144                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1145                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1146                    compute_uninlined_call_time (edge,
1147                                                 unspec_edge_time).to_double (),
1148                    inlined_time.to_double (),
1149                    estimate_growth (callee),
1150                    callee_info->growth, overall_growth);
1151         }
1152     }
1153   /* When function local profile is not available or it does not give
1154      useful information (ie frequency is zero), base the cost on
1155      loop nest and overall size growth, so we optimize for overall number
1156      of functions fully inlined in program.  */
1157   else
1158     {
1159       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1160       badness = growth;
1161
1162       /* Decrease badness if call is nested.  */
1163       if (badness > 0)
1164         badness = badness >> nest;
1165       else
1166         badness = badness << nest;
1167       if (dump)
1168         fprintf (dump_file, "      %f: no profile. nest %i\n",
1169                  badness.to_double (), nest);
1170     }
1171   gcc_checking_assert (badness != 0);
1172
1173   if (edge->recursive_p ())
1174     badness = badness.shift (badness > 0 ? 4 : -4);
1175   if ((hints & (INLINE_HINT_indirect_call
1176                 | INLINE_HINT_loop_iterations
1177                 | INLINE_HINT_array_index
1178                 | INLINE_HINT_loop_stride))
1179       || callee_info->growth <= 0)
1180     badness = badness.shift (badness > 0 ? -2 : 2);
1181   if (hints & (INLINE_HINT_same_scc))
1182     badness = badness.shift (badness > 0 ? 3 : -3);
1183   else if (hints & (INLINE_HINT_in_scc))
1184     badness = badness.shift (badness > 0 ? 2 : -2);
1185   else if (hints & (INLINE_HINT_cross_module))
1186     badness = badness.shift (badness > 0 ? 1 : -1);
1187   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1188     badness = badness.shift (badness > 0 ? -4 : 4);
1189   else if ((hints & INLINE_HINT_declared_inline))
1190     badness = badness.shift (badness > 0 ? -3 : 3);
1191   if (dump)
1192     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1193   return badness;
1194 }
1195
1196 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1197 static inline void
1198 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1199 {
1200   sreal badness = edge_badness (edge, false);
1201   if (edge->aux)
1202     {
1203       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1204       gcc_checking_assert (n->get_data () == edge);
1205
1206       /* fibonacci_heap::replace_key does busy updating of the
1207          heap that is unnecesarily expensive.
1208          We do lazy increases: after extracting minimum if the key
1209          turns out to be out of date, it is re-inserted into heap
1210          with correct value.  */
1211       if (badness < n->get_key ())
1212         {
1213           if (dump_file && (dump_flags & TDF_DETAILS))
1214             {
1215               fprintf (dump_file,
1216                        "  decreasing badness %s -> %s, %f to %f\n",
1217                        edge->caller->dump_name (),
1218                        edge->callee->dump_name (),
1219                        n->get_key ().to_double (),
1220                        badness.to_double ());
1221             }
1222           heap->decrease_key (n, badness);
1223         }
1224     }
1225   else
1226     {
1227        if (dump_file && (dump_flags & TDF_DETAILS))
1228          {
1229            fprintf (dump_file,
1230                     "  enqueuing call %s -> %s, badness %f\n",
1231                     edge->caller->dump_name (),
1232                     edge->callee->dump_name (),
1233                     badness.to_double ());
1234          }
1235       edge->aux = heap->insert (badness, edge);
1236     }
1237 }
1238
1239
1240 /* NODE was inlined.
1241    All caller edges needs to be resetted because
1242    size estimates change. Similarly callees needs reset
1243    because better context may be known.  */
1244
1245 static void
1246 reset_edge_caches (struct cgraph_node *node)
1247 {
1248   struct cgraph_edge *edge;
1249   struct cgraph_edge *e = node->callees;
1250   struct cgraph_node *where = node;
1251   struct ipa_ref *ref;
1252
1253   if (where->global.inlined_to)
1254     where = where->global.inlined_to;
1255
1256   for (edge = where->callers; edge; edge = edge->next_caller)
1257     if (edge->inline_failed)
1258       reset_edge_growth_cache (edge);
1259
1260   FOR_EACH_ALIAS (where, ref)
1261     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1262
1263   if (!e)
1264     return;
1265
1266   while (true)
1267     if (!e->inline_failed && e->callee->callees)
1268       e = e->callee->callees;
1269     else
1270       {
1271         if (e->inline_failed)
1272           reset_edge_growth_cache (e);
1273         if (e->next_callee)
1274           e = e->next_callee;
1275         else
1276           {
1277             do
1278               {
1279                 if (e->caller == node)
1280                   return;
1281                 e = e->caller->callers;
1282               }
1283             while (!e->next_callee);
1284             e = e->next_callee;
1285           }
1286       }
1287 }
1288
1289 /* Recompute HEAP nodes for each of caller of NODE.
1290    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1291    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1292    it is inlinable. Otherwise check all edges.  */
1293
1294 static void
1295 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1296                     bitmap updated_nodes,
1297                     struct cgraph_edge *check_inlinablity_for)
1298 {
1299   struct cgraph_edge *edge;
1300   struct ipa_ref *ref;
1301
1302   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1303       || node->global.inlined_to)
1304     return;
1305   if (!bitmap_set_bit (updated_nodes, node->uid))
1306     return;
1307
1308   FOR_EACH_ALIAS (node, ref)
1309     {
1310       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1311       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1312     }
1313
1314   for (edge = node->callers; edge; edge = edge->next_caller)
1315     if (edge->inline_failed)
1316       {
1317         if (!check_inlinablity_for
1318             || check_inlinablity_for == edge)
1319           {
1320             if (want_inline_small_function_p (edge, false)
1321                 && can_inline_edge_p (edge, false))
1322               update_edge_key (heap, edge);
1323             else if (edge->aux)
1324               {
1325                 report_inline_failed_reason (edge);
1326                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1327                 edge->aux = NULL;
1328               }
1329           }
1330         else if (edge->aux)
1331           update_edge_key (heap, edge);
1332       }
1333 }
1334
1335 /* Recompute HEAP nodes for each uninlined call in NODE.
1336    This is used when we know that edge badnesses are going only to increase
1337    (we introduced new call site) and thus all we need is to insert newly
1338    created edges into heap.  */
1339
1340 static void
1341 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1342                     bitmap updated_nodes)
1343 {
1344   struct cgraph_edge *e = node->callees;
1345
1346   if (!e)
1347     return;
1348   while (true)
1349     if (!e->inline_failed && e->callee->callees)
1350       e = e->callee->callees;
1351     else
1352       {
1353         enum availability avail;
1354         struct cgraph_node *callee;
1355         /* We do not reset callee growth cache here.  Since we added a new call,
1356            growth chould have just increased and consequentely badness metric
1357            don't need updating.  */
1358         if (e->inline_failed
1359             && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1360             && ipa_fn_summaries->get (callee)->inlinable
1361             && avail >= AVAIL_AVAILABLE
1362             && !bitmap_bit_p (updated_nodes, callee->uid))
1363           {
1364             if (want_inline_small_function_p (e, false)
1365                 && can_inline_edge_p (e, false))
1366               update_edge_key (heap, e);
1367             else if (e->aux)
1368               {
1369                 report_inline_failed_reason (e);
1370                 heap->delete_node ((edge_heap_node_t *) e->aux);
1371                 e->aux = NULL;
1372               }
1373           }
1374         if (e->next_callee)
1375           e = e->next_callee;
1376         else
1377           {
1378             do
1379               {
1380                 if (e->caller == node)
1381                   return;
1382                 e = e->caller->callers;
1383               }
1384             while (!e->next_callee);
1385             e = e->next_callee;
1386           }
1387       }
1388 }
1389
1390 /* Enqueue all recursive calls from NODE into priority queue depending on
1391    how likely we want to recursively inline the call.  */
1392
1393 static void
1394 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1395                         edge_heap_t *heap)
1396 {
1397   struct cgraph_edge *e;
1398   enum availability avail;
1399
1400   for (e = where->callees; e; e = e->next_callee)
1401     if (e->callee == node
1402         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1403             && avail > AVAIL_INTERPOSABLE))
1404       heap->insert (-e->sreal_frequency (), e);
1405   for (e = where->callees; e; e = e->next_callee)
1406     if (!e->inline_failed)
1407       lookup_recursive_calls (node, e->callee, heap);
1408 }
1409
1410 /* Decide on recursive inlining: in the case function has recursive calls,
1411    inline until body size reaches given argument.  If any new indirect edges
1412    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1413    is NULL.  */
1414
1415 static bool
1416 recursive_inlining (struct cgraph_edge *edge,
1417                     vec<cgraph_edge *> *new_edges)
1418 {
1419   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1420   edge_heap_t heap (sreal::min ());
1421   struct cgraph_node *node;
1422   struct cgraph_edge *e;
1423   struct cgraph_node *master_clone = NULL, *next;
1424   int depth = 0;
1425   int n = 0;
1426
1427   node = edge->caller;
1428   if (node->global.inlined_to)
1429     node = node->global.inlined_to;
1430
1431   if (DECL_DECLARED_INLINE_P (node->decl))
1432     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1433
1434   /* Make sure that function is small enough to be considered for inlining.  */
1435   if (estimate_size_after_inlining (node, edge)  >= limit)
1436     return false;
1437   lookup_recursive_calls (node, node, &heap);
1438   if (heap.empty ())
1439     return false;
1440
1441   if (dump_file)
1442     fprintf (dump_file,
1443              "  Performing recursive inlining on %s\n",
1444              node->name ());
1445
1446   /* Do the inlining and update list of recursive call during process.  */
1447   while (!heap.empty ())
1448     {
1449       struct cgraph_edge *curr = heap.extract_min ();
1450       struct cgraph_node *cnode, *dest = curr->callee;
1451
1452       if (!can_inline_edge_p (curr, true))
1453         continue;
1454
1455       /* MASTER_CLONE is produced in the case we already started modified
1456          the function. Be sure to redirect edge to the original body before
1457          estimating growths otherwise we will be seeing growths after inlining
1458          the already modified body.  */
1459       if (master_clone)
1460         {
1461           curr->redirect_callee (master_clone);
1462           reset_edge_growth_cache (curr);
1463         }
1464
1465       if (estimate_size_after_inlining (node, curr) > limit)
1466         {
1467           curr->redirect_callee (dest);
1468           reset_edge_growth_cache (curr);
1469           break;
1470         }
1471
1472       depth = 1;
1473       for (cnode = curr->caller;
1474            cnode->global.inlined_to; cnode = cnode->callers->caller)
1475         if (node->decl
1476             == curr->callee->ultimate_alias_target ()->decl)
1477           depth++;
1478
1479       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1480         {
1481           curr->redirect_callee (dest);
1482           reset_edge_growth_cache (curr);
1483           continue;
1484         }
1485
1486       if (dump_file)
1487         {
1488           fprintf (dump_file,
1489                    "   Inlining call of depth %i", depth);
1490           if (node->count.nonzero_p ())
1491             {
1492               fprintf (dump_file, " called approx. %.2f times per call",
1493                        (double)curr->count.to_gcov_type ()
1494                        / node->count.to_gcov_type ());
1495             }
1496           fprintf (dump_file, "\n");
1497         }
1498       if (!master_clone)
1499         {
1500           /* We need original clone to copy around.  */
1501           master_clone = node->create_clone (node->decl, node->count,
1502             false, vNULL, true, NULL, NULL);
1503           for (e = master_clone->callees; e; e = e->next_callee)
1504             if (!e->inline_failed)
1505               clone_inlined_nodes (e, true, false, NULL);
1506           curr->redirect_callee (master_clone);
1507           reset_edge_growth_cache (curr);
1508         }
1509
1510       inline_call (curr, false, new_edges, &overall_size, true);
1511       lookup_recursive_calls (node, curr->callee, &heap);
1512       n++;
1513     }
1514
1515   if (!heap.empty () && dump_file)
1516     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1517
1518   if (!master_clone)
1519     return false;
1520
1521   if (dump_file)
1522     fprintf (dump_file,
1523              "\n   Inlined %i times, "
1524              "body grown from size %i to %i, time %f to %f\n", n,
1525              ipa_fn_summaries->get (master_clone)->size,
1526              ipa_fn_summaries->get (node)->size,
1527              ipa_fn_summaries->get (master_clone)->time.to_double (),
1528              ipa_fn_summaries->get (node)->time.to_double ());
1529
1530   /* Remove master clone we used for inlining.  We rely that clones inlined
1531      into master clone gets queued just before master clone so we don't
1532      need recursion.  */
1533   for (node = symtab->first_function (); node != master_clone;
1534        node = next)
1535     {
1536       next = symtab->next_function (node);
1537       if (node->global.inlined_to == master_clone)
1538         node->remove ();
1539     }
1540   master_clone->remove ();
1541   return true;
1542 }
1543
1544
1545 /* Given whole compilation unit estimate of INSNS, compute how large we can
1546    allow the unit to grow.  */
1547
1548 static int
1549 compute_max_insns (int insns)
1550 {
1551   int max_insns = insns;
1552   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1553     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1554
1555   return ((int64_t) max_insns
1556           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1557 }
1558
1559
1560 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1561
1562 static void
1563 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1564 {
1565   while (new_edges.length () > 0)
1566     {
1567       struct cgraph_edge *edge = new_edges.pop ();
1568
1569       gcc_assert (!edge->aux);
1570       if (edge->inline_failed
1571           && can_inline_edge_p (edge, true)
1572           && want_inline_small_function_p (edge, true))
1573         edge->aux = heap->insert (edge_badness (edge, false), edge);
1574     }
1575 }
1576
1577 /* Remove EDGE from the fibheap.  */
1578
1579 static void
1580 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1581 {
1582   if (e->aux)
1583     {
1584       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1585       e->aux = NULL;
1586     }
1587 }
1588
1589 /* Return true if speculation of edge E seems useful.
1590    If ANTICIPATE_INLINING is true, be conservative and hope that E
1591    may get inlined.  */
1592
1593 bool
1594 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1595 {
1596   enum availability avail;
1597   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1598                                                                  e->caller);
1599   struct cgraph_edge *direct, *indirect;
1600   struct ipa_ref *ref;
1601
1602   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1603
1604   if (!e->maybe_hot_p ())
1605     return false;
1606
1607   /* See if IP optimizations found something potentially useful about the
1608      function.  For now we look only for CONST/PURE flags.  Almost everything
1609      else we propagate is useless.  */
1610   if (avail >= AVAIL_AVAILABLE)
1611     {
1612       int ecf_flags = flags_from_decl_or_type (target->decl);
1613       if (ecf_flags & ECF_CONST)
1614         {
1615           e->speculative_call_info (direct, indirect, ref);
1616           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1617             return true;
1618         }
1619       else if (ecf_flags & ECF_PURE)
1620         {
1621           e->speculative_call_info (direct, indirect, ref);
1622           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1623             return true;
1624         }
1625     }
1626   /* If we did not managed to inline the function nor redirect
1627      to an ipa-cp clone (that are seen by having local flag set),
1628      it is probably pointless to inline it unless hardware is missing
1629      indirect call predictor.  */
1630   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1631     return false;
1632   /* For overwritable targets there is not much to do.  */
1633   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1634     return false;
1635   /* OK, speculation seems interesting.  */
1636   return true;
1637 }
1638
1639 /* We know that EDGE is not going to be inlined.
1640    See if we can remove speculation.  */
1641
1642 static void
1643 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1644 {
1645   if (edge->speculative && !speculation_useful_p (edge, false))
1646     {
1647       struct cgraph_node *node = edge->caller;
1648       struct cgraph_node *where = node->global.inlined_to
1649                                   ? node->global.inlined_to : node;
1650       auto_bitmap updated_nodes;
1651
1652       if (edge->count.ipa ().initialized_p ())
1653         spec_rem += edge->count.ipa ();
1654       edge->resolve_speculation ();
1655       reset_edge_caches (where);
1656       ipa_update_overall_fn_summary (where);
1657       update_caller_keys (edge_heap, where,
1658                           updated_nodes, NULL);
1659       update_callee_keys (edge_heap, where,
1660                           updated_nodes);
1661     }
1662 }
1663
1664 /* Return true if NODE should be accounted for overall size estimate.
1665    Skip all nodes optimized for size so we can measure the growth of hot
1666    part of program no matter of the padding.  */
1667
1668 bool
1669 inline_account_function_p (struct cgraph_node *node)
1670 {
1671    return (!DECL_EXTERNAL (node->decl)
1672            && !opt_for_fn (node->decl, optimize_size)
1673            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1674 }
1675
1676 /* Count number of callers of NODE and store it into DATA (that
1677    points to int.  Worker for cgraph_for_node_and_aliases.  */
1678
1679 static bool
1680 sum_callers (struct cgraph_node *node, void *data)
1681 {
1682   struct cgraph_edge *e;
1683   int *num_calls = (int *)data;
1684
1685   for (e = node->callers; e; e = e->next_caller)
1686     (*num_calls)++;
1687   return false;
1688 }
1689
1690 /* We use greedy algorithm for inlining of small functions:
1691    All inline candidates are put into prioritized heap ordered in
1692    increasing badness.
1693
1694    The inlining of small functions is bounded by unit growth parameters.  */
1695
1696 static void
1697 inline_small_functions (void)
1698 {
1699   struct cgraph_node *node;
1700   struct cgraph_edge *edge;
1701   edge_heap_t edge_heap (sreal::min ());
1702   auto_bitmap updated_nodes;
1703   int min_size, max_size;
1704   auto_vec<cgraph_edge *> new_indirect_edges;
1705   int initial_size = 0;
1706   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1707   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1708   new_indirect_edges.create (8);
1709
1710   edge_removal_hook_holder
1711     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1712
1713   /* Compute overall unit size and other global parameters used by badness
1714      metrics.  */
1715
1716   max_count = profile_count::uninitialized ();
1717   ipa_reduced_postorder (order, true, true, NULL);
1718   free (order);
1719
1720   FOR_EACH_DEFINED_FUNCTION (node)
1721     if (!node->global.inlined_to)
1722       {
1723         if (!node->alias && node->analyzed
1724             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1725             && opt_for_fn (node->decl, optimize))
1726           {
1727             struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1728             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1729
1730             /* Do not account external functions, they will be optimized out
1731                if not inlined.  Also only count the non-cold portion of program.  */
1732             if (inline_account_function_p (node))
1733               initial_size += info->size;
1734             info->growth = estimate_growth (node);
1735
1736             int num_calls = 0;
1737             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1738                                                true);
1739             if (num_calls == 1)
1740               info->single_caller = true;
1741             if (dfs && dfs->next_cycle)
1742               {
1743                 struct cgraph_node *n2;
1744                 int id = dfs->scc_no + 1;
1745                 for (n2 = node; n2;
1746                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1747                   if (opt_for_fn (n2->decl, optimize))
1748                     {
1749                       struct ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
1750                       if (info2->scc_no)
1751                         break;
1752                       info2->scc_no = id;
1753                     }
1754               }
1755           }
1756
1757         for (edge = node->callers; edge; edge = edge->next_caller)
1758           max_count = max_count.max (edge->count.ipa ());
1759       }
1760   ipa_free_postorder_info ();
1761   initialize_growth_caches ();
1762
1763   if (dump_file)
1764     fprintf (dump_file,
1765              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1766              initial_size);
1767
1768   overall_size = initial_size;
1769   max_size = compute_max_insns (overall_size);
1770   min_size = overall_size;
1771
1772   /* Populate the heap with all edges we might inline.  */
1773
1774   FOR_EACH_DEFINED_FUNCTION (node)
1775     {
1776       bool update = false;
1777       struct cgraph_edge *next = NULL;
1778       bool has_speculative = false;
1779
1780       if (!opt_for_fn (node->decl, optimize))
1781         continue;
1782
1783       if (dump_file)
1784         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1785
1786       for (edge = node->callees; edge; edge = next)
1787         {
1788           next = edge->next_callee;
1789           if (edge->inline_failed
1790               && !edge->aux
1791               && can_inline_edge_p (edge, true)
1792               && want_inline_small_function_p (edge, true)
1793               && edge->inline_failed)
1794             {
1795               gcc_assert (!edge->aux);
1796               update_edge_key (&edge_heap, edge);
1797             }
1798           if (edge->speculative)
1799             has_speculative = true;
1800         }
1801       if (has_speculative)
1802         for (edge = node->callees; edge; edge = next)
1803           if (edge->speculative && !speculation_useful_p (edge,
1804                                                           edge->aux != NULL))
1805             {
1806               edge->resolve_speculation ();
1807               update = true;
1808             }
1809       if (update)
1810         {
1811           struct cgraph_node *where = node->global.inlined_to
1812                                       ? node->global.inlined_to : node;
1813           ipa_update_overall_fn_summary (where);
1814           reset_edge_caches (where);
1815           update_caller_keys (&edge_heap, where,
1816                               updated_nodes, NULL);
1817           update_callee_keys (&edge_heap, where,
1818                               updated_nodes);
1819           bitmap_clear (updated_nodes);
1820         }
1821     }
1822
1823   gcc_assert (in_lto_p
1824               || !(max_count > 0)
1825               || (profile_info && flag_branch_probabilities));
1826
1827   while (!edge_heap.empty ())
1828     {
1829       int old_size = overall_size;
1830       struct cgraph_node *where, *callee;
1831       sreal badness = edge_heap.min_key ();
1832       sreal current_badness;
1833       int growth;
1834
1835       edge = edge_heap.extract_min ();
1836       gcc_assert (edge->aux);
1837       edge->aux = NULL;
1838       if (!edge->inline_failed || !edge->callee->analyzed)
1839         continue;
1840
1841 #if CHECKING_P
1842       /* Be sure that caches are maintained consistent.
1843          This check is affected by scaling roundoff errors when compiling for
1844          IPA this we skip it in that case.  */
1845       if (!edge->callee->count.ipa_p ()
1846           && (!max_count.initialized_p () || !max_count.nonzero_p ()))
1847         {
1848           sreal cached_badness = edge_badness (edge, false);
1849
1850           int old_size_est = estimate_edge_size (edge);
1851           sreal old_time_est = estimate_edge_time (edge);
1852           int old_hints_est = estimate_edge_hints (edge);
1853
1854           reset_edge_growth_cache (edge);
1855           gcc_assert (old_size_est == estimate_edge_size (edge));
1856           gcc_assert (old_time_est == estimate_edge_time (edge));
1857           /* FIXME:
1858
1859              gcc_assert (old_hints_est == estimate_edge_hints (edge));
1860
1861              fails with profile feedback because some hints depends on
1862              maybe_hot_edge_p predicate and because callee gets inlined to other
1863              calls, the edge may become cold.
1864              This ought to be fixed by computing relative probabilities
1865              for given invocation but that will be better done once whole
1866              code is converted to sreals.  Disable for now and revert to "wrong"
1867              value so enable/disable checking paths agree.  */
1868           edge_growth_cache[edge->uid].hints = old_hints_est + 1;
1869
1870           /* When updating the edge costs, we only decrease badness in the keys.
1871              Increases of badness are handled lazilly; when we see key with out
1872              of date value on it, we re-insert it now.  */
1873           current_badness = edge_badness (edge, false);
1874           gcc_assert (cached_badness == current_badness);
1875           gcc_assert (current_badness >= badness);
1876         }
1877       else
1878         current_badness = edge_badness (edge, false);
1879 #else
1880       current_badness = edge_badness (edge, false);
1881 #endif
1882       if (current_badness != badness)
1883         {
1884           if (edge_heap.min () && current_badness > edge_heap.min_key ())
1885             {
1886               edge->aux = edge_heap.insert (current_badness, edge);
1887               continue;
1888             }
1889           else
1890             badness = current_badness;
1891         }
1892
1893       if (!can_inline_edge_p (edge, true))
1894         {
1895           resolve_noninline_speculation (&edge_heap, edge);
1896           continue;
1897         }
1898
1899       callee = edge->callee->ultimate_alias_target ();
1900       growth = estimate_edge_growth (edge);
1901       if (dump_file)
1902         {
1903           fprintf (dump_file,
1904                    "\nConsidering %s with %i size\n",
1905                    callee->dump_name (),
1906                    ipa_fn_summaries->get (callee)->size);
1907           fprintf (dump_file,
1908                    " to be inlined into %s in %s:%i\n"
1909                    " Estimated badness is %f, frequency %.2f.\n",
1910                    edge->caller->dump_name (),
1911                    edge->call_stmt
1912                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
1913                                                         edge->call_stmt))
1914                        > BUILTINS_LOCATION)
1915                    ? gimple_filename ((const gimple *) edge->call_stmt)
1916                    : "unknown",
1917                    edge->call_stmt
1918                    ? gimple_lineno ((const gimple *) edge->call_stmt)
1919                    : -1,
1920                    badness.to_double (),
1921                    edge->sreal_frequency ().to_double ());
1922           if (edge->count.ipa ().initialized_p ())
1923             {
1924               fprintf (dump_file, " Called ");
1925               edge->count.ipa ().dump (dump_file);
1926               fprintf (dump_file, " times\n");
1927             }
1928           if (dump_flags & TDF_DETAILS)
1929             edge_badness (edge, true);
1930         }
1931
1932       if (overall_size + growth > max_size
1933           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1934         {
1935           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1936           report_inline_failed_reason (edge);
1937           resolve_noninline_speculation (&edge_heap, edge);
1938           continue;
1939         }
1940
1941       if (!want_inline_small_function_p (edge, true))
1942         {
1943           resolve_noninline_speculation (&edge_heap, edge);
1944           continue;
1945         }
1946
1947       /* Heuristics for inlining small functions work poorly for
1948          recursive calls where we do effects similar to loop unrolling.
1949          When inlining such edge seems profitable, leave decision on
1950          specific inliner.  */
1951       if (edge->recursive_p ())
1952         {
1953           where = edge->caller;
1954           if (where->global.inlined_to)
1955             where = where->global.inlined_to;
1956           if (!recursive_inlining (edge,
1957                                    opt_for_fn (edge->caller->decl,
1958                                                flag_indirect_inlining)
1959                                    ? &new_indirect_edges : NULL))
1960             {
1961               edge->inline_failed = CIF_RECURSIVE_INLINING;
1962               resolve_noninline_speculation (&edge_heap, edge);
1963               continue;
1964             }
1965           reset_edge_caches (where);
1966           /* Recursive inliner inlines all recursive calls of the function
1967              at once. Consequently we need to update all callee keys.  */
1968           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
1969             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1970           update_callee_keys (&edge_heap, where, updated_nodes);
1971           bitmap_clear (updated_nodes);
1972         }
1973       else
1974         {
1975           struct cgraph_node *outer_node = NULL;
1976           int depth = 0;
1977
1978           /* Consider the case where self recursive function A is inlined
1979              into B.  This is desired optimization in some cases, since it
1980              leads to effect similar of loop peeling and we might completely
1981              optimize out the recursive call.  However we must be extra
1982              selective.  */
1983
1984           where = edge->caller;
1985           while (where->global.inlined_to)
1986             {
1987               if (where->decl == callee->decl)
1988                 outer_node = where, depth++;
1989               where = where->callers->caller;
1990             }
1991           if (outer_node
1992               && !want_inline_self_recursive_call_p (edge, outer_node,
1993                                                      true, depth))
1994             {
1995               edge->inline_failed
1996                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1997                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1998               resolve_noninline_speculation (&edge_heap, edge);
1999               continue;
2000             }
2001           else if (depth && dump_file)
2002             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2003
2004           gcc_checking_assert (!callee->global.inlined_to);
2005           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2006           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2007
2008           reset_edge_caches (edge->callee);
2009
2010           update_callee_keys (&edge_heap, where, updated_nodes);
2011         }
2012       where = edge->caller;
2013       if (where->global.inlined_to)
2014         where = where->global.inlined_to;
2015
2016       /* Our profitability metric can depend on local properties
2017          such as number of inlinable calls and size of the function body.
2018          After inlining these properties might change for the function we
2019          inlined into (since it's body size changed) and for the functions
2020          called by function we inlined (since number of it inlinable callers
2021          might change).  */
2022       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2023       /* Offline copy count has possibly changed, recompute if profile is
2024          available.  */
2025       struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2026       if (n != edge->callee && n->analyzed && n->count.ipa ().initialized_p ())
2027         update_callee_keys (&edge_heap, n, updated_nodes);
2028       bitmap_clear (updated_nodes);
2029
2030       if (dump_file)
2031         {
2032           fprintf (dump_file,
2033                    " Inlined %s into %s which now has time %f and size %i, "
2034                    "net change of %+i.\n",
2035                    xstrdup_for_dump (edge->callee->name ()),
2036                    xstrdup_for_dump (edge->caller->name ()),
2037                    ipa_fn_summaries->get (edge->caller)->time.to_double (),
2038                    ipa_fn_summaries->get (edge->caller)->size,
2039                    overall_size - old_size);
2040         }
2041       if (min_size > overall_size)
2042         {
2043           min_size = overall_size;
2044           max_size = compute_max_insns (min_size);
2045
2046           if (dump_file)
2047             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2048         }
2049     }
2050
2051   free_growth_caches ();
2052   if (dump_file)
2053     fprintf (dump_file,
2054              "Unit growth for small function inlining: %i->%i (%i%%)\n",
2055              initial_size, overall_size,
2056              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2057   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2058 }
2059
2060 /* Flatten NODE.  Performed both during early inlining and
2061    at IPA inlining time.  */
2062
2063 static void
2064 flatten_function (struct cgraph_node *node, bool early)
2065 {
2066   struct cgraph_edge *e;
2067
2068   /* We shouldn't be called recursively when we are being processed.  */
2069   gcc_assert (node->aux == NULL);
2070
2071   node->aux = (void *) node;
2072
2073   for (e = node->callees; e; e = e->next_callee)
2074     {
2075       struct cgraph_node *orig_callee;
2076       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2077
2078       /* We've hit cycle?  It is time to give up.  */
2079       if (callee->aux)
2080         {
2081           if (dump_file)
2082             fprintf (dump_file,
2083                      "Not inlining %s into %s to avoid cycle.\n",
2084                      xstrdup_for_dump (callee->name ()),
2085                      xstrdup_for_dump (e->caller->name ()));
2086           if (cgraph_inline_failed_type (e->inline_failed) != CIF_FINAL_ERROR)
2087             e->inline_failed = CIF_RECURSIVE_INLINING;
2088           continue;
2089         }
2090
2091       /* When the edge is already inlined, we just need to recurse into
2092          it in order to fully flatten the leaves.  */
2093       if (!e->inline_failed)
2094         {
2095           flatten_function (callee, early);
2096           continue;
2097         }
2098
2099       /* Flatten attribute needs to be processed during late inlining. For
2100          extra code quality we however do flattening during early optimization,
2101          too.  */
2102       if (!early
2103           ? !can_inline_edge_p (e, true)
2104           : !can_early_inline_edge_p (e))
2105         continue;
2106
2107       if (e->recursive_p ())
2108         {
2109           if (dump_file)
2110             fprintf (dump_file, "Not inlining: recursive call.\n");
2111           continue;
2112         }
2113
2114       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2115           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2116         {
2117           if (dump_file)
2118             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2119           continue;
2120         }
2121
2122       /* Inline the edge and flatten the inline clone.  Avoid
2123          recursing through the original node if the node was cloned.  */
2124       if (dump_file)
2125         fprintf (dump_file, " Inlining %s into %s.\n",
2126                  xstrdup_for_dump (callee->name ()),
2127                  xstrdup_for_dump (e->caller->name ()));
2128       orig_callee = callee;
2129       inline_call (e, true, NULL, NULL, false);
2130       if (e->callee != orig_callee)
2131         orig_callee->aux = (void *) node;
2132       flatten_function (e->callee, early);
2133       if (e->callee != orig_callee)
2134         orig_callee->aux = NULL;
2135     }
2136
2137   node->aux = NULL;
2138   if (!node->global.inlined_to)
2139     ipa_update_overall_fn_summary (node);
2140 }
2141
2142 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2143    DATA points to number of calls originally found so we avoid infinite
2144    recursion.  */
2145
2146 static bool
2147 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2148                          hash_set<cgraph_node *> *callers)
2149 {
2150   int *num_calls = (int *)data;
2151   bool callee_removed = false;
2152
2153   while (node->callers && !node->global.inlined_to)
2154     {
2155       struct cgraph_node *caller = node->callers->caller;
2156
2157       if (!can_inline_edge_p (node->callers, true)
2158           || node->callers->recursive_p ())
2159         {
2160           if (dump_file)
2161             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2162           *num_calls = 0;
2163           return false;
2164         }
2165
2166       if (dump_file)
2167         {
2168           fprintf (dump_file,
2169                    "\nInlining %s size %i.\n",
2170                    node->name (),
2171                    ipa_fn_summaries->get (node)->size);
2172           fprintf (dump_file,
2173                    " Called once from %s %i insns.\n",
2174                    node->callers->caller->name (),
2175                    ipa_fn_summaries->get (node->callers->caller)->size);
2176         }
2177
2178       /* Remember which callers we inlined to, delaying updating the
2179          overall summary.  */
2180       callers->add (node->callers->caller);
2181       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2182       if (dump_file)
2183         fprintf (dump_file,
2184                  " Inlined into %s which now has %i size\n",
2185                  caller->name (),
2186                  ipa_fn_summaries->get (caller)->size);
2187       if (!(*num_calls)--)
2188         {
2189           if (dump_file)
2190             fprintf (dump_file, "New calls found; giving up.\n");
2191           return callee_removed;
2192         }
2193       if (callee_removed)
2194         return true;
2195     }
2196   return false;
2197 }
2198
2199 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2200    update.  */
2201
2202 static bool
2203 inline_to_all_callers (struct cgraph_node *node, void *data)
2204 {
2205   hash_set<cgraph_node *> callers;
2206   bool res = inline_to_all_callers_1 (node, data, &callers);
2207   /* Perform the delayed update of the overall summary of all callers
2208      processed.  This avoids quadratic behavior in the cases where
2209      we have a lot of calls to the same function.  */
2210   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2211        i != callers.end (); ++i)
2212     ipa_update_overall_fn_summary (*i);
2213   return res;
2214 }
2215
2216 /* Output overall time estimate.  */
2217 static void
2218 dump_overall_stats (void)
2219 {
2220   sreal sum_weighted = 0, sum = 0;
2221   struct cgraph_node *node;
2222
2223   FOR_EACH_DEFINED_FUNCTION (node)
2224     if (!node->global.inlined_to
2225         && !node->alias)
2226       {
2227         sreal time = ipa_fn_summaries->get (node)->time;
2228         sum += time;
2229         if (node->count.ipa ().initialized_p ())
2230           sum_weighted += time * node->count.ipa ().to_gcov_type ();
2231       }
2232   fprintf (dump_file, "Overall time estimate: "
2233            "%f weighted by profile: "
2234            "%f\n", sum.to_double (), sum_weighted.to_double ());
2235 }
2236
2237 /* Output some useful stats about inlining.  */
2238
2239 static void
2240 dump_inline_stats (void)
2241 {
2242   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2243   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2244   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2245   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2246   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2247   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2248   int64_t reason[CIF_N_REASONS][2];
2249   sreal reason_freq[CIF_N_REASONS];
2250   int i;
2251   struct cgraph_node *node;
2252
2253   memset (reason, 0, sizeof (reason));
2254   for (i=0; i < CIF_N_REASONS; i++)
2255     reason_freq[i] = 0;
2256   FOR_EACH_DEFINED_FUNCTION (node)
2257   {
2258     struct cgraph_edge *e;
2259     for (e = node->callees; e; e = e->next_callee)
2260       {
2261         if (e->inline_failed)
2262           {
2263             if (e->count.ipa ().initialized_p ())
2264               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2265             reason_freq[(int) e->inline_failed] += e->sreal_frequency ();
2266             reason[(int) e->inline_failed][1] ++;
2267             if (DECL_VIRTUAL_P (e->callee->decl)
2268                 && e->count.ipa ().initialized_p ())
2269               {
2270                 if (e->indirect_inlining_edge)
2271                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2272                 else
2273                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2274               }
2275             else if (e->count.ipa ().initialized_p ())
2276               {
2277                 if (e->indirect_inlining_edge)
2278                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2279                 else
2280                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2281               }
2282           }
2283         else if (e->count.ipa ().initialized_p ())
2284           {
2285             if (e->speculative)
2286               {
2287                 if (DECL_VIRTUAL_P (e->callee->decl))
2288                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2289                 else
2290                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2291               }
2292             else if (DECL_VIRTUAL_P (e->callee->decl))
2293               {
2294                 if (e->indirect_inlining_edge)
2295                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2296                 else
2297                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2298               }
2299             else
2300               {
2301                 if (e->indirect_inlining_edge)
2302                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2303                 else
2304                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2305               }
2306           }
2307       }
2308     for (e = node->indirect_calls; e; e = e->next_callee)
2309       if (e->indirect_info->polymorphic
2310           & e->count.ipa ().initialized_p ())
2311         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2312       else if (e->count.ipa ().initialized_p ())
2313         indirect_cnt += e->count.ipa ().to_gcov_type ();
2314   }
2315   if (max_count.initialized_p ())
2316     {
2317       fprintf (dump_file,
2318                "Inlined %" PRId64 " + speculative "
2319                "%" PRId64 " + speculative polymorphic "
2320                "%" PRId64 " + previously indirect "
2321                "%" PRId64 " + virtual "
2322                "%" PRId64 " + virtual and previously indirect "
2323                "%" PRId64 "\n" "Not inlined "
2324                "%" PRId64 " + previously indirect "
2325                "%" PRId64 " + virtual "
2326                "%" PRId64 " + virtual and previously indirect "
2327                "%" PRId64 " + stil indirect "
2328                "%" PRId64 " + still indirect polymorphic "
2329                "%" PRId64 "\n", inlined_cnt,
2330                inlined_speculative, inlined_speculative_ply,
2331                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2332                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2333                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2334       fprintf (dump_file, "Removed speculations ");
2335       spec_rem.dump (dump_file);
2336       fprintf (dump_file, "\n");
2337     }
2338   dump_overall_stats ();
2339   fprintf (dump_file, "\nWhy inlining failed?\n");
2340   for (i = 0; i < CIF_N_REASONS; i++)
2341     if (reason[i][1])
2342       fprintf (dump_file, "%-50s: %8i calls, %8f freq, %" PRId64" count\n",
2343                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2344                (int) reason[i][1], reason_freq[i].to_double (), reason[i][0]);
2345 }
2346
2347 /* Called when node is removed.  */
2348
2349 static void
2350 flatten_remove_node_hook (struct cgraph_node *node, void *data)
2351 {
2352   if (lookup_attribute ("flatten", DECL_ATTRIBUTES (node->decl)) == NULL)
2353     return;
2354
2355   hash_set<struct cgraph_node *> *removed
2356     = (hash_set<struct cgraph_node *> *) data;
2357   removed->add (node);
2358 }
2359
2360 /* Decide on the inlining.  We do so in the topological order to avoid
2361    expenses on updating data structures.  */
2362
2363 static unsigned int
2364 ipa_inline (void)
2365 {
2366   struct cgraph_node *node;
2367   int nnodes;
2368   struct cgraph_node **order;
2369   int i, j;
2370   int cold;
2371   bool remove_functions = false;
2372
2373   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2374
2375   if (dump_file)
2376     ipa_dump_fn_summaries (dump_file);
2377
2378   nnodes = ipa_reverse_postorder (order);
2379   spec_rem = profile_count::zero ();
2380
2381   FOR_EACH_FUNCTION (node)
2382     {
2383       node->aux = 0;
2384
2385       /* Recompute the default reasons for inlining because they may have
2386          changed during merging.  */
2387       if (in_lto_p)
2388         {
2389           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2390             {
2391               gcc_assert (e->inline_failed);
2392               initialize_inline_failed (e);
2393             }
2394           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2395             initialize_inline_failed (e);
2396         }
2397     }
2398
2399   if (dump_file)
2400     fprintf (dump_file, "\nFlattening functions:\n");
2401
2402   /* First shrink order array, so that it only contains nodes with
2403      flatten attribute.  */
2404   for (i = nnodes - 1, j = i; i >= 0; i--)
2405     {
2406       node = order[i];
2407       if (lookup_attribute ("flatten",
2408                             DECL_ATTRIBUTES (node->decl)) != NULL)
2409         order[j--] = order[i];
2410     }
2411
2412   /* After the above loop, order[j + 1] ... order[nnodes - 1] contain
2413      nodes with flatten attribute.  If there is more than one such
2414      node, we need to register a node removal hook, as flatten_function
2415      could remove other nodes with flatten attribute.  See PR82801.  */
2416   struct cgraph_node_hook_list *node_removal_hook_holder = NULL;
2417   hash_set<struct cgraph_node *> *flatten_removed_nodes = NULL;
2418   if (j < nnodes - 2)
2419     {
2420       flatten_removed_nodes = new hash_set<struct cgraph_node *>;
2421       node_removal_hook_holder
2422         = symtab->add_cgraph_removal_hook (&flatten_remove_node_hook,
2423                                            flatten_removed_nodes);
2424     }
2425
2426   /* In the first pass handle functions to be flattened.  Do this with
2427      a priority so none of our later choices will make this impossible.  */
2428   for (i = nnodes - 1; i > j; i--)
2429     {
2430       node = order[i];
2431       if (flatten_removed_nodes
2432           && flatten_removed_nodes->contains (node))
2433         continue;
2434
2435       /* Handle nodes to be flattened.
2436          Ideally when processing callees we stop inlining at the
2437          entry of cycles, possibly cloning that entry point and
2438          try to flatten itself turning it into a self-recursive
2439          function.  */
2440       if (dump_file)
2441         fprintf (dump_file, "Flattening %s\n", node->name ());
2442       flatten_function (node, false);
2443     }
2444
2445   if (j < nnodes - 2)
2446     {
2447       symtab->remove_cgraph_removal_hook (node_removal_hook_holder);
2448       delete flatten_removed_nodes;
2449     }
2450   free (order);
2451
2452   if (dump_file)
2453     dump_overall_stats ();
2454
2455   inline_small_functions ();
2456
2457   gcc_assert (symtab->state == IPA_SSA);
2458   symtab->state = IPA_SSA_AFTER_INLINING;
2459   /* Do first after-inlining removal.  We want to remove all "stale" extern
2460      inline functions and virtual functions so we really know what is called
2461      once.  */
2462   symtab->remove_unreachable_nodes (dump_file);
2463
2464   /* Inline functions with a property that after inlining into all callers the
2465      code size will shrink because the out-of-line copy is eliminated.
2466      We do this regardless on the callee size as long as function growth limits
2467      are met.  */
2468   if (dump_file)
2469     fprintf (dump_file,
2470              "\nDeciding on functions to be inlined into all callers and "
2471              "removing useless speculations:\n");
2472
2473   /* Inlining one function called once has good chance of preventing
2474      inlining other function into the same callee.  Ideally we should
2475      work in priority order, but probably inlining hot functions first
2476      is good cut without the extra pain of maintaining the queue.
2477
2478      ??? this is not really fitting the bill perfectly: inlining function
2479      into callee often leads to better optimization of callee due to
2480      increased context for optimization.
2481      For example if main() function calls a function that outputs help
2482      and then function that does the main optmization, we should inline
2483      the second with priority even if both calls are cold by themselves.
2484
2485      We probably want to implement new predicate replacing our use of
2486      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2487      to be hot.  */
2488   for (cold = 0; cold <= 1; cold ++)
2489     {
2490       FOR_EACH_DEFINED_FUNCTION (node)
2491         {
2492           struct cgraph_edge *edge, *next;
2493           bool update=false;
2494
2495           if (!opt_for_fn (node->decl, optimize)
2496               || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2497             continue;
2498
2499           for (edge = node->callees; edge; edge = next)
2500             {
2501               next = edge->next_callee;
2502               if (edge->speculative && !speculation_useful_p (edge, false))
2503                 {
2504                   if (edge->count.ipa ().initialized_p ())
2505                     spec_rem += edge->count.ipa ();
2506                   edge->resolve_speculation ();
2507                   update = true;
2508                   remove_functions = true;
2509                 }
2510             }
2511           if (update)
2512             {
2513               struct cgraph_node *where = node->global.inlined_to
2514                                           ? node->global.inlined_to : node;
2515               reset_edge_caches (where);
2516               ipa_update_overall_fn_summary (where);
2517             }
2518           if (want_inline_function_to_all_callers_p (node, cold))
2519             {
2520               int num_calls = 0;
2521               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2522                                                  true);
2523               while (node->call_for_symbol_and_aliases
2524                        (inline_to_all_callers, &num_calls, true))
2525                 ;
2526               remove_functions = true;
2527             }
2528         }
2529     }
2530
2531   /* Free ipa-prop structures if they are no longer needed.  */
2532   ipa_free_all_structures_after_iinln ();
2533
2534   if (dump_file)
2535     {
2536       fprintf (dump_file,
2537                "\nInlined %i calls, eliminated %i functions\n\n",
2538                ncalls_inlined, nfunctions_inlined);
2539       dump_inline_stats ();
2540     }
2541
2542   if (dump_file)
2543     ipa_dump_fn_summaries (dump_file);
2544   return remove_functions ? TODO_remove_functions : 0;
2545 }
2546
2547 /* Inline always-inline function calls in NODE.  */
2548
2549 static bool
2550 inline_always_inline_functions (struct cgraph_node *node)
2551 {
2552   struct cgraph_edge *e;
2553   bool inlined = false;
2554
2555   for (e = node->callees; e; e = e->next_callee)
2556     {
2557       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2558       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2559         continue;
2560
2561       if (e->recursive_p ())
2562         {
2563           if (dump_file)
2564             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2565                      e->callee->name ());
2566           e->inline_failed = CIF_RECURSIVE_INLINING;
2567           continue;
2568         }
2569
2570       if (!can_early_inline_edge_p (e))
2571         {
2572           /* Set inlined to true if the callee is marked "always_inline" but
2573              is not inlinable.  This will allow flagging an error later in
2574              expand_call_inline in tree-inline.c.  */
2575           if (lookup_attribute ("always_inline",
2576                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2577             inlined = true;
2578           continue;
2579         }
2580
2581       if (dump_file)
2582         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2583                  xstrdup_for_dump (e->callee->name ()),
2584                  xstrdup_for_dump (e->caller->name ()));
2585       inline_call (e, true, NULL, NULL, false);
2586       inlined = true;
2587     }
2588   if (inlined)
2589     ipa_update_overall_fn_summary (node);
2590
2591   return inlined;
2592 }
2593
2594 /* Decide on the inlining.  We do so in the topological order to avoid
2595    expenses on updating data structures.  */
2596
2597 static bool
2598 early_inline_small_functions (struct cgraph_node *node)
2599 {
2600   struct cgraph_edge *e;
2601   bool inlined = false;
2602
2603   for (e = node->callees; e; e = e->next_callee)
2604     {
2605       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2606       if (!ipa_fn_summaries->get (callee)->inlinable
2607           || !e->inline_failed)
2608         continue;
2609
2610       /* Do not consider functions not declared inline.  */
2611       if (!DECL_DECLARED_INLINE_P (callee->decl)
2612           && !opt_for_fn (node->decl, flag_inline_small_functions)
2613           && !opt_for_fn (node->decl, flag_inline_functions))
2614         continue;
2615
2616       if (dump_file)
2617         fprintf (dump_file, "Considering inline candidate %s.\n",
2618                  callee->name ());
2619
2620       if (!can_early_inline_edge_p (e))
2621         continue;
2622
2623       if (e->recursive_p ())
2624         {
2625           if (dump_file)
2626             fprintf (dump_file, "  Not inlining: recursive call.\n");
2627           continue;
2628         }
2629
2630       if (!want_early_inline_function_p (e))
2631         continue;
2632
2633       if (dump_file)
2634         fprintf (dump_file, " Inlining %s into %s.\n",
2635                  xstrdup_for_dump (callee->name ()),
2636                  xstrdup_for_dump (e->caller->name ()));
2637       inline_call (e, true, NULL, NULL, false);
2638       inlined = true;
2639     }
2640
2641   if (inlined)
2642     ipa_update_overall_fn_summary (node);
2643
2644   return inlined;
2645 }
2646
2647 unsigned int
2648 early_inliner (function *fun)
2649 {
2650   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2651   struct cgraph_edge *edge;
2652   unsigned int todo = 0;
2653   int iterations = 0;
2654   bool inlined = false;
2655
2656   if (seen_error ())
2657     return 0;
2658
2659   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2660      happens when some pass decides to construct new function and
2661      cgraph_add_new_function calls lowering passes and early optimization on
2662      it.  This may confuse ourself when early inliner decide to inline call to
2663      function clone, because function clones don't have parameter list in
2664      ipa-prop matching their signature.  */
2665   if (ipa_node_params_sum)
2666     return 0;
2667
2668   if (flag_checking)
2669     node->verify ();
2670   node->remove_all_references ();
2671
2672   /* Rebuild this reference because it dosn't depend on
2673      function's body and it's required to pass cgraph_node
2674      verification.  */
2675   if (node->instrumented_version
2676       && !node->instrumentation_clone)
2677     node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL);
2678
2679   /* Even when not optimizing or not inlining inline always-inline
2680      functions.  */
2681   inlined = inline_always_inline_functions (node);
2682
2683   if (!optimize
2684       || flag_no_inline
2685       || !flag_early_inlining
2686       /* Never inline regular functions into always-inline functions
2687          during incremental inlining.  This sucks as functions calling
2688          always inline functions will get less optimized, but at the
2689          same time inlining of functions calling always inline
2690          function into an always inline function might introduce
2691          cycles of edges to be always inlined in the callgraph.
2692
2693          We might want to be smarter and just avoid this type of inlining.  */
2694       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2695           && lookup_attribute ("always_inline",
2696                                DECL_ATTRIBUTES (node->decl))))
2697     ;
2698   else if (lookup_attribute ("flatten",
2699                              DECL_ATTRIBUTES (node->decl)) != NULL)
2700     {
2701       /* When the function is marked to be flattened, recursively inline
2702          all calls in it.  */
2703       if (dump_file)
2704         fprintf (dump_file,
2705                  "Flattening %s\n", node->name ());
2706       flatten_function (node, true);
2707       inlined = true;
2708     }
2709   else
2710     {
2711       /* If some always_inline functions was inlined, apply the changes.
2712          This way we will not account always inline into growth limits and
2713          moreover we will inline calls from always inlines that we skipped
2714          previously because of conditional above.  */
2715       if (inlined)
2716         {
2717           timevar_push (TV_INTEGRATION);
2718           todo |= optimize_inline_calls (current_function_decl);
2719           /* optimize_inline_calls call above might have introduced new
2720              statements that don't have inline parameters computed.  */
2721           for (edge = node->callees; edge; edge = edge->next_callee)
2722             {
2723               struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2724               es->call_stmt_size
2725                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2726               es->call_stmt_time
2727                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2728             }
2729           ipa_update_overall_fn_summary (node);
2730           inlined = false;
2731           timevar_pop (TV_INTEGRATION);
2732         }
2733       /* We iterate incremental inlining to get trivial cases of indirect
2734          inlining.  */
2735       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2736              && early_inline_small_functions (node))
2737         {
2738           timevar_push (TV_INTEGRATION);
2739           todo |= optimize_inline_calls (current_function_decl);
2740
2741           /* Technically we ought to recompute inline parameters so the new
2742              iteration of early inliner works as expected.  We however have
2743              values approximately right and thus we only need to update edge
2744              info that might be cleared out for newly discovered edges.  */
2745           for (edge = node->callees; edge; edge = edge->next_callee)
2746             {
2747               /* We have no summary for new bound store calls yet.  */
2748               struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2749               es->call_stmt_size
2750                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2751               es->call_stmt_time
2752                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2753
2754               if (edge->callee->decl
2755                   && !gimple_check_call_matching_types (
2756                       edge->call_stmt, edge->callee->decl, false))
2757                 {
2758                   edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2759                   edge->call_stmt_cannot_inline_p = true;
2760                 }
2761             }
2762           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2763             ipa_update_overall_fn_summary (node);
2764           timevar_pop (TV_INTEGRATION);
2765           iterations++;
2766           inlined = false;
2767         }
2768       if (dump_file)
2769         fprintf (dump_file, "Iterations: %i\n", iterations);
2770     }
2771
2772   if (inlined)
2773     {
2774       timevar_push (TV_INTEGRATION);
2775       todo |= optimize_inline_calls (current_function_decl);
2776       timevar_pop (TV_INTEGRATION);
2777     }
2778
2779   fun->always_inline_functions_inlined = true;
2780
2781   return todo;
2782 }
2783
2784 /* Do inlining of small functions.  Doing so early helps profiling and other
2785    passes to be somewhat more effective and avoids some code duplication in
2786    later real inlining pass for testcases with very many function calls.  */
2787
2788 namespace {
2789
2790 const pass_data pass_data_early_inline =
2791 {
2792   GIMPLE_PASS, /* type */
2793   "einline", /* name */
2794   OPTGROUP_INLINE, /* optinfo_flags */
2795   TV_EARLY_INLINING, /* tv_id */
2796   PROP_ssa, /* properties_required */
2797   0, /* properties_provided */
2798   0, /* properties_destroyed */
2799   0, /* todo_flags_start */
2800   0, /* todo_flags_finish */
2801 };
2802
2803 class pass_early_inline : public gimple_opt_pass
2804 {
2805 public:
2806   pass_early_inline (gcc::context *ctxt)
2807     : gimple_opt_pass (pass_data_early_inline, ctxt)
2808   {}
2809
2810   /* opt_pass methods: */
2811   virtual unsigned int execute (function *);
2812
2813 }; // class pass_early_inline
2814
2815 unsigned int
2816 pass_early_inline::execute (function *fun)
2817 {
2818   return early_inliner (fun);
2819 }
2820
2821 } // anon namespace
2822
2823 gimple_opt_pass *
2824 make_pass_early_inline (gcc::context *ctxt)
2825 {
2826   return new pass_early_inline (ctxt);
2827 }
2828
2829 namespace {
2830
2831 const pass_data pass_data_ipa_inline =
2832 {
2833   IPA_PASS, /* type */
2834   "inline", /* name */
2835   OPTGROUP_INLINE, /* optinfo_flags */
2836   TV_IPA_INLINING, /* tv_id */
2837   0, /* properties_required */
2838   0, /* properties_provided */
2839   0, /* properties_destroyed */
2840   0, /* todo_flags_start */
2841   ( TODO_dump_symtab ), /* todo_flags_finish */
2842 };
2843
2844 class pass_ipa_inline : public ipa_opt_pass_d
2845 {
2846 public:
2847   pass_ipa_inline (gcc::context *ctxt)
2848     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2849                       NULL, /* generate_summary */
2850                       NULL, /* write_summary */
2851                       NULL, /* read_summary */
2852                       NULL, /* write_optimization_summary */
2853                       NULL, /* read_optimization_summary */
2854                       NULL, /* stmt_fixup */
2855                       0, /* function_transform_todo_flags_start */
2856                       inline_transform, /* function_transform */
2857                       NULL) /* variable_transform */
2858   {}
2859
2860   /* opt_pass methods: */
2861   virtual unsigned int execute (function *) { return ipa_inline (); }
2862
2863 }; // class pass_ipa_inline
2864
2865 } // anon namespace
2866
2867 ipa_opt_pass_d *
2868 make_pass_ipa_inline (gcc::context *ctxt)
2869 {
2870   return new pass_ipa_inline (ctxt);
2871 }