gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks that a particular inlining is allowed
  28       by the limits specified by the user (allowed function growth, stack
  29       frame growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and results of early inlining add a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good on inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "target.h"
  97 #include "rtl.h"
  98 #include "tree.h"
  99 #include "gimple.h"
 100 #include "alloc-pool.h"
 101 #include "tree-pass.h"
 102 #include "gimple-ssa.h"
 103 #include "cgraph.h"
 104 #include "lto-streamer.h"
 105 #include "trans-mem.h"
 106 #include "calls.h"
 107 #include "tree-inline.h"
 108 #include "params.h"
 109 #include "profile.h"
 110 #include "symbol-summary.h"
 111 #include "tree-vrp.h"
 112 #include "ipa-prop.h"
 113 #include "ipa-fnsummary.h"
 114 #include "ipa-inline.h"
 115 #include "ipa-utils.h"
 116 #include "sreal.h"
 117 #include "auto-profile.h"
 118 #include "builtins.h"
 119 #include "fibonacci_heap.h"
 120 #include "stringpool.h"
 121 #include "attribs.h"
 122 #include "asan.h"
 123
 124 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 125 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 126
 127 /* Statistics we collect about inlining algorithm.  */
 128 static int overall_size;
 129 static profile_count max_count;
 130 static profile_count spec_rem;
 131
 132 /* Pre-computed constants 1/CGRAPH_FREQ_BASE and 1/100. */
 133 static sreal cgraph_freq_base_rec, percent_rec;
 134
 135 /* Return false when inlining edge E would lead to violating
 136    limits on function unit growth or stack usage growth.
 137
 138    The relative function body growth limit is present generally
 139    to avoid problems with non-linear behavior of the compiler.
 140    To allow inlining huge functions into tiny wrapper, the limit
 141    is always based on the bigger of the two functions considered.
 142
 143    For stack growth limits we always base the growth in stack usage
 144    of the callers.  We want to prevent applications from segfaulting
 145    on stack overflow when functions with huge stack frames gets
 146    inlined. */
 147
 148 static bool
 149 caller_growth_limits (struct cgraph_edge *e)
 150 {
 151   struct cgraph_node *to = e->caller;
 152   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 153   int newsize;
 154   int limit = 0;
 155   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 156   ipa_fn_summary *info, *what_info, *outer_info = ipa_fn_summaries->get (to);
 157
 158   /* Look for function e->caller is inlined to.  While doing
 159      so work out the largest function body on the way.  As
 160      described above, we want to base our function growth
 161      limits based on that.  Not on the self size of the
 162      outer function, not on the self size of inline code
 163      we immediately inline to.  This is the most relaxed
 164      interpretation of the rule "do not grow large functions
 165      too much in order to prevent compiler from exploding".  */
 166   while (true)
 167     {
 168       info = ipa_fn_summaries->get (to);
 169       if (limit < info->self_size)
 170         limit = info->self_size;
 171       if (stack_size_limit < info->estimated_self_stack_size)
 172         stack_size_limit = info->estimated_self_stack_size;
 173       if (to->global.inlined_to)
 174         to = to->callers->caller;
 175       else
 176         break;
 177     }
 178
 179   what_info = ipa_fn_summaries->get (what);
 180
 181   if (limit < what_info->self_size)
 182     limit = what_info->self_size;
 183
 184   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 185
 186   /* Check the size after inlining against the function limits.  But allow
 187      the function to shrink if it went over the limits by forced inlining.  */
 188   newsize = estimate_size_after_inlining (to, e);
 189   if (newsize >= info->size
 190       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 191       && newsize > limit)
 192     {
 193       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 194       return false;
 195     }
 196
 197   if (!what_info->estimated_stack_size)
 198     return true;
 199
 200   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 201      due to large i/o datastructures used by the Fortran front-end.
 202      We ought to ignore this limit when we know that the edge is executed
 203      on every invocation of the caller (i.e. its call statement dominates
 204      exit block).  We do not track this information, yet.  */
 205   stack_size_limit += ((gcov_type)stack_size_limit
 206                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 207
 208   inlined_stack = (outer_info->stack_frame_offset
 209                    + outer_info->estimated_self_stack_size
 210                    + what_info->estimated_stack_size);
 211   /* Check new stack consumption with stack consumption at the place
 212      stack is used.  */
 213   if (inlined_stack > stack_size_limit
 214       /* If function already has large stack usage from sibling
 215          inline call, we can inline, too.
 216          This bit overoptimistically assume that we are good at stack
 217          packing.  */
 218       && inlined_stack > info->estimated_stack_size
 219       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 220     {
 221       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 222       return false;
 223     }
 224   return true;
 225 }
 226
 227 /* Dump info about why inlining has failed.  */
 228
 229 static void
 230 report_inline_failed_reason (struct cgraph_edge *e)
 231 {
 232   if (dump_file)
 233     {
 234       fprintf (dump_file, "  not inlinable: %s -> %s, %s\n",
 235                e->caller->dump_name (),
 236                e->callee->dump_name (),
 237                cgraph_inline_failed_string (e->inline_failed));
 238       if ((e->inline_failed == CIF_TARGET_OPTION_MISMATCH
 239            || e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 240           && e->caller->lto_file_data
 241           && e->callee->ultimate_alias_target ()->lto_file_data)
 242         {
 243           fprintf (dump_file, "  LTO objects: %s, %s\n",
 244                    e->caller->lto_file_data->file_name,
 245                    e->callee->ultimate_alias_target ()->lto_file_data->file_name);
 246         }
 247       if (e->inline_failed == CIF_TARGET_OPTION_MISMATCH)
 248         cl_target_option_print_diff
 249          (dump_file, 2, target_opts_for_fn (e->caller->decl),
 250           target_opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 251       if (e->inline_failed == CIF_OPTIMIZATION_MISMATCH)
 252         cl_optimization_print_diff
 253           (dump_file, 2, opts_for_fn (e->caller->decl),
 254            opts_for_fn (e->callee->ultimate_alias_target ()->decl));
 255     }
 256 }
 257
 258  /* Decide whether sanitizer-related attributes allow inlining. */
 259
 260 static bool
 261 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 262 {
 263   if (!caller || !callee)
 264     return true;
 265
 266   return sanitize_flags_p (SANITIZE_ADDRESS, caller)
 267     == sanitize_flags_p (SANITIZE_ADDRESS, callee);
 268 }
 269
 270 /* Used for flags where it is safe to inline when caller's value is
 271    grater than callee's.  */
 272 #define check_maybe_up(flag) \
 273       (opts_for_fn (caller->decl)->x_##flag             \
 274        != opts_for_fn (callee->decl)->x_##flag          \
 275        && (!always_inline                               \
 276            || opts_for_fn (caller->decl)->x_##flag      \
 277               < opts_for_fn (callee->decl)->x_##flag))
 278 /* Used for flags where it is safe to inline when caller's value is
 279    smaller than callee's.  */
 280 #define check_maybe_down(flag) \
 281       (opts_for_fn (caller->decl)->x_##flag             \
 282        != opts_for_fn (callee->decl)->x_##flag          \
 283        && (!always_inline                               \
 284            || opts_for_fn (caller->decl)->x_##flag      \
 285               > opts_for_fn (callee->decl)->x_##flag))
 286 /* Used for flags where exact match is needed for correctness.  */
 287 #define check_match(flag) \
 288       (opts_for_fn (caller->decl)->x_##flag             \
 289        != opts_for_fn (callee->decl)->x_##flag)
 290
 291  /* Decide if we can inline the edge and possibly update
 292    inline_failed reason.
 293    We check whether inlining is possible at all and whether
 294    caller growth limits allow doing so.
 295
 296    if REPORT is true, output reason to the dump file.
 297
 298    if DISREGARD_LIMITS is true, ignore size limits.*/
 299
 300 static bool
 301 can_inline_edge_p (struct cgraph_edge *e, bool report,
 302                    bool disregard_limits = false, bool early = false)
 303 {
 304   gcc_checking_assert (e->inline_failed);
 305
 306   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 307     {
 308       if (report)
 309         report_inline_failed_reason (e);
 310       return false;
 311     }
 312
 313   bool inlinable = true;
 314   enum availability avail;
 315   cgraph_node *caller = e->caller->global.inlined_to
 316                         ? e->caller->global.inlined_to : e->caller;
 317   cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller);
 318   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl);
 319   tree callee_tree
 320     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 321
 322   if (!callee->definition)
 323     {
 324       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 325       inlinable = false;
 326     }
 327   if (!early && !opt_for_fn (callee->decl, optimize))
 328     {
 329       e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
 330       inlinable = false;
 331     }
 332   else if (callee->calls_comdat_local)
 333     {
 334       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 335       inlinable = false;
 336     }
 337   else if (avail <= AVAIL_INTERPOSABLE)
 338     {
 339       e->inline_failed = CIF_OVERWRITABLE;
 340       inlinable = false;
 341     }
 342   /* All edges with call_stmt_cannot_inline_p should have inline_failed
 343      initialized to one of FINAL_ERROR reasons.  */
 344   else if (e->call_stmt_cannot_inline_p)
 345     gcc_unreachable ();
 346   /* Don't inline if the functions have different EH personalities.  */
 347   else if (DECL_FUNCTION_PERSONALITY (caller->decl)
 348            && DECL_FUNCTION_PERSONALITY (callee->decl)
 349            && (DECL_FUNCTION_PERSONALITY (caller->decl)
 350                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 351     {
 352       e->inline_failed = CIF_EH_PERSONALITY;
 353       inlinable = false;
 354     }
 355   /* TM pure functions should not be inlined into non-TM_pure
 356      functions.  */
 357   else if (is_tm_pure (callee->decl) && !is_tm_pure (caller->decl))
 358     {
 359       e->inline_failed = CIF_UNSPECIFIED;
 360       inlinable = false;
 361     }
 362   /* Check compatibility of target optimization options.  */
 363   else if (!targetm.target_option.can_inline_p (caller->decl,
 364                                                 callee->decl))
 365     {
 366       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 367       inlinable = false;
 368     }
 369   else if (!ipa_fn_summaries->get (callee)->inlinable)
 370     {
 371       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 372       inlinable = false;
 373     }
 374   /* Don't inline a function with mismatched sanitization attributes. */
 375   else if (!sanitize_attrs_match_for_inline_p (caller->decl, callee->decl))
 376     {
 377       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 378       inlinable = false;
 379     }
 380   /* Check if caller growth allows the inlining.  */
 381   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 382            && !disregard_limits
 383            && !lookup_attribute ("flatten",
 384                                  DECL_ATTRIBUTES (caller->decl))
 385            && !caller_growth_limits (e))
 386     inlinable = false;
 387   /* Don't inline a function with a higher optimization level than the
 388      caller.  FIXME: this is really just tip of iceberg of handling
 389      optimization attribute.  */
 390   else if (caller_tree != callee_tree)
 391     {
 392       bool always_inline =
 393              (DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 394               && lookup_attribute ("always_inline",
 395                                    DECL_ATTRIBUTES (callee->decl)));
 396       ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
 397       ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
 398
 399      /* Until GCC 4.9 we did not check the semantics alterning flags
 400         bellow and inline across optimization boundry.
 401         Enabling checks bellow breaks several packages by refusing
 402         to inline library always_inline functions. See PR65873.
 403         Disable the check for early inlining for now until better solution
 404         is found.  */
 405      if (always_inline && early)
 406         ;
 407       /* There are some options that change IL semantics which means
 408          we cannot inline in these cases for correctness reason.
 409          Not even for always_inline declared functions.  */
 410      else if (check_match (flag_wrapv)
 411               || check_match (flag_trapv)
 412               || check_match (flag_pcc_struct_return)
 413               /* When caller or callee does FP math, be sure FP codegen flags
 414                  compatible.  */
 415               || ((caller_info->fp_expressions && callee_info->fp_expressions)
 416                   && (check_maybe_up (flag_rounding_math)
 417                       || check_maybe_up (flag_trapping_math)
 418                       || check_maybe_down (flag_unsafe_math_optimizations)
 419                       || check_maybe_down (flag_finite_math_only)
 420                       || check_maybe_up (flag_signaling_nans)
 421                       || check_maybe_down (flag_cx_limited_range)
 422                       || check_maybe_up (flag_signed_zeros)
 423                       || check_maybe_down (flag_associative_math)
 424                       || check_maybe_down (flag_reciprocal_math)
 425                       || check_maybe_down (flag_fp_int_builtin_inexact)
 426                       /* Strictly speaking only when the callee contains function
 427                          calls that may end up setting errno.  */
 428                       || check_maybe_up (flag_errno_math)))
 429               /* We do not want to make code compiled with exceptions to be
 430                  brought into a non-EH function unless we know that the callee
 431                  does not throw.
 432                  This is tracked by DECL_FUNCTION_PERSONALITY.  */
 433               || (check_maybe_up (flag_non_call_exceptions)
 434                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 435               || (check_maybe_up (flag_exceptions)
 436                   && DECL_FUNCTION_PERSONALITY (callee->decl))
 437               /* When devirtualization is diabled for callee, it is not safe
 438                  to inline it as we possibly mangled the type info.
 439                  Allow early inlining of always inlines.  */
 440               || (!early && check_maybe_down (flag_devirtualize)))
 441         {
 442           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 443           inlinable = false;
 444         }
 445       /* gcc.dg/pr43564.c.  Apply user-forced inline even at -O0.  */
 446       else if (always_inline)
 447         ;
 448       /* When user added an attribute to the callee honor it.  */
 449       else if (lookup_attribute ("optimize", DECL_ATTRIBUTES (callee->decl))
 450                && opts_for_fn (caller->decl) != opts_for_fn (callee->decl))
 451         {
 452           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 453           inlinable = false;
 454         }
 455       /* If explicit optimize attribute are not used, the mismatch is caused
 456          by different command line options used to build different units.
 457          Do not care about COMDAT functions - those are intended to be
 458          optimized with the optimization flags of module they are used in.
 459          Also do not care about mixing up size/speed optimization when
 460          DECL_DISREGARD_INLINE_LIMITS is set.  */
 461       else if ((callee->merged_comdat
 462                 && !lookup_attribute ("optimize",
 463                                       DECL_ATTRIBUTES (caller->decl)))
 464                || DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 465         ;
 466       /* If mismatch is caused by merging two LTO units with different
 467          optimizationflags we want to be bit nicer.  However never inline
 468          if one of functions is not optimized at all.  */
 469       else if (!opt_for_fn (callee->decl, optimize)
 470                || !opt_for_fn (caller->decl, optimize))
 471         {
 472           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 473           inlinable = false;
 474         }
 475       /* If callee is optimized for size and caller is not, allow inlining if
 476          code shrinks or we are in MAX_INLINE_INSNS_SINGLE limit and callee
 477          is inline (and thus likely an unified comdat).  This will allow caller
 478          to run faster.  */
 479       else if (opt_for_fn (callee->decl, optimize_size)
 480                > opt_for_fn (caller->decl, optimize_size))
 481         {
 482           int growth = estimate_edge_growth (e);
 483           if (growth > 0
 484               && (!DECL_DECLARED_INLINE_P (callee->decl)
 485                   && growth >= MAX (MAX_INLINE_INSNS_SINGLE,
 486                                     MAX_INLINE_INSNS_AUTO)))
 487             {
 488               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 489               inlinable = false;
 490             }
 491         }
 492       /* If callee is more aggressively optimized for performance than caller,
 493          we generally want to inline only cheap (runtime wise) functions.  */
 494       else if (opt_for_fn (callee->decl, optimize_size)
 495                < opt_for_fn (caller->decl, optimize_size)
 496                || (opt_for_fn (callee->decl, optimize)
 497                    > opt_for_fn (caller->decl, optimize)))
 498         {
 499           if (estimate_edge_time (e)
 500               >= 20 + ipa_call_summaries->get (e)->call_stmt_time)
 501             {
 502               e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 503               inlinable = false;
 504             }
 505         }
 506
 507     }
 508
 509   if (!inlinable && report)
 510     report_inline_failed_reason (e);
 511   return inlinable;
 512 }
 513
 514
 515 /* Return true if the edge E is inlinable during early inlining.  */
 516
 517 static bool
 518 can_early_inline_edge_p (struct cgraph_edge *e)
 519 {
 520   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 521   /* Early inliner might get called at WPA stage when IPA pass adds new
 522      function.  In this case we can not really do any of early inlining
 523      because function bodies are missing.  */
 524   if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
 525     return false;
 526   if (!gimple_has_body_p (callee->decl))
 527     {
 528       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 529       return false;
 530     }
 531   /* In early inliner some of callees may not be in SSA form yet
 532      (i.e. the callgraph is cyclic and we did not process
 533      the callee by early inliner, yet).  We don't have CIF code for this
 534      case; later we will re-do the decision in the real inliner.  */
 535   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 536       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 537     {
 538       if (dump_file)
 539         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 540       return false;
 541     }
 542   if (!can_inline_edge_p (e, true, false, true))
 543     return false;
 544   return true;
 545 }
 546
 547
 548 /* Return number of calls in N.  Ignore cheap builtins.  */
 549
 550 static int
 551 num_calls (struct cgraph_node *n)
 552 {
 553   struct cgraph_edge *e;
 554   int num = 0;
 555
 556   for (e = n->callees; e; e = e->next_callee)
 557     if (!is_inexpensive_builtin (e->callee->decl))
 558       num++;
 559   return num;
 560 }
 561
 562
 563 /* Return true if we are interested in inlining small function.  */
 564
 565 static bool
 566 want_early_inline_function_p (struct cgraph_edge *e)
 567 {
 568   bool want_inline = true;
 569   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 570
 571   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 572     ;
 573   /* For AutoFDO, we need to make sure that before profile summary, all
 574      hot paths' IR look exactly the same as profiled binary. As a result,
 575      in einliner, we will disregard size limit and inline those callsites
 576      that are:
 577        * inlined in the profiled binary, and
 578        * the cloned callee has enough samples to be considered "hot".  */
 579   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 580     ;
 581   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 582            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 583     {
 584       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 585       report_inline_failed_reason (e);
 586       want_inline = false;
 587     }
 588   else
 589     {
 590       int growth = estimate_edge_growth (e);
 591       int n;
 592
 593       if (growth <= 0)
 594         ;
 595       else if (!e->maybe_hot_p ()
 596                && growth > 0)
 597         {
 598           if (dump_file)
 599             fprintf (dump_file, "  will not early inline: %s->%s, "
 600                      "call is cold and code would grow by %i\n",
 601                      e->caller->dump_name (),
 602                      callee->dump_name (),
 603                      growth);
 604           want_inline = false;
 605         }
 606       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 607         {
 608           if (dump_file)
 609             fprintf (dump_file, "  will not early inline: %s->%s, "
 610                      "growth %i exceeds --param early-inlining-insns\n",
 611                      e->caller->dump_name (),
 612                      callee->dump_name (),
 613                      growth);
 614           want_inline = false;
 615         }
 616       else if ((n = num_calls (callee)) != 0
 617                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 618         {
 619           if (dump_file)
 620             fprintf (dump_file, "  will not early inline: %s->%s, "
 621                      "growth %i exceeds --param early-inlining-insns "
 622                      "divided by number of calls\n",
 623                      e->caller->dump_name (),
 624                      callee->dump_name (),
 625                      growth);
 626           want_inline = false;
 627         }
 628     }
 629   return want_inline;
 630 }
 631
 632 /* Compute time of the edge->caller + edge->callee execution when inlining
 633    does not happen.  */
 634
 635 inline sreal
 636 compute_uninlined_call_time (struct cgraph_edge *edge,
 637                              sreal uninlined_call_time)
 638 {
 639   cgraph_node *caller = (edge->caller->global.inlined_to
 640                          ? edge->caller->global.inlined_to
 641                          : edge->caller);
 642
 643   if (edge->count.ipa ().nonzero_p ()
 644       && caller->count.ipa ().nonzero_p ())
 645     uninlined_call_time *= (sreal)edge->count.ipa ().to_gcov_type ()
 646                            / caller->count.ipa ().to_gcov_type ();
 647   if (edge->frequency ())
 648     uninlined_call_time *= cgraph_freq_base_rec * edge->frequency ();
 649   else
 650     uninlined_call_time = uninlined_call_time >> 11;
 651
 652   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 653   return uninlined_call_time + caller_time;
 654 }
 655
 656 /* Same as compute_uinlined_call_time but compute time when inlining
 657    does happen.  */
 658
 659 inline sreal
 660 compute_inlined_call_time (struct cgraph_edge *edge,
 661                            sreal time)
 662 {
 663   cgraph_node *caller = (edge->caller->global.inlined_to
 664                          ? edge->caller->global.inlined_to
 665                          : edge->caller);
 666   sreal caller_time = ipa_fn_summaries->get (caller)->time;
 667
 668   if (edge->count.ipa ().nonzero_p ()
 669       && caller->count.ipa ().nonzero_p ())
 670     time *= (sreal)edge->count.to_gcov_type () / caller->count.to_gcov_type ();
 671   if (edge->frequency ())
 672     time *= cgraph_freq_base_rec * edge->frequency ();
 673   else
 674     time = time >> 11;
 675
 676   /* This calculation should match one in ipa-inline-analysis.c
 677      (estimate_edge_size_and_time).  */
 678   time -= (sreal) edge->frequency ()
 679            * ipa_call_summaries->get (edge)->call_stmt_time / CGRAPH_FREQ_BASE;
 680   time += caller_time;
 681   if (time <= 0)
 682     time = ((sreal) 1) >> 8;
 683   gcc_checking_assert (time >= 0);
 684   return time;
 685 }
 686
 687 /* Return true if the speedup for inlining E is bigger than
 688    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 689
 690 static bool
 691 big_speedup_p (struct cgraph_edge *e)
 692 {
 693   sreal unspec_time;
 694   sreal spec_time = estimate_edge_time (e, &unspec_time);
 695   sreal time = compute_uninlined_call_time (e, unspec_time);
 696   sreal inlined_time = compute_inlined_call_time (e, spec_time);
 697
 698   if (time - inlined_time
 699       > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))
 700          * percent_rec)
 701     return true;
 702   return false;
 703 }
 704
 705 /* Return true if we are interested in inlining small function.
 706    When REPORT is true, report reason to dump file.  */
 707
 708 static bool
 709 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 710 {
 711   bool want_inline = true;
 712   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 713
 714   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 715     ;
 716   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 717            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 718     {
 719       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 720       want_inline = false;
 721     }
 722   /* Do fast and conservative check if the function can be good
 723      inline candidate.  At the moment we allow inline hints to
 724      promote non-inline functions to inline and we increase
 725      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 726   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 727            && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ()))
 728            && ipa_fn_summaries->get (callee)->min_size
 729                 - ipa_call_summaries->get (e)->call_stmt_size
 730               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 731     {
 732       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 733       want_inline = false;
 734     }
 735   else if ((DECL_DECLARED_INLINE_P (callee->decl)
 736             || e->count.ipa ().nonzero_p ())
 737            && ipa_fn_summaries->get (callee)->min_size
 738                 - ipa_call_summaries->get (e)->call_stmt_size
 739               > 16 * MAX_INLINE_INSNS_SINGLE)
 740     {
 741       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 742                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 743                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 744       want_inline = false;
 745     }
 746   else
 747     {
 748       int growth = estimate_edge_growth (e);
 749       ipa_hints hints = estimate_edge_hints (e);
 750       bool big_speedup = big_speedup_p (e);
 751
 752       if (growth <= 0)
 753         ;
 754       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 755          hints suggests that inlining given function is very profitable.  */
 756       else if (DECL_DECLARED_INLINE_P (callee->decl)
 757                && growth >= MAX_INLINE_INSNS_SINGLE
 758                && ((!big_speedup
 759                     && !(hints & (INLINE_HINT_indirect_call
 760                                   | INLINE_HINT_known_hot
 761                                   | INLINE_HINT_loop_iterations
 762                                   | INLINE_HINT_array_index
 763                                   | INLINE_HINT_loop_stride)))
 764                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 765         {
 766           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 767           want_inline = false;
 768         }
 769       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 770                && !opt_for_fn (e->caller->decl, flag_inline_functions))
 771         {
 772           /* growth_likely_positive is expensive, always test it last.  */
 773           if (growth >= MAX_INLINE_INSNS_SINGLE
 774               || growth_likely_positive (callee, growth))
 775             {
 776               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 777               want_inline = false;
 778             }
 779         }
 780       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 781          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 782          inlining given function is very profitable.  */
 783       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 784                && !big_speedup
 785                && !(hints & INLINE_HINT_known_hot)
 786                && growth >= ((hints & (INLINE_HINT_indirect_call
 787                                        | INLINE_HINT_loop_iterations
 788                                        | INLINE_HINT_array_index
 789                                        | INLINE_HINT_loop_stride))
 790                              ? MAX (MAX_INLINE_INSNS_AUTO,
 791                                     MAX_INLINE_INSNS_SINGLE)
 792                              : MAX_INLINE_INSNS_AUTO))
 793         {
 794           /* growth_likely_positive is expensive, always test it last.  */
 795           if (growth >= MAX_INLINE_INSNS_SINGLE
 796               || growth_likely_positive (callee, growth))
 797             {
 798               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 799               want_inline = false;
 800             }
 801         }
 802       /* If call is cold, do not inline when function body would grow. */
 803       else if (!e->maybe_hot_p ()
 804                && (growth >= MAX_INLINE_INSNS_SINGLE
 805                    || growth_likely_positive (callee, growth)))
 806         {
 807           e->inline_failed = CIF_UNLIKELY_CALL;
 808           want_inline = false;
 809         }
 810     }
 811   if (!want_inline && report)
 812     report_inline_failed_reason (e);
 813   return want_inline;
 814 }
 815
 816 /* EDGE is self recursive edge.
 817    We hand two cases - when function A is inlining into itself
 818    or when function A is being inlined into another inliner copy of function
 819    A within function B.
 820
 821    In first case OUTER_NODE points to the toplevel copy of A, while
 822    in the second case OUTER_NODE points to the outermost copy of A in B.
 823
 824    In both cases we want to be extra selective since
 825    inlining the call will just introduce new recursive calls to appear.  */
 826
 827 static bool
 828 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 829                                    struct cgraph_node *outer_node,
 830                                    bool peeling,
 831                                    int depth)
 832 {
 833   char const *reason = NULL;
 834   bool want_inline = true;
 835   int caller_freq = CGRAPH_FREQ_BASE;
 836   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 837
 838   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 839     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 840
 841   if (!edge->maybe_hot_p ())
 842     {
 843       reason = "recursive call is cold";
 844       want_inline = false;
 845     }
 846   else if (!outer_node->count.ipa ().nonzero_p ())
 847     {
 848       reason = "not executed in profile";
 849       want_inline = false;
 850     }
 851   else if (depth > max_depth)
 852     {
 853       reason = "--param max-inline-recursive-depth exceeded.";
 854       want_inline = false;
 855     }
 856
 857   if (outer_node->global.inlined_to)
 858     caller_freq = outer_node->callers->frequency ();
 859
 860   if (!caller_freq)
 861     {
 862       reason = "function is inlined and unlikely";
 863       want_inline = false;
 864     }
 865
 866   if (!want_inline)
 867     ;
 868   /* Inlining of self recursive function into copy of itself within other function
 869      is transformation similar to loop peeling.
 870
 871      Peeling is profitable if we can inline enough copies to make probability
 872      of actual call to the self recursive function very small.  Be sure that
 873      the probability of recursion is small.
 874
 875      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 876      This way the expected number of recision is at most max_depth.  */
 877   else if (peeling)
 878     {
 879       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 880                                          / max_depth);
 881       int i;
 882       for (i = 1; i < depth; i++)
 883         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 884       if (max_count.nonzero_p () && edge->count.ipa ().nonzero_p ()
 885           && (edge->count.ipa ().to_gcov_type () * CGRAPH_FREQ_BASE
 886               / outer_node->count.ipa ().to_gcov_type ()
 887               >= max_prob))
 888         {
 889           reason = "profile of recursive call is too large";
 890           want_inline = false;
 891         }
 892       if (!max_count.nonzero_p ()
 893           && (edge->frequency () * CGRAPH_FREQ_BASE / caller_freq
 894               >= max_prob))
 895         {
 896           reason = "frequency of recursive call is too large";
 897           want_inline = false;
 898         }
 899     }
 900   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 901      depth is large.  We reduce function call overhead and increase chances that
 902      things fit in hardware return predictor.
 903
 904      Recursive inlining might however increase cost of stack frame setup
 905      actually slowing down functions whose recursion tree is wide rather than
 906      deep.
 907
 908      Deciding reliably on when to do recursive inlining without profile feedback
 909      is tricky.  For now we disable recursive inlining when probability of self
 910      recursion is low.
 911
 912      Recursive inlining of self recursive call within loop also results in large loop
 913      depths that generally optimize badly.  We may want to throttle down inlining
 914      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 915      methods.  */
 916   else
 917     {
 918       if (max_count.nonzero_p () && edge->count.ipa ().initialized_p ()
 919           && (edge->count.ipa ().to_gcov_type () * 100
 920               / outer_node->count.ipa ().to_gcov_type ()
 921               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 922         {
 923           reason = "profile of recursive call is too small";
 924           want_inline = false;
 925         }
 926       else if ((!max_count.nonzero_p ()
 927                 || !edge->count.ipa ().initialized_p ())
 928                && (edge->frequency () * 100 / caller_freq
 929                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 930         {
 931           reason = "frequency of recursive call is too small";
 932           want_inline = false;
 933         }
 934     }
 935   if (!want_inline && dump_file)
 936     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 937   return want_inline;
 938 }
 939
 940 /* Return true when NODE has uninlinable caller;
 941    set HAS_HOT_CALL if it has hot call.
 942    Worker for cgraph_for_node_and_aliases.  */
 943
 944 static bool
 945 check_callers (struct cgraph_node *node, void *has_hot_call)
 946 {
 947   struct cgraph_edge *e;
 948    for (e = node->callers; e; e = e->next_caller)
 949      {
 950        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once)
 951            || !opt_for_fn (e->caller->decl, optimize))
 952          return true;
 953        if (!can_inline_edge_p (e, true))
 954          return true;
 955        if (e->recursive_p ())
 956          return true;
 957        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 958          *(bool *)has_hot_call = true;
 959      }
 960   return false;
 961 }
 962
 963 /* If NODE has a caller, return true.  */
 964
 965 static bool
 966 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 967 {
 968   if (node->callers)
 969     return true;
 970   return false;
 971 }
 972
 973 /* Decide if inlining NODE would reduce unit size by eliminating
 974    the offline copy of function.
 975    When COLD is true the cold calls are considered, too.  */
 976
 977 static bool
 978 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 979 {
 980   bool has_hot_call = false;
 981
 982   /* Aliases gets inlined along with the function they alias.  */
 983   if (node->alias)
 984     return false;
 985   /* Already inlined?  */
 986   if (node->global.inlined_to)
 987     return false;
 988   /* Does it have callers?  */
 989   if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true))
 990     return false;
 991   /* Inlining into all callers would increase size?  */
 992   if (estimate_growth (node) > 0)
 993     return false;
 994   /* All inlines must be possible.  */
 995   if (node->call_for_symbol_and_aliases (check_callers, &has_hot_call,
 996                                          true))
 997     return false;
 998   if (!cold && !has_hot_call)
 999     return false;
1000   return true;
1001 }
1002
1003 /* A cost model driving the inlining heuristics in a way so the edges with
1004    smallest badness are inlined first.  After each inlining is performed
1005    the costs of all caller edges of nodes affected are recomputed so the
1006    metrics may accurately depend on values such as number of inlinable callers
1007    of the function or function body size.  */
1008
1009 static sreal
1010 edge_badness (struct cgraph_edge *edge, bool dump)
1011 {
1012   sreal badness;
1013   int growth;
1014   sreal edge_time, unspec_edge_time;
1015   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1016   struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
1017   ipa_hints hints;
1018   cgraph_node *caller = (edge->caller->global.inlined_to
1019                          ? edge->caller->global.inlined_to
1020                          : edge->caller);
1021
1022   growth = estimate_edge_growth (edge);
1023   edge_time = estimate_edge_time (edge, &unspec_edge_time);
1024   hints = estimate_edge_hints (edge);
1025   gcc_checking_assert (edge_time >= 0);
1026   /* Check that inlined time is better, but tolerate some roundoff issues.
1027      FIXME: When callee profile drops to 0 we account calls more.  This
1028      should be fixed by never doing that.  */
1029   gcc_checking_assert ((edge_time - callee_info->time).to_int () <= 0
1030                         || callee->count.ipa ().initialized_p ());
1031   gcc_checking_assert (growth <= callee_info->size);
1032
1033   if (dump)
1034     {
1035       fprintf (dump_file, "    Badness calculation for %s -> %s\n",
1036                edge->caller->dump_name (),
1037                edge->callee->dump_name ());
1038       fprintf (dump_file, "      size growth %i, time %f unspec %f ",
1039                growth,
1040                edge_time.to_double (),
1041                unspec_edge_time.to_double ());
1042       ipa_dump_hints (dump_file, hints);
1043       if (big_speedup_p (edge))
1044         fprintf (dump_file, " big_speedup");
1045       fprintf (dump_file, "\n");
1046     }
1047
1048   /* Always prefer inlining saving code size.  */
1049   if (growth <= 0)
1050     {
1051       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
1052       if (dump)
1053         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
1054                  growth);
1055     }
1056    /* Inlining into EXTERNAL functions is not going to change anything unless
1057       they are themselves inlined.  */
1058    else if (DECL_EXTERNAL (caller->decl))
1059     {
1060       if (dump)
1061         fprintf (dump_file, "      max: function is external\n");
1062       return sreal::max ();
1063     }
1064   /* When profile is available. Compute badness as:
1065
1066                  time_saved * caller_count
1067      goodness =  -------------------------------------------------
1068                  growth_of_caller * overall_growth * combined_size
1069
1070      badness = - goodness
1071
1072      Again use negative value to make calls with profile appear hotter
1073      then calls without.
1074   */
1075   else if (opt_for_fn (caller->decl, flag_guess_branch_prob)
1076            || caller->count.ipa ().nonzero_p ())
1077     {
1078       sreal numerator, denominator;
1079       int overall_growth;
1080       sreal inlined_time = compute_inlined_call_time (edge, edge_time);
1081
1082       numerator = (compute_uninlined_call_time (edge, unspec_edge_time)
1083                    - inlined_time);
1084       if (numerator == 0)
1085         numerator = ((sreal) 1 >> 8);
1086       if (caller->count.ipa ().nonzero_p ())
1087         numerator *= caller->count.ipa ().to_gcov_type ();
1088       else if (caller->count.ipa ().initialized_p ())
1089         numerator = numerator >> 11;
1090       denominator = growth;
1091
1092       overall_growth = callee_info->growth;
1093
1094       /* Look for inliner wrappers of the form:
1095
1096          inline_caller ()
1097            {
1098              do_fast_job...
1099              if (need_more_work)
1100                noninline_callee ();
1101            }
1102          Withhout panilizing this case, we usually inline noninline_callee
1103          into the inline_caller because overall_growth is small preventing
1104          further inlining of inline_caller.
1105
1106          Penalize only callgraph edges to functions with small overall
1107          growth ...
1108         */
1109       if (growth > overall_growth
1110           /* ... and having only one caller which is not inlined ... */
1111           && callee_info->single_caller
1112           && !edge->caller->global.inlined_to
1113           /* ... and edges executed only conditionally ... */
1114           && edge->frequency () < CGRAPH_FREQ_BASE
1115           /* ... consider case where callee is not inline but caller is ... */
1116           && ((!DECL_DECLARED_INLINE_P (edge->callee->decl)
1117                && DECL_DECLARED_INLINE_P (caller->decl))
1118               /* ... or when early optimizers decided to split and edge
1119                  frequency still indicates splitting is a win ... */
1120               || (callee->split_part && !caller->split_part
1121                   && edge->frequency ()
1122                      < CGRAPH_FREQ_BASE
1123                        * PARAM_VALUE
1124                           (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100
1125                   /* ... and do not overwrite user specified hints.   */
1126                   && (!DECL_DECLARED_INLINE_P (edge->callee->decl)
1127                       || DECL_DECLARED_INLINE_P (caller->decl)))))
1128         {
1129           struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (caller);
1130           int caller_growth = caller_info->growth;
1131
1132           /* Only apply the penalty when caller looks like inline candidate,
1133              and it is not called once and.  */
1134           if (!caller_info->single_caller && overall_growth < caller_growth
1135               && caller_info->inlinable
1136               && caller_info->size
1137                  < (DECL_DECLARED_INLINE_P (caller->decl)
1138                     ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO))
1139             {
1140               if (dump)
1141                 fprintf (dump_file,
1142                          "     Wrapper penalty. Increasing growth %i to %i\n",
1143                          overall_growth, caller_growth);
1144               overall_growth = caller_growth;
1145             }
1146         }
1147       if (overall_growth > 0)
1148         {
1149           /* Strongly preffer functions with few callers that can be inlined
1150              fully.  The square root here leads to smaller binaries at average.
1151              Watch however for extreme cases and return to linear function
1152              when growth is large.  */
1153           if (overall_growth < 256)
1154             overall_growth *= overall_growth;
1155           else
1156             overall_growth += 256 * 256 - 256;
1157           denominator *= overall_growth;
1158         }
1159       denominator *= inlined_time;
1160
1161       badness = - numerator / denominator;
1162
1163       if (dump)
1164         {
1165           fprintf (dump_file,
1166                    "      %f: guessed profile. frequency %f, count %" PRId64
1167                    " caller count %" PRId64
1168                    " time w/o inlining %f, time with inlining %f"
1169                    " overall growth %i (current) %i (original)"
1170                    " %i (compensated)\n",
1171                    badness.to_double (),
1172                   (double)edge->frequency () / CGRAPH_FREQ_BASE,
1173                    edge->count.ipa ().initialized_p () ? edge->count.ipa ().to_gcov_type () : -1,
1174                    caller->count.ipa ().initialized_p () ? caller->count.ipa ().to_gcov_type () : -1,
1175                    compute_uninlined_call_time (edge,
1176                                                 unspec_edge_time).to_double (),
1177                    compute_inlined_call_time (edge, edge_time).to_double (),
1178                    estimate_growth (callee),
1179                    callee_info->growth, overall_growth);
1180         }
1181     }
1182   /* When function local profile is not available or it does not give
1183      useful information (ie frequency is zero), base the cost on
1184      loop nest and overall size growth, so we optimize for overall number
1185      of functions fully inlined in program.  */
1186   else
1187     {
1188       int nest = MIN (ipa_call_summaries->get (edge)->loop_depth, 8);
1189       badness = growth;
1190
1191       /* Decrease badness if call is nested.  */
1192       if (badness > 0)
1193         badness = badness >> nest;
1194       else
1195         badness = badness << nest;
1196       if (dump)
1197         fprintf (dump_file, "      %f: no profile. nest %i\n",
1198                  badness.to_double (), nest);
1199     }
1200   gcc_checking_assert (badness != 0);
1201
1202   if (edge->recursive_p ())
1203     badness = badness.shift (badness > 0 ? 4 : -4);
1204   if ((hints & (INLINE_HINT_indirect_call
1205                 | INLINE_HINT_loop_iterations
1206                 | INLINE_HINT_array_index
1207                 | INLINE_HINT_loop_stride))
1208       || callee_info->growth <= 0)
1209     badness = badness.shift (badness > 0 ? -2 : 2);
1210   if (hints & (INLINE_HINT_same_scc))
1211     badness = badness.shift (badness > 0 ? 3 : -3);
1212   else if (hints & (INLINE_HINT_in_scc))
1213     badness = badness.shift (badness > 0 ? 2 : -2);
1214   else if (hints & (INLINE_HINT_cross_module))
1215     badness = badness.shift (badness > 0 ? 1 : -1);
1216   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1217     badness = badness.shift (badness > 0 ? -4 : 4);
1218   else if ((hints & INLINE_HINT_declared_inline))
1219     badness = badness.shift (badness > 0 ? -3 : 3);
1220   if (dump)
1221     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1222   return badness;
1223 }
1224
1225 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1226 static inline void
1227 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1228 {
1229   sreal badness = edge_badness (edge, false);
1230   if (edge->aux)
1231     {
1232       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1233       gcc_checking_assert (n->get_data () == edge);
1234
1235       /* fibonacci_heap::replace_key does busy updating of the
1236          heap that is unnecesarily expensive.
1237          We do lazy increases: after extracting minimum if the key
1238          turns out to be out of date, it is re-inserted into heap
1239          with correct value.  */
1240       if (badness < n->get_key ())
1241         {
1242           if (dump_file && (dump_flags & TDF_DETAILS))
1243             {
1244               fprintf (dump_file,
1245                        "  decreasing badness %s -> %s, %f to %f\n",
1246                        edge->caller->dump_name (),
1247                        edge->callee->dump_name (),
1248                        n->get_key ().to_double (),
1249                        badness.to_double ());
1250             }
1251           heap->decrease_key (n, badness);
1252         }
1253     }
1254   else
1255     {
1256        if (dump_file && (dump_flags & TDF_DETAILS))
1257          {
1258            fprintf (dump_file,
1259                     "  enqueuing call %s -> %s, badness %f\n",
1260                     edge->caller->dump_name (),
1261                     edge->callee->dump_name (),
1262                     badness.to_double ());
1263          }
1264       edge->aux = heap->insert (badness, edge);
1265     }
1266 }
1267
1268
1269 /* NODE was inlined.
1270    All caller edges needs to be resetted because
1271    size estimates change. Similarly callees needs reset
1272    because better context may be known.  */
1273
1274 static void
1275 reset_edge_caches (struct cgraph_node *node)
1276 {
1277   struct cgraph_edge *edge;
1278   struct cgraph_edge *e = node->callees;
1279   struct cgraph_node *where = node;
1280   struct ipa_ref *ref;
1281
1282   if (where->global.inlined_to)
1283     where = where->global.inlined_to;
1284
1285   for (edge = where->callers; edge; edge = edge->next_caller)
1286     if (edge->inline_failed)
1287       reset_edge_growth_cache (edge);
1288
1289   FOR_EACH_ALIAS (where, ref)
1290     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1291
1292   if (!e)
1293     return;
1294
1295   while (true)
1296     if (!e->inline_failed && e->callee->callees)
1297       e = e->callee->callees;
1298     else
1299       {
1300         if (e->inline_failed)
1301           reset_edge_growth_cache (e);
1302         if (e->next_callee)
1303           e = e->next_callee;
1304         else
1305           {
1306             do
1307               {
1308                 if (e->caller == node)
1309                   return;
1310                 e = e->caller->callers;
1311               }
1312             while (!e->next_callee);
1313             e = e->next_callee;
1314           }
1315       }
1316 }
1317
1318 /* Recompute HEAP nodes for each of caller of NODE.
1319    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1320    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1321    it is inlinable. Otherwise check all edges.  */
1322
1323 static void
1324 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1325                     bitmap updated_nodes,
1326                     struct cgraph_edge *check_inlinablity_for)
1327 {
1328   struct cgraph_edge *edge;
1329   struct ipa_ref *ref;
1330
1331   if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable)
1332       || node->global.inlined_to)
1333     return;
1334   if (!bitmap_set_bit (updated_nodes, node->uid))
1335     return;
1336
1337   FOR_EACH_ALIAS (node, ref)
1338     {
1339       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1340       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1341     }
1342
1343   for (edge = node->callers; edge; edge = edge->next_caller)
1344     if (edge->inline_failed)
1345       {
1346         if (!check_inlinablity_for
1347             || check_inlinablity_for == edge)
1348           {
1349             if (can_inline_edge_p (edge, false)
1350                 && want_inline_small_function_p (edge, false))
1351               update_edge_key (heap, edge);
1352             else if (edge->aux)
1353               {
1354                 report_inline_failed_reason (edge);
1355                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1356                 edge->aux = NULL;
1357               }
1358           }
1359         else if (edge->aux)
1360           update_edge_key (heap, edge);
1361       }
1362 }
1363
1364 /* Recompute HEAP nodes for each uninlined call in NODE.
1365    This is used when we know that edge badnesses are going only to increase
1366    (we introduced new call site) and thus all we need is to insert newly
1367    created edges into heap.  */
1368
1369 static void
1370 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1371                     bitmap updated_nodes)
1372 {
1373   struct cgraph_edge *e = node->callees;
1374
1375   if (!e)
1376     return;
1377   while (true)
1378     if (!e->inline_failed && e->callee->callees)
1379       e = e->callee->callees;
1380     else
1381       {
1382         enum availability avail;
1383         struct cgraph_node *callee;
1384         /* We do not reset callee growth cache here.  Since we added a new call,
1385            growth chould have just increased and consequentely badness metric
1386            don't need updating.  */
1387         if (e->inline_failed
1388             && (callee = e->callee->ultimate_alias_target (&avail, e->caller))
1389             && ipa_fn_summaries->get (callee)->inlinable
1390             && avail >= AVAIL_AVAILABLE
1391             && !bitmap_bit_p (updated_nodes, callee->uid))
1392           {
1393             if (can_inline_edge_p (e, false)
1394                 && want_inline_small_function_p (e, false))
1395               update_edge_key (heap, e);
1396             else if (e->aux)
1397               {
1398                 report_inline_failed_reason (e);
1399                 heap->delete_node ((edge_heap_node_t *) e->aux);
1400                 e->aux = NULL;
1401               }
1402           }
1403         if (e->next_callee)
1404           e = e->next_callee;
1405         else
1406           {
1407             do
1408               {
1409                 if (e->caller == node)
1410                   return;
1411                 e = e->caller->callers;
1412               }
1413             while (!e->next_callee);
1414             e = e->next_callee;
1415           }
1416       }
1417 }
1418
1419 /* Enqueue all recursive calls from NODE into priority queue depending on
1420    how likely we want to recursively inline the call.  */
1421
1422 static void
1423 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1424                         edge_heap_t *heap)
1425 {
1426   struct cgraph_edge *e;
1427   enum availability avail;
1428
1429   for (e = where->callees; e; e = e->next_callee)
1430     if (e->callee == node
1431         || (e->callee->ultimate_alias_target (&avail, e->caller) == node
1432             && avail > AVAIL_INTERPOSABLE))
1433       {
1434         /* When profile feedback is available, prioritize by expected number
1435            of calls.  */
1436         heap->insert (!(max_count > 0) || !e->count.ipa ().initialized_p () ? -e->frequency ()
1437                       : -(e->count.ipa ().to_gcov_type ()
1438                         / ((max_count.to_gcov_type () + (1<<24) - 1)
1439                            / (1<<24))),
1440                       e);
1441       }
1442   for (e = where->callees; e; e = e->next_callee)
1443     if (!e->inline_failed)
1444       lookup_recursive_calls (node, e->callee, heap);
1445 }
1446
1447 /* Decide on recursive inlining: in the case function has recursive calls,
1448    inline until body size reaches given argument.  If any new indirect edges
1449    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1450    is NULL.  */
1451
1452 static bool
1453 recursive_inlining (struct cgraph_edge *edge,
1454                     vec<cgraph_edge *> *new_edges)
1455 {
1456   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1457   edge_heap_t heap (sreal::min ());
1458   struct cgraph_node *node;
1459   struct cgraph_edge *e;
1460   struct cgraph_node *master_clone = NULL, *next;
1461   int depth = 0;
1462   int n = 0;
1463
1464   node = edge->caller;
1465   if (node->global.inlined_to)
1466     node = node->global.inlined_to;
1467
1468   if (DECL_DECLARED_INLINE_P (node->decl))
1469     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1470
1471   /* Make sure that function is small enough to be considered for inlining.  */
1472   if (estimate_size_after_inlining (node, edge)  >= limit)
1473     return false;
1474   lookup_recursive_calls (node, node, &heap);
1475   if (heap.empty ())
1476     return false;
1477
1478   if (dump_file)
1479     fprintf (dump_file,
1480              "  Performing recursive inlining on %s\n",
1481              node->name ());
1482
1483   /* Do the inlining and update list of recursive call during process.  */
1484   while (!heap.empty ())
1485     {
1486       struct cgraph_edge *curr = heap.extract_min ();
1487       struct cgraph_node *cnode, *dest = curr->callee;
1488
1489       if (!can_inline_edge_p (curr, true))
1490         continue;
1491
1492       /* MASTER_CLONE is produced in the case we already started modified
1493          the function. Be sure to redirect edge to the original body before
1494          estimating growths otherwise we will be seeing growths after inlining
1495          the already modified body.  */
1496       if (master_clone)
1497         {
1498           curr->redirect_callee (master_clone);
1499           reset_edge_growth_cache (curr);
1500         }
1501
1502       if (estimate_size_after_inlining (node, curr) > limit)
1503         {
1504           curr->redirect_callee (dest);
1505           reset_edge_growth_cache (curr);
1506           break;
1507         }
1508
1509       depth = 1;
1510       for (cnode = curr->caller;
1511            cnode->global.inlined_to; cnode = cnode->callers->caller)
1512         if (node->decl
1513             == curr->callee->ultimate_alias_target ()->decl)
1514           depth++;
1515
1516       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1517         {
1518           curr->redirect_callee (dest);
1519           reset_edge_growth_cache (curr);
1520           continue;
1521         }
1522
1523       if (dump_file)
1524         {
1525           fprintf (dump_file,
1526                    "   Inlining call of depth %i", depth);
1527           if (node->count.nonzero_p ())
1528             {
1529               fprintf (dump_file, " called approx. %.2f times per call",
1530                        (double)curr->count.to_gcov_type ()
1531                        / node->count.to_gcov_type ());
1532             }
1533           fprintf (dump_file, "\n");
1534         }
1535       if (!master_clone)
1536         {
1537           /* We need original clone to copy around.  */
1538           master_clone = node->create_clone (node->decl, node->count,
1539             false, vNULL, true, NULL, NULL);
1540           for (e = master_clone->callees; e; e = e->next_callee)
1541             if (!e->inline_failed)
1542               clone_inlined_nodes (e, true, false, NULL);
1543           curr->redirect_callee (master_clone);
1544           reset_edge_growth_cache (curr);
1545         }
1546
1547       inline_call (curr, false, new_edges, &overall_size, true);
1548       lookup_recursive_calls (node, curr->callee, &heap);
1549       n++;
1550     }
1551
1552   if (!heap.empty () && dump_file)
1553     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1554
1555   if (!master_clone)
1556     return false;
1557
1558   if (dump_file)
1559     fprintf (dump_file,
1560              "\n   Inlined %i times, "
1561              "body grown from size %i to %i, time %f to %f\n", n,
1562              ipa_fn_summaries->get (master_clone)->size,
1563              ipa_fn_summaries->get (node)->size,
1564              ipa_fn_summaries->get (master_clone)->time.to_double (),
1565              ipa_fn_summaries->get (node)->time.to_double ());
1566
1567   /* Remove master clone we used for inlining.  We rely that clones inlined
1568      into master clone gets queued just before master clone so we don't
1569      need recursion.  */
1570   for (node = symtab->first_function (); node != master_clone;
1571        node = next)
1572     {
1573       next = symtab->next_function (node);
1574       if (node->global.inlined_to == master_clone)
1575         node->remove ();
1576     }
1577   master_clone->remove ();
1578   return true;
1579 }
1580
1581
1582 /* Given whole compilation unit estimate of INSNS, compute how large we can
1583    allow the unit to grow.  */
1584
1585 static int
1586 compute_max_insns (int insns)
1587 {
1588   int max_insns = insns;
1589   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1590     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1591
1592   return ((int64_t) max_insns
1593           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1594 }
1595
1596
1597 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1598
1599 static void
1600 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1601 {
1602   while (new_edges.length () > 0)
1603     {
1604       struct cgraph_edge *edge = new_edges.pop ();
1605
1606       gcc_assert (!edge->aux);
1607       if (edge->inline_failed
1608           && can_inline_edge_p (edge, true)
1609           && want_inline_small_function_p (edge, true))
1610         edge->aux = heap->insert (edge_badness (edge, false), edge);
1611     }
1612 }
1613
1614 /* Remove EDGE from the fibheap.  */
1615
1616 static void
1617 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1618 {
1619   if (e->aux)
1620     {
1621       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1622       e->aux = NULL;
1623     }
1624 }
1625
1626 /* Return true if speculation of edge E seems useful.
1627    If ANTICIPATE_INLINING is true, be conservative and hope that E
1628    may get inlined.  */
1629
1630 bool
1631 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1632 {
1633   enum availability avail;
1634   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail,
1635                                                                  e->caller);
1636   struct cgraph_edge *direct, *indirect;
1637   struct ipa_ref *ref;
1638
1639   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1640
1641   if (!e->maybe_hot_p ())
1642     return false;
1643
1644   /* See if IP optimizations found something potentially useful about the
1645      function.  For now we look only for CONST/PURE flags.  Almost everything
1646      else we propagate is useless.  */
1647   if (avail >= AVAIL_AVAILABLE)
1648     {
1649       int ecf_flags = flags_from_decl_or_type (target->decl);
1650       if (ecf_flags & ECF_CONST)
1651         {
1652           e->speculative_call_info (direct, indirect, ref);
1653           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1654             return true;
1655         }
1656       else if (ecf_flags & ECF_PURE)
1657         {
1658           e->speculative_call_info (direct, indirect, ref);
1659           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1660             return true;
1661         }
1662     }
1663   /* If we did not managed to inline the function nor redirect
1664      to an ipa-cp clone (that are seen by having local flag set),
1665      it is probably pointless to inline it unless hardware is missing
1666      indirect call predictor.  */
1667   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1668     return false;
1669   /* For overwritable targets there is not much to do.  */
1670   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1671     return false;
1672   /* OK, speculation seems interesting.  */
1673   return true;
1674 }
1675
1676 /* We know that EDGE is not going to be inlined.
1677    See if we can remove speculation.  */
1678
1679 static void
1680 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1681 {
1682   if (edge->speculative && !speculation_useful_p (edge, false))
1683     {
1684       struct cgraph_node *node = edge->caller;
1685       struct cgraph_node *where = node->global.inlined_to
1686                                   ? node->global.inlined_to : node;
1687       auto_bitmap updated_nodes;
1688
1689       if (edge->count.ipa ().initialized_p ())
1690         spec_rem += edge->count.ipa ();
1691       edge->resolve_speculation ();
1692       reset_edge_caches (where);
1693       ipa_update_overall_fn_summary (where);
1694       update_caller_keys (edge_heap, where,
1695                           updated_nodes, NULL);
1696       update_callee_keys (edge_heap, where,
1697                           updated_nodes);
1698     }
1699 }
1700
1701 /* Return true if NODE should be accounted for overall size estimate.
1702    Skip all nodes optimized for size so we can measure the growth of hot
1703    part of program no matter of the padding.  */
1704
1705 bool
1706 inline_account_function_p (struct cgraph_node *node)
1707 {
1708    return (!DECL_EXTERNAL (node->decl)
1709            && !opt_for_fn (node->decl, optimize_size)
1710            && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED);
1711 }
1712
1713 /* Count number of callers of NODE and store it into DATA (that
1714    points to int.  Worker for cgraph_for_node_and_aliases.  */
1715
1716 static bool
1717 sum_callers (struct cgraph_node *node, void *data)
1718 {
1719   struct cgraph_edge *e;
1720   int *num_calls = (int *)data;
1721
1722   for (e = node->callers; e; e = e->next_caller)
1723     (*num_calls)++;
1724   return false;
1725 }
1726
1727 /* We use greedy algorithm for inlining of small functions:
1728    All inline candidates are put into prioritized heap ordered in
1729    increasing badness.
1730
1731    The inlining of small functions is bounded by unit growth parameters.  */
1732
1733 static void
1734 inline_small_functions (void)
1735 {
1736   struct cgraph_node *node;
1737   struct cgraph_edge *edge;
1738   edge_heap_t edge_heap (sreal::min ());
1739   auto_bitmap updated_nodes;
1740   int min_size, max_size;
1741   auto_vec<cgraph_edge *> new_indirect_edges;
1742   int initial_size = 0;
1743   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1744   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1745   new_indirect_edges.create (8);
1746
1747   edge_removal_hook_holder
1748     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1749
1750   /* Compute overall unit size and other global parameters used by badness
1751      metrics.  */
1752
1753   max_count = profile_count::uninitialized ();
1754   ipa_reduced_postorder (order, true, true, NULL);
1755   free (order);
1756
1757   FOR_EACH_DEFINED_FUNCTION (node)
1758     if (!node->global.inlined_to)
1759       {
1760         if (!node->alias && node->analyzed
1761             && (node->has_gimple_body_p () || node->thunk.thunk_p)
1762             && opt_for_fn (node->decl, optimize))
1763           {
1764             struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1765             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1766
1767             /* Do not account external functions, they will be optimized out
1768                if not inlined.  Also only count the non-cold portion of program.  */
1769             if (inline_account_function_p (node))
1770               initial_size += info->size;
1771             info->growth = estimate_growth (node);
1772
1773             int num_calls = 0;
1774             node->call_for_symbol_and_aliases (sum_callers, &num_calls,
1775                                                true);
1776             if (num_calls == 1)
1777               info->single_caller = true;
1778             if (dfs && dfs->next_cycle)
1779               {
1780                 struct cgraph_node *n2;
1781                 int id = dfs->scc_no + 1;
1782                 for (n2 = node; n2;
1783                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1784                   if (opt_for_fn (n2->decl, optimize))
1785                     {
1786                       struct ipa_fn_summary *info2 = ipa_fn_summaries->get (n2);
1787                       if (info2->scc_no)
1788                         break;
1789                       info2->scc_no = id;
1790                     }
1791               }
1792           }
1793
1794         for (edge = node->callers; edge; edge = edge->next_caller)
1795           max_count = max_count.max (edge->count.ipa ());
1796       }
1797   ipa_free_postorder_info ();
1798   initialize_growth_caches ();
1799
1800   if (dump_file)
1801     fprintf (dump_file,
1802              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1803              initial_size);
1804
1805   overall_size = initial_size;
1806   max_size = compute_max_insns (overall_size);
1807   min_size = overall_size;
1808
1809   /* Populate the heap with all edges we might inline.  */
1810
1811   FOR_EACH_DEFINED_FUNCTION (node)
1812     {
1813       bool update = false;
1814       struct cgraph_edge *next = NULL;
1815       bool has_speculative = false;
1816
1817       if (!opt_for_fn (node->decl, optimize))
1818         continue;
1819
1820       if (dump_file)
1821         fprintf (dump_file, "Enqueueing calls in %s.\n", node->dump_name ());
1822
1823       for (edge = node->callees; edge; edge = next)
1824         {
1825           next = edge->next_callee;
1826           if (edge->inline_failed
1827               && !edge->aux
1828               && can_inline_edge_p (edge, true)
1829               && want_inline_small_function_p (edge, true)
1830               && edge->inline_failed)
1831             {
1832               gcc_assert (!edge->aux);
1833               update_edge_key (&edge_heap, edge);
1834             }
1835           if (edge->speculative)
1836             has_speculative = true;
1837         }
1838       if (has_speculative)
1839         for (edge = node->callees; edge; edge = next)
1840           if (edge->speculative && !speculation_useful_p (edge,
1841                                                           edge->aux != NULL))
1842             {
1843               edge->resolve_speculation ();
1844               update = true;
1845             }
1846       if (update)
1847         {
1848           struct cgraph_node *where = node->global.inlined_to
1849                                       ? node->global.inlined_to : node;
1850           ipa_update_overall_fn_summary (where);
1851           reset_edge_caches (where);
1852           update_caller_keys (&edge_heap, where,
1853                               updated_nodes, NULL);
1854           update_callee_keys (&edge_heap, where,
1855                               updated_nodes);
1856           bitmap_clear (updated_nodes);
1857         }
1858     }
1859
1860   gcc_assert (in_lto_p
1861               || !(max_count > 0)
1862               || (profile_info && flag_branch_probabilities));
1863
1864   while (!edge_heap.empty ())
1865     {
1866       int old_size = overall_size;
1867       struct cgraph_node *where, *callee;
1868       sreal badness = edge_heap.min_key ();
1869       sreal current_badness;
1870       int growth;
1871
1872       edge = edge_heap.extract_min ();
1873       gcc_assert (edge->aux);
1874       edge->aux = NULL;
1875       if (!edge->inline_failed || !edge->callee->analyzed)
1876         continue;
1877
1878 #if CHECKING_P
1879       /* Be sure that caches are maintained consistent.
1880          This check is affected by scaling roundoff errors when compiling for
1881          IPA this we skip it in that case.  */
1882       if (!edge->callee->count.ipa_p ())
1883         {
1884           sreal cached_badness = edge_badness (edge, false);
1885
1886           int old_size_est = estimate_edge_size (edge);
1887           sreal old_time_est = estimate_edge_time (edge);
1888           int old_hints_est = estimate_edge_hints (edge);
1889
1890           reset_edge_growth_cache (edge);
1891           gcc_assert (old_size_est == estimate_edge_size (edge));
1892           gcc_assert (old_time_est == estimate_edge_time (edge));
1893           /* FIXME:
1894
1895              gcc_assert (old_hints_est == estimate_edge_hints (edge));
1896
1897              fails with profile feedback because some hints depends on
1898              maybe_hot_edge_p predicate and because callee gets inlined to other
1899              calls, the edge may become cold.
1900              This ought to be fixed by computing relative probabilities
1901              for given invocation but that will be better done once whole
1902              code is converted to sreals.  Disable for now and revert to "wrong"
1903              value so enable/disable checking paths agree.  */
1904           edge_growth_cache[edge->uid].hints = old_hints_est + 1;
1905
1906           /* When updating the edge costs, we only decrease badness in the keys.
1907              Increases of badness are handled lazilly; when we see key with out
1908              of date value on it, we re-insert it now.  */
1909           current_badness = edge_badness (edge, false);
1910           gcc_assert (cached_badness == current_badness);
1911           gcc_assert (current_badness >= badness);
1912         }
1913 #else
1914       current_badness = edge_badness (edge, false);
1915 #endif
1916       if (current_badness != badness)
1917         {
1918           if (edge_heap.min () && current_badness > edge_heap.min_key ())
1919             {
1920               edge->aux = edge_heap.insert (current_badness, edge);
1921               continue;
1922             }
1923           else
1924             badness = current_badness;
1925         }
1926
1927       if (!can_inline_edge_p (edge, true))
1928         {
1929           resolve_noninline_speculation (&edge_heap, edge);
1930           continue;
1931         }
1932
1933       callee = edge->callee->ultimate_alias_target ();
1934       growth = estimate_edge_growth (edge);
1935       if (dump_file)
1936         {
1937           fprintf (dump_file,
1938                    "\nConsidering %s with %i size\n",
1939                    callee->dump_name (),
1940                    ipa_fn_summaries->get (callee)->size);
1941           fprintf (dump_file,
1942                    " to be inlined into %s in %s:%i\n"
1943                    " Estimated badness is %f, frequency %.2f.\n",
1944                    edge->caller->dump_name (),
1945                    edge->call_stmt
1946                    && (LOCATION_LOCUS (gimple_location ((const gimple *)
1947                                                         edge->call_stmt))
1948                        > BUILTINS_LOCATION)
1949                    ? gimple_filename ((const gimple *) edge->call_stmt)
1950                    : "unknown",
1951                    edge->call_stmt
1952                    ? gimple_lineno ((const gimple *) edge->call_stmt)
1953                    : -1,
1954                    badness.to_double (),
1955                    edge->frequency () / (double)CGRAPH_FREQ_BASE);
1956           if (edge->count.ipa ().initialized_p ())
1957             {
1958               fprintf (dump_file, " Called ");
1959               edge->count.ipa ().dump (dump_file);
1960               fprintf (dump_file, "times\n");
1961             }
1962           if (dump_flags & TDF_DETAILS)
1963             edge_badness (edge, true);
1964         }
1965
1966       if (overall_size + growth > max_size
1967           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1968         {
1969           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1970           report_inline_failed_reason (edge);
1971           resolve_noninline_speculation (&edge_heap, edge);
1972           continue;
1973         }
1974
1975       if (!want_inline_small_function_p (edge, true))
1976         {
1977           resolve_noninline_speculation (&edge_heap, edge);
1978           continue;
1979         }
1980
1981       /* Heuristics for inlining small functions work poorly for
1982          recursive calls where we do effects similar to loop unrolling.
1983          When inlining such edge seems profitable, leave decision on
1984          specific inliner.  */
1985       if (edge->recursive_p ())
1986         {
1987           where = edge->caller;
1988           if (where->global.inlined_to)
1989             where = where->global.inlined_to;
1990           if (!recursive_inlining (edge,
1991                                    opt_for_fn (edge->caller->decl,
1992                                                flag_indirect_inlining)
1993                                    ? &new_indirect_edges : NULL))
1994             {
1995               edge->inline_failed = CIF_RECURSIVE_INLINING;
1996               resolve_noninline_speculation (&edge_heap, edge);
1997               continue;
1998             }
1999           reset_edge_caches (where);
2000           /* Recursive inliner inlines all recursive calls of the function
2001              at once. Consequently we need to update all callee keys.  */
2002           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
2003             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2004           update_callee_keys (&edge_heap, where, updated_nodes);
2005           bitmap_clear (updated_nodes);
2006         }
2007       else
2008         {
2009           struct cgraph_node *outer_node = NULL;
2010           int depth = 0;
2011
2012           /* Consider the case where self recursive function A is inlined
2013              into B.  This is desired optimization in some cases, since it
2014              leads to effect similar of loop peeling and we might completely
2015              optimize out the recursive call.  However we must be extra
2016              selective.  */
2017
2018           where = edge->caller;
2019           while (where->global.inlined_to)
2020             {
2021               if (where->decl == callee->decl)
2022                 outer_node = where, depth++;
2023               where = where->callers->caller;
2024             }
2025           if (outer_node
2026               && !want_inline_self_recursive_call_p (edge, outer_node,
2027                                                      true, depth))
2028             {
2029               edge->inline_failed
2030                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
2031                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
2032               resolve_noninline_speculation (&edge_heap, edge);
2033               continue;
2034             }
2035           else if (depth && dump_file)
2036             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
2037
2038           gcc_checking_assert (!callee->global.inlined_to);
2039           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
2040           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
2041
2042           reset_edge_caches (edge->callee);
2043
2044           update_callee_keys (&edge_heap, where, updated_nodes);
2045         }
2046       where = edge->caller;
2047       if (where->global.inlined_to)
2048         where = where->global.inlined_to;
2049
2050       /* Our profitability metric can depend on local properties
2051          such as number of inlinable calls and size of the function body.
2052          After inlining these properties might change for the function we
2053          inlined into (since it's body size changed) and for the functions
2054          called by function we inlined (since number of it inlinable callers
2055          might change).  */
2056       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
2057       /* Offline copy count has possibly changed, recompute if profile is
2058          available.  */
2059       if (max_count.nonzero_p ())
2060         {
2061           struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
2062           if (n != edge->callee && n->analyzed)
2063             update_callee_keys (&edge_heap, n, updated_nodes);
2064         }
2065       bitmap_clear (updated_nodes);
2066
2067       if (dump_file)
2068         {
2069           fprintf (dump_file,
2070                    " Inlined %s into %s which now has time %f and size %i, "
2071                    "net change of %+i.\n",
2072                    xstrdup_for_dump (edge->callee->name ()),
2073                    xstrdup_for_dump (edge->caller->name ()),
2074                    ipa_fn_summaries->get (edge->caller)->time.to_double (),
2075                    ipa_fn_summaries->get (edge->caller)->size,
2076                    overall_size - old_size);
2077         }
2078       if (min_size > overall_size)
2079         {
2080           min_size = overall_size;
2081           max_size = compute_max_insns (min_size);
2082
2083           if (dump_file)
2084             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
2085         }
2086     }
2087
2088   free_growth_caches ();
2089   if (dump_file)
2090     fprintf (dump_file,
2091              "Unit growth for small function inlining: %i->%i (%i%%)\n",
2092              initial_size, overall_size,
2093              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
2094   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
2095 }
2096
2097 /* Flatten NODE.  Performed both during early inlining and
2098    at IPA inlining time.  */
2099
2100 static void
2101 flatten_function (struct cgraph_node *node, bool early)
2102 {
2103   struct cgraph_edge *e;
2104
2105   /* We shouldn't be called recursively when we are being processed.  */
2106   gcc_assert (node->aux == NULL);
2107
2108   node->aux = (void *) node;
2109
2110   for (e = node->callees; e; e = e->next_callee)
2111     {
2112       struct cgraph_node *orig_callee;
2113       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2114
2115       /* We've hit cycle?  It is time to give up.  */
2116       if (callee->aux)
2117         {
2118           if (dump_file)
2119             fprintf (dump_file,
2120                      "Not inlining %s into %s to avoid cycle.\n",
2121                      xstrdup_for_dump (callee->name ()),
2122                      xstrdup_for_dump (e->caller->name ()));
2123           e->inline_failed = CIF_RECURSIVE_INLINING;
2124           continue;
2125         }
2126
2127       /* When the edge is already inlined, we just need to recurse into
2128          it in order to fully flatten the leaves.  */
2129       if (!e->inline_failed)
2130         {
2131           flatten_function (callee, early);
2132           continue;
2133         }
2134
2135       /* Flatten attribute needs to be processed during late inlining. For
2136          extra code quality we however do flattening during early optimization,
2137          too.  */
2138       if (!early
2139           ? !can_inline_edge_p (e, true)
2140           : !can_early_inline_edge_p (e))
2141         continue;
2142
2143       if (e->recursive_p ())
2144         {
2145           if (dump_file)
2146             fprintf (dump_file, "Not inlining: recursive call.\n");
2147           continue;
2148         }
2149
2150       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
2151           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
2152         {
2153           if (dump_file)
2154             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
2155           continue;
2156         }
2157
2158       /* Inline the edge and flatten the inline clone.  Avoid
2159          recursing through the original node if the node was cloned.  */
2160       if (dump_file)
2161         fprintf (dump_file, " Inlining %s into %s.\n",
2162                  xstrdup_for_dump (callee->name ()),
2163                  xstrdup_for_dump (e->caller->name ()));
2164       orig_callee = callee;
2165       inline_call (e, true, NULL, NULL, false);
2166       if (e->callee != orig_callee)
2167         orig_callee->aux = (void *) node;
2168       flatten_function (e->callee, early);
2169       if (e->callee != orig_callee)
2170         orig_callee->aux = NULL;
2171     }
2172
2173   node->aux = NULL;
2174   if (!node->global.inlined_to)
2175     ipa_update_overall_fn_summary (node);
2176 }
2177
2178 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
2179    DATA points to number of calls originally found so we avoid infinite
2180    recursion.  */
2181
2182 static bool
2183 inline_to_all_callers_1 (struct cgraph_node *node, void *data,
2184                          hash_set<cgraph_node *> *callers)
2185 {
2186   int *num_calls = (int *)data;
2187   bool callee_removed = false;
2188
2189   while (node->callers && !node->global.inlined_to)
2190     {
2191       struct cgraph_node *caller = node->callers->caller;
2192
2193       if (!can_inline_edge_p (node->callers, true)
2194           || node->callers->recursive_p ())
2195         {
2196           if (dump_file)
2197             fprintf (dump_file, "Uninlinable call found; giving up.\n");
2198           *num_calls = 0;
2199           return false;
2200         }
2201
2202       if (dump_file)
2203         {
2204           fprintf (dump_file,
2205                    "\nInlining %s size %i.\n",
2206                    node->name (),
2207                    ipa_fn_summaries->get (node)->size);
2208           fprintf (dump_file,
2209                    " Called once from %s %i insns.\n",
2210                    node->callers->caller->name (),
2211                    ipa_fn_summaries->get (node->callers->caller)->size);
2212         }
2213
2214       /* Remember which callers we inlined to, delaying updating the
2215          overall summary.  */
2216       callers->add (node->callers->caller);
2217       inline_call (node->callers, true, NULL, NULL, false, &callee_removed);
2218       if (dump_file)
2219         fprintf (dump_file,
2220                  " Inlined into %s which now has %i size\n",
2221                  caller->name (),
2222                  ipa_fn_summaries->get (caller)->size);
2223       if (!(*num_calls)--)
2224         {
2225           if (dump_file)
2226             fprintf (dump_file, "New calls found; giving up.\n");
2227           return callee_removed;
2228         }
2229       if (callee_removed)
2230         return true;
2231     }
2232   return false;
2233 }
2234
2235 /* Wrapper around inline_to_all_callers_1 doing delayed overall summary
2236    update.  */
2237
2238 static bool
2239 inline_to_all_callers (struct cgraph_node *node, void *data)
2240 {
2241   hash_set<cgraph_node *> callers;
2242   bool res = inline_to_all_callers_1 (node, data, &callers);
2243   /* Perform the delayed update of the overall summary of all callers
2244      processed.  This avoids quadratic behavior in the cases where
2245      we have a lot of calls to the same function.  */
2246   for (hash_set<cgraph_node *>::iterator i = callers.begin ();
2247        i != callers.end (); ++i)
2248     ipa_update_overall_fn_summary (*i);
2249   return res;
2250 }
2251
2252 /* Output overall time estimate.  */
2253 static void
2254 dump_overall_stats (void)
2255 {
2256   sreal sum_weighted = 0, sum = 0;
2257   struct cgraph_node *node;
2258
2259   FOR_EACH_DEFINED_FUNCTION (node)
2260     if (!node->global.inlined_to
2261         && !node->alias)
2262       {
2263         sreal time = ipa_fn_summaries->get (node)->time;
2264         sum += time;
2265         if (node->count.ipa ().initialized_p ())
2266           sum_weighted += time * node->count.ipa ().to_gcov_type ();
2267       }
2268   fprintf (dump_file, "Overall time estimate: "
2269            "%f weighted by profile: "
2270            "%f\n", sum.to_double (), sum_weighted.to_double ());
2271 }
2272
2273 /* Output some useful stats about inlining.  */
2274
2275 static void
2276 dump_inline_stats (void)
2277 {
2278   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2279   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2280   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2281   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2282   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2283   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2284   int64_t reason[CIF_N_REASONS][3];
2285   int i;
2286   struct cgraph_node *node;
2287
2288   memset (reason, 0, sizeof (reason));
2289   FOR_EACH_DEFINED_FUNCTION (node)
2290   {
2291     struct cgraph_edge *e;
2292     for (e = node->callees; e; e = e->next_callee)
2293       {
2294         if (e->inline_failed)
2295           {
2296             if (e->count.ipa ().initialized_p ())
2297               reason[(int) e->inline_failed][0] += e->count.ipa ().to_gcov_type ();
2298             reason[(int) e->inline_failed][1] += e->frequency ();
2299             reason[(int) e->inline_failed][2] ++;
2300             if (DECL_VIRTUAL_P (e->callee->decl)
2301                 && e->count.ipa ().initialized_p ())
2302               {
2303                 if (e->indirect_inlining_edge)
2304                   noninlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2305                 else
2306                   noninlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2307               }
2308             else if (e->count.ipa ().initialized_p ())
2309               {
2310                 if (e->indirect_inlining_edge)
2311                   noninlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2312                 else
2313                   noninlined_cnt += e->count.ipa ().to_gcov_type ();
2314               }
2315           }
2316         else if (e->count.ipa ().initialized_p ())
2317           {
2318             if (e->speculative)
2319               {
2320                 if (DECL_VIRTUAL_P (e->callee->decl))
2321                   inlined_speculative_ply += e->count.ipa ().to_gcov_type ();
2322                 else
2323                   inlined_speculative += e->count.ipa ().to_gcov_type ();
2324               }
2325             else if (DECL_VIRTUAL_P (e->callee->decl))
2326               {
2327                 if (e->indirect_inlining_edge)
2328                   inlined_virt_indir_cnt += e->count.ipa ().to_gcov_type ();
2329                 else
2330                   inlined_virt_cnt += e->count.ipa ().to_gcov_type ();
2331               }
2332             else
2333               {
2334                 if (e->indirect_inlining_edge)
2335                   inlined_indir_cnt += e->count.ipa ().to_gcov_type ();
2336                 else
2337                   inlined_cnt += e->count.ipa ().to_gcov_type ();
2338               }
2339           }
2340       }
2341     for (e = node->indirect_calls; e; e = e->next_callee)
2342       if (e->indirect_info->polymorphic
2343           & e->count.ipa ().initialized_p ())
2344         indirect_poly_cnt += e->count.ipa ().to_gcov_type ();
2345       else if (e->count.ipa ().initialized_p ())
2346         indirect_cnt += e->count.ipa ().to_gcov_type ();
2347   }
2348   if (max_count.initialized_p ())
2349     {
2350       fprintf (dump_file,
2351                "Inlined %" PRId64 " + speculative "
2352                "%" PRId64 " + speculative polymorphic "
2353                "%" PRId64 " + previously indirect "
2354                "%" PRId64 " + virtual "
2355                "%" PRId64 " + virtual and previously indirect "
2356                "%" PRId64 "\n" "Not inlined "
2357                "%" PRId64 " + previously indirect "
2358                "%" PRId64 " + virtual "
2359                "%" PRId64 " + virtual and previously indirect "
2360                "%" PRId64 " + stil indirect "
2361                "%" PRId64 " + still indirect polymorphic "
2362                "%" PRId64 "\n", inlined_cnt,
2363                inlined_speculative, inlined_speculative_ply,
2364                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2365                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2366                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2367       fprintf (dump_file, "Removed speculations ");
2368       spec_rem.dump (dump_file);
2369       fprintf (dump_file, "\n");
2370     }
2371   dump_overall_stats ();
2372   fprintf (dump_file, "\nWhy inlining failed?\n");
2373   for (i = 0; i < CIF_N_REASONS; i++)
2374     if (reason[i][2])
2375       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %" PRId64" count\n",
2376                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2377                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2378 }
2379
2380 /* Decide on the inlining.  We do so in the topological order to avoid
2381    expenses on updating data structures.

        The visible steps are, in order: flatten functions carrying the
        "flatten" attribute, run inline_small_functions, switch the symbol
        table state to IPA_SSA_AFTER_INLINING and remove unreachable nodes,
        then sweep all defined functions twice (COLD = 0, then COLD = 1),
        resolving speculative edges for which speculation_useful_p is false
        and inlining functions into all of their callers whenever
        want_inline_function_to_all_callers_p agrees.

        Returns TODO_remove_functions if any speculation was resolved or any
        function was inlined into all of its callers, 0 otherwise.  */
2382
2383 static unsigned int
2384 ipa_inline (void)
2385 {
2386   struct cgraph_node *node;
2387   int nnodes;
2388   struct cgraph_node **order;
2389   int i;
2390   int cold;
2391   bool remove_functions = false;
2392
       /* Precompute 1/CGRAPH_FREQ_BASE and 1/100 as sreal values.  Their users
          are outside this function (presumably so divisions can be replaced
          by multiplications -- confirm at the use sites).  */
2393   cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
2394   percent_rec = (sreal) 1 / (sreal) 100;
2395
       /* One slot per cgraph node; released with free () right after the first
          unreachable-node removal below.  */
2396   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2397
2398   if (dump_file)
2399     ipa_dump_fn_summaries (dump_file);
2400
       /* ORDER is filled by ipa_reverse_postorder; NNODES is the number of
          entries that are subsequently read from it.  */
2401   nnodes = ipa_reverse_postorder (order);
       /* Running total of the counts of speculative edges resolved below.  */
2402   spec_rem = profile_count::zero ();
2403
2404   FOR_EACH_FUNCTION (node)
2405     {
2406       node->aux = 0;
2407
2408       /* Recompute the default reasons for inlining because they may have
2409          changed during merging.  */
2410       if (in_lto_p)
2411         {
2412           for (cgraph_edge *e = node->callees; e; e = e->next_callee)
2413             {
                   /* No direct edge may already be marked as inlined here.  */
2414               gcc_assert (e->inline_failed);
2415               initialize_inline_failed (e);
2416             }
2417           for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
2418             initialize_inline_failed (e);
2419         }
2420     }
2421
2422   if (dump_file)
2423     fprintf (dump_file, "\nFlattening functions:\n");
2424
2425   /* In the first pass handle functions to be flattened.  Do this with
2426      a priority so none of our later choices will make this impossible.  */
       /* ORDER is walked from its last entry down to its first.  */
2427   for (i = nnodes - 1; i >= 0; i--)
2428     {
2429       node = order[i];
2430
2431       /* Handle nodes to be flattened.
2432          Ideally when processing callees we stop inlining at the
2433          entry of cycles, possibly cloning that entry point and
2434          try to flatten itself turning it into a self-recursive
2435          function.  */
2436       if (lookup_attribute ("flatten",
2437                             DECL_ATTRIBUTES (node->decl)) != NULL)
2438         {
2439           if (dump_file)
2440             fprintf (dump_file,
2441                      "Flattening %s\n", node->name ());
               /* The early inliner passes true as the second argument; the
                  flag presumably distinguishes early from IPA flattening --
                  confirm against flatten_function.  */
2442           flatten_function (node, false);
2443         }
2444     }
2445   if (dump_file)
2446     dump_overall_stats ();
2447
2448   inline_small_functions ();
2449
       /* Record in the symbol table state that IPA inlining decisions have
          been made; the state must still be IPA_SSA at this point.  */
2450   gcc_assert (symtab->state == IPA_SSA);
2451   symtab->state = IPA_SSA_AFTER_INLINING;
2452   /* Do first after-inlining removal.  We want to remove all "stale" extern
2453      inline functions and virtual functions so we really know what is called
2454      once.  */
2455   symtab->remove_unreachable_nodes (dump_file);
2456   free (order);
2457
2458   /* Inline functions with a property that after inlining into all callers the
2459      code size will shrink because the out-of-line copy is eliminated.
2460      We do this regardless of the callee size as long as function growth limits
2461      are met.  */
2462   if (dump_file)
2463     fprintf (dump_file,
2464              "\nDeciding on functions to be inlined into all callers and "
2465              "removing useless speculations:\n");
2466
2467   /* Inlining one function called once has good chance of preventing
2468      inlining other function into the same callee.  Ideally we should
2469      work in priority order, but probably inlining hot functions first
2470      is good cut without the extra pain of maintaining the queue.
2471
2472      ??? this is not really fitting the bill perfectly: inlining function
2473      into callee often leads to better optimization of callee due to
2474      increased context for optimization.
2475      For example if main() function calls a function that outputs help
2476      and then function that does the main optimization, we should inline
2477      the second with priority even if both calls are cold by themselves.
2478
2479      We probably want to implement new predicate replacing our use of
2480      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2481      to be hot.  */
       /* Two sweeps: COLD is 0 on the first and 1 on the second, and is only
          forwarded to want_inline_function_to_all_callers_p.  */
2482   for (cold = 0; cold <= 1; cold ++)
2483     {
2484       FOR_EACH_DEFINED_FUNCTION (node)
2485         {
2486           struct cgraph_edge *edge, *next;
2487           bool update=false;
2488
               /* Skip functions not optimized or compiled without
                  flag_inline_functions_called_once.  */
2489           if (!opt_for_fn (node->decl, optimize)
2490               || !opt_for_fn (node->decl, flag_inline_functions_called_once))
2491             continue;
2492
2493           for (edge = node->callees; edge; edge = next)
2494             {
                   /* Fetch the successor first; resolve_speculation
                      presumably may remove EDGE from the list.  */
2495               next = edge->next_callee;
2496               if (edge->speculative && !speculation_useful_p (edge, false))
2497                 {
2498                   if (edge->count.ipa ().initialized_p ())
2499                     spec_rem += edge->count.ipa ();
2500                   edge->resolve_speculation ();
2501                   update = true;
2502                   remove_functions = true;
2503                 }
2504             }
2505           if (update)
2506             {
                   /* If NODE was itself inlined, refresh the function it
                      was inlined into rather than NODE.  */
2507               struct cgraph_node *where = node->global.inlined_to
2508                                           ? node->global.inlined_to : node;
2509               reset_edge_caches (where);
2510               ipa_update_overall_fn_summary (where);
2511             }
2512           if (want_inline_function_to_all_callers_p (node, cold))
2513             {
2514               int num_calls = 0;
                   /* NUM_CALLS is handed to sum_callers and then to
                      inline_to_all_callers, which is re-run for as long as
                      the walk over NODE and its aliases returns true.  */
2515               node->call_for_symbol_and_aliases (sum_callers, &num_calls,
2516                                                  true);
2517               while (node->call_for_symbol_and_aliases
2518                        (inline_to_all_callers, &num_calls, true))
2519                 ;
2520               remove_functions = true;
2521             }
2522         }
2523     }
2524
2525   /* Free ipa-prop structures if they are no longer needed.  */
2526   ipa_free_all_structures_after_iinln ();
2527
2528   if (dump_file)
2529     {
2530       fprintf (dump_file,
2531                "\nInlined %i calls, eliminated %i functions\n\n",
2532                ncalls_inlined, nfunctions_inlined);
2533       dump_inline_stats ();
2534     }
2535
2536   if (dump_file)
2537     ipa_dump_fn_summaries (dump_file);
2538   return remove_functions ? TODO_remove_functions : 0;
2539 }
2540
2541 /* Inline always-inline function calls in NODE.  */
2542
2543 static bool
2544 inline_always_inline_functions (struct cgraph_node *node)
2545 {
2546   struct cgraph_edge *e;
2547   bool inlined = false;
2548
2549   for (e = node->callees; e; e = e->next_callee)
2550     {
2551       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2552       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2553         continue;
2554
2555       if (e->recursive_p ())
2556         {
2557           if (dump_file)
2558             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2559                      e->callee->name ());
2560           e->inline_failed = CIF_RECURSIVE_INLINING;
2561           continue;
2562         }
2563
2564       if (!can_early_inline_edge_p (e))
2565         {
2566           /* Set inlined to true if the callee is marked "always_inline" but
2567              is not inlinable.  This will allow flagging an error later in
2568              expand_call_inline in tree-inline.c.  */
2569           if (lookup_attribute ("always_inline",
2570                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2571             inlined = true;
2572           continue;
2573         }
2574
2575       if (dump_file)
2576         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2577                  xstrdup_for_dump (e->callee->name ()),
2578                  xstrdup_for_dump (e->caller->name ()));
2579       inline_call (e, true, NULL, NULL, false);
2580       inlined = true;
2581     }
2582   if (inlined)
2583     ipa_update_overall_fn_summary (node);
2584
2585   return inlined;
2586 }
2587
2588 /* Decide on the inlining.  We do so in the topological order to avoid
2589    expenses on updating data structures.  */
2590
2591 static bool
2592 early_inline_small_functions (struct cgraph_node *node)
2593 {
2594   struct cgraph_edge *e;
2595   bool inlined = false;
2596
2597   for (e = node->callees; e; e = e->next_callee)
2598     {
2599       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2600       if (!ipa_fn_summaries->get (callee)->inlinable
2601           || !e->inline_failed)
2602         continue;
2603
2604       /* Do not consider functions not declared inline.  */
2605       if (!DECL_DECLARED_INLINE_P (callee->decl)
2606           && !opt_for_fn (node->decl, flag_inline_small_functions)
2607           && !opt_for_fn (node->decl, flag_inline_functions))
2608         continue;
2609
2610       if (dump_file)
2611         fprintf (dump_file, "Considering inline candidate %s.\n",
2612                  callee->name ());
2613
2614       if (!can_early_inline_edge_p (e))
2615         continue;
2616
2617       if (e->recursive_p ())
2618         {
2619           if (dump_file)
2620             fprintf (dump_file, "  Not inlining: recursive call.\n");
2621           continue;
2622         }
2623
2624       if (!want_early_inline_function_p (e))
2625         continue;
2626
2627       if (dump_file)
2628         fprintf (dump_file, " Inlining %s into %s.\n",
2629                  xstrdup_for_dump (callee->name ()),
2630                  xstrdup_for_dump (e->caller->name ()));
2631       inline_call (e, true, NULL, NULL, false);
2632       inlined = true;
2633     }
2634
2635   if (inlined)
2636     ipa_update_overall_fn_summary (node);
2637
2638   return inlined;
2639 }
2640
/* Early inliner for the function whose declaration is
   current_function_decl.  Always-inline callees are inlined first; then,
   unless optimization or early inlining is disabled or the function is
   itself always_inline, the function is either flattened (when it has the
   "flatten" attribute) or small callees are inlined iteratively, up to
   PARAM_EARLY_INLINER_MAX_ITERATIONS rounds.

   FUN is only used to set its always_inline_functions_inlined flag.
   Returns the TODO flags collected from optimize_inline_calls, or 0 when
   bailing out early (an error was seen, or ipa_node_params_sum is already
   set).  */

2641 unsigned int
2642 early_inliner (function *fun)
2643 {
2644   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2645   struct cgraph_edge *edge;
2646   unsigned int todo = 0;
2647   int iterations = 0;
       /* True while there are inlining decisions that have not yet been
          applied by optimize_inline_calls.  */
2648   bool inlined = false;
2649
2650   if (seen_error ())
2651     return 0;
2652
2653   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2654      happens when some pass decides to construct new function and
2655      cgraph_add_new_function calls lowering passes and early optimization on
2656      it.  This may confuse ourself when early inliner decide to inline call to
2657      function clone, because function clones don't have parameter list in
2658      ipa-prop matching their signature.  */
2659   if (ipa_node_params_sum)
2660     return 0;
2661
2662   if (flag_checking)
2663     node->verify ();
2664   node->remove_all_references ();
2665
2666   /* Rebuild this reference because it doesn't depend on
2667      function's body and it's required to pass cgraph_node
2668      verification.  */
2669   if (node->instrumented_version
2670       && !node->instrumentation_clone)
2671     node->create_reference (node->instrumented_version, IPA_REF_CHKP, NULL);
2672
2673   /* Even when not optimizing or not inlining inline always-inline
2674      functions.  */
2675   inlined = inline_always_inline_functions (node);
2676
2677   if (!optimize
2678       || flag_no_inline
2679       || !flag_early_inlining
2680       /* Never inline regular functions into always-inline functions
2681          during incremental inlining.  This sucks as functions calling
2682          always inline functions will get less optimized, but at the
2683          same time inlining of functions calling always inline
2684          function into an always inline function might introduce
2685          cycles of edges to be always inlined in the callgraph.
2686
2687          We might want to be smarter and just avoid this type of inlining.  */
2688       || (DECL_DISREGARD_INLINE_LIMITS (node->decl)
2689           && lookup_attribute ("always_inline",
2690                                DECL_ATTRIBUTES (node->decl))))
         /* Intentionally empty: nothing beyond the always-inline handling
            above is done in this case.  */
2691     ;
2692   else if (lookup_attribute ("flatten",
2693                              DECL_ATTRIBUTES (node->decl)) != NULL)
2694     {
2695       /* When the function is marked to be flattened, recursively inline
2696          all calls in it.  */
2697       if (dump_file)
2698         fprintf (dump_file,
2699                  "Flattening %s\n", node->name ());
2700       flatten_function (node, true);
2701       inlined = true;
2702     }
2703   else
2704     {
2705       /* If some always_inline functions was inlined, apply the changes.
2706          This way we will not account always inline into growth limits and
2707          moreover we will inline calls from always inlines that we skipped
2708          previously because of conditional above.  */
2709       if (inlined)
2710         {
2711           timevar_push (TV_INTEGRATION);
2712           todo |= optimize_inline_calls (current_function_decl);
2713           /* optimize_inline_calls call above might have introduced new
2714              statements that don't have inline parameters computed.  */
2715           for (edge = node->callees; edge; edge = edge->next_callee)
2716             {
2717               struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2718               es->call_stmt_size
2719                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2720               es->call_stmt_time
2721                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2722             }
2723           ipa_update_overall_fn_summary (node);
               /* The changes are applied; avoid the final
                  optimize_inline_calls at the end of the function.  */
2724           inlined = false;
2725           timevar_pop (TV_INTEGRATION);
2726         }
2727       /* We iterate incremental inlining to get trivial cases of indirect
2728          inlining.  */
2729       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2730              && early_inline_small_functions (node))
2731         {
2732           timevar_push (TV_INTEGRATION);
2733           todo |= optimize_inline_calls (current_function_decl);
2734
2735           /* Technically we ought to recompute inline parameters so the new
2736              iteration of early inliner works as expected.  We however have
2737              values approximately right and thus we only need to update edge
2738              info that might be cleared out for newly discovered edges.  */
2739           for (edge = node->callees; edge; edge = edge->next_callee)
2740             {
2741               /* We have no summary for new bound store calls yet.  */
2742               struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2743               es->call_stmt_size
2744                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2745               es->call_stmt_time
2746                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2747
                   /* Mark calls whose statement does not match the callee
                      declaration as not inlinable.  */
2748               if (edge->callee->decl
2749                   && !gimple_check_call_matching_types (
2750                       edge->call_stmt, edge->callee->decl, false))
2751                 {
2752                   edge->inline_failed = CIF_MISMATCHED_ARGUMENTS;
2753                   edge->call_stmt_cannot_inline_p = true;
2754                 }
2755             }
               /* The summary update is skipped on the last permitted
                  iteration, after which the loop cannot run again.  */
2756           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2757             ipa_update_overall_fn_summary (node);
2758           timevar_pop (TV_INTEGRATION);
2759           iterations++;
2760           inlined = false;
2761         }
2762       if (dump_file)
2763         fprintf (dump_file, "Iterations: %i\n", iterations);
2764     }
2765
       /* Apply inlining decisions that are still pending (always-inline
          inlining on the empty-statement path, or flattening).  */
2766   if (inlined)
2767     {
2768       timevar_push (TV_INTEGRATION);
2769       todo |= optimize_inline_calls (current_function_decl);
2770       timevar_pop (TV_INTEGRATION);
2771     }
2772
2773   fun->always_inline_functions_inlined = true;
2774
2775   return todo;
2776 }
2777
2778 /* Do inlining of small functions.  Doing so early helps profiling and other
2779    passes to be somewhat more effective and avoids some code duplication in
2780    later real inlining pass for testcases with very many function calls.  */
2781
2782 namespace {
2783
2784 const pass_data pass_data_early_inline =
2785 {
2786   GIMPLE_PASS, /* type */
2787   "einline", /* name */
2788   OPTGROUP_INLINE, /* optinfo_flags */
2789   TV_EARLY_INLINING, /* tv_id */
2790   PROP_ssa, /* properties_required */
2791   0, /* properties_provided */
2792   0, /* properties_destroyed */
2793   0, /* todo_flags_start */
2794   0, /* todo_flags_finish */
2795 };
2796
2797 class pass_early_inline : public gimple_opt_pass
2798 {
2799 public:
2800   pass_early_inline (gcc::context *ctxt)
2801     : gimple_opt_pass (pass_data_early_inline, ctxt)
2802   {}
2803
2804   /* opt_pass methods: */
2805   virtual unsigned int execute (function *);
2806
2807 }; // class pass_early_inline
2808
2809 unsigned int
2810 pass_early_inline::execute (function *fun)
2811 {
2812   return early_inliner (fun);
2813 }
2814
2815 } // anon namespace
2816
2817 gimple_opt_pass *
2818 make_pass_early_inline (gcc::context *ctxt)
2819 {
2820   return new pass_early_inline (ctxt);
2821 }
2822
2823 namespace {
2824
2825 const pass_data pass_data_ipa_inline =
2826 {
2827   IPA_PASS, /* type */
2828   "inline", /* name */
2829   OPTGROUP_INLINE, /* optinfo_flags */
2830   TV_IPA_INLINING, /* tv_id */
2831   0, /* properties_required */
2832   0, /* properties_provided */
2833   0, /* properties_destroyed */
2834   0, /* todo_flags_start */
2835   ( TODO_dump_symtab ), /* todo_flags_finish */
2836 };
2837
2838 class pass_ipa_inline : public ipa_opt_pass_d
2839 {
2840 public:
2841   pass_ipa_inline (gcc::context *ctxt)
2842     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2843                       NULL, /* generate_summary */
2844                       NULL, /* write_summary */
2845                       NULL, /* read_summary */
2846                       NULL, /* write_optimization_summary */
2847                       NULL, /* read_optimization_summary */
2848                       NULL, /* stmt_fixup */
2849                       0, /* function_transform_todo_flags_start */
2850                       inline_transform, /* function_transform */
2851                       NULL) /* variable_transform */
2852   {}
2853
2854   /* opt_pass methods: */
2855   virtual unsigned int execute (function *) { return ipa_inline (); }
2856
2857 }; // class pass_ipa_inline
2858
2859 } // anon namespace
2860
2861 ipa_opt_pass_d *
2862 make_pass_ipa_inline (gcc::context *ctxt)
2863 {
2864   return new pass_ipa_inline (ctxt);
2865 }