gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
      can_inline_edge_p checks that a particular inlining is allowed
      by the limits specified by the user (allowed function growth, stack
      growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
         callees of the function seen by the early inliner were already
         optimized and the results of early inlining add a lot of
         optimization opportunities for the local optimization.
  52
         The pass handles the obvious inlining decisions within the
         compilation unit - inlining auto inline functions, inlining for
         size and flattening.
  56
         The main strength of the pass is the ability to eliminate
         abstraction penalty in C++ code (via combination of inlining and
         early optimization) and thus improve quality of analysis done by
         real IPA optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allows doing so.
  76
         This heuristic is not very good at inlining recursive calls.
         Recursive calls can be inlined with results similar to loop
         unrolling.  To do so, a special purpose recursive inliner is
         executed on a function when a recursive edge is met as a viable
         candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "trans-mem.h"
  98 #include "calls.h"
  99 #include "tree-inline.h"
 100 #include "langhooks.h"
 101 #include "flags.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "params.h"
 105 #include "intl.h"
 106 #include "tree-pass.h"
 107 #include "coverage.h"
 108 #include "rtl.h"
 109 #include "bitmap.h"
 110 #include "profile.h"
 111 #include "predict.h"
 112 #include "vec.h"
 113 #include "hashtab.h"
 114 #include "hash-set.h"
 115 #include "machmode.h"
 116 #include "hard-reg-set.h"
 117 #include "input.h"
 118 #include "function.h"
 119 #include "basic-block.h"
 120 #include "tree-ssa-alias.h"
 121 #include "internal-fn.h"
 122 #include "gimple-expr.h"
 123 #include "is-a.h"
 124 #include "gimple.h"
 125 #include "gimple-ssa.h"
 126 #include "hash-map.h"
 127 #include "plugin-api.h"
 128 #include "ipa-ref.h"
 129 #include "cgraph.h"
 130 #include "alloc-pool.h"
 131 #include "symbol-summary.h"
 132 #include "ipa-prop.h"
 133 #include "except.h"
 134 #include "target.h"
 135 #include "ipa-inline.h"
 136 #include "ipa-utils.h"
 137 #include "sreal.h"
 138 #include "auto-profile.h"
 139 #include "cilk.h"
 140 #include "builtins.h"
 141 #include "fibonacci_heap.h"
 142
 143 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 144 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 145
 146 /* Statistics we collect about inlining algorithm.  */
 147 static int overall_size;
 148 static gcov_type max_count;
 149 static gcov_type spec_rem;
 150
 151 /* Pre-computed constants 1/CGRAPH_FREQ_BASE and 1/100. */
 152 static sreal cgraph_freq_base_rec, percent_rec;
 153
 154 /* Return false when inlining edge E would lead to violating
 155    limits on function unit growth or stack usage growth.
 156
 157    The relative function body growth limit is present generally
 158    to avoid problems with non-linear behavior of the compiler.
 159    To allow inlining huge functions into tiny wrapper, the limit
 160    is always based on the bigger of the two functions considered.
 161
 162    For stack growth limits we always base the growth in stack usage
 163    of the callers.  We want to prevent applications from segfaulting
 164    on stack overflow when functions with huge stack frames gets
 165    inlined. */
 166
 167 static bool
 168 caller_growth_limits (struct cgraph_edge *e)
 169 {
 170   struct cgraph_node *to = e->caller;
 171   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 172   int newsize;
 173   int limit = 0;
 174   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 175   inline_summary *info, *what_info, *outer_info = inline_summaries->get (to);
 176
 177   /* Look for function e->caller is inlined to.  While doing
 178      so work out the largest function body on the way.  As
 179      described above, we want to base our function growth
 180      limits based on that.  Not on the self size of the
 181      outer function, not on the self size of inline code
 182      we immediately inline to.  This is the most relaxed
 183      interpretation of the rule "do not grow large functions
 184      too much in order to prevent compiler from exploding".  */
 185   while (true)
 186     {
 187       info = inline_summaries->get (to);
 188       if (limit < info->self_size)
 189         limit = info->self_size;
 190       if (stack_size_limit < info->estimated_self_stack_size)
 191         stack_size_limit = info->estimated_self_stack_size;
 192       if (to->global.inlined_to)
 193         to = to->callers->caller;
 194       else
 195         break;
 196     }
 197
 198   what_info = inline_summaries->get (what);
 199
 200   if (limit < what_info->self_size)
 201     limit = what_info->self_size;
 202
 203   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 204
 205   /* Check the size after inlining against the function limits.  But allow
 206      the function to shrink if it went over the limits by forced inlining.  */
 207   newsize = estimate_size_after_inlining (to, e);
 208   if (newsize >= info->size
 209       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 210       && newsize > limit)
 211     {
 212       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 213       return false;
 214     }
 215
 216   if (!what_info->estimated_stack_size)
 217     return true;
 218
 219   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 220      due to large i/o datastructures used by the Fortran front-end.
 221      We ought to ignore this limit when we know that the edge is executed
 222      on every invocation of the caller (i.e. its call statement dominates
 223      exit block).  We do not track this information, yet.  */
 224   stack_size_limit += ((gcov_type)stack_size_limit
 225                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 226
 227   inlined_stack = (outer_info->stack_frame_offset
 228                    + outer_info->estimated_self_stack_size
 229                    + what_info->estimated_stack_size);
 230   /* Check new stack consumption with stack consumption at the place
 231      stack is used.  */
 232   if (inlined_stack > stack_size_limit
 233       /* If function already has large stack usage from sibling
 234          inline call, we can inline, too.
 235          This bit overoptimistically assume that we are good at stack
 236          packing.  */
 237       && inlined_stack > info->estimated_stack_size
 238       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 239     {
 240       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 241       return false;
 242     }
 243   return true;
 244 }
 245
 246 /* Dump info about why inlining has failed.  */
 247
 248 static void
 249 report_inline_failed_reason (struct cgraph_edge *e)
 250 {
 251   if (dump_file)
 252     {
 253       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 254                xstrdup_for_dump (e->caller->name ()), e->caller->order,
 255                xstrdup_for_dump (e->callee->name ()), e->callee->order,
 256                cgraph_inline_failed_string (e->inline_failed));
 257     }
 258 }
 259
 260  /* Decide whether sanitizer-related attributes allow inlining. */
 261
 262 static bool
 263 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 264 {
 265   /* Don't care if sanitizer is disabled */
 266   if (!(flag_sanitize & SANITIZE_ADDRESS))
 267     return true;
 268
 269   if (!caller || !callee)
 270     return true;
 271
 272   return !!lookup_attribute ("no_sanitize_address",
 273       DECL_ATTRIBUTES (caller)) ==
 274       !!lookup_attribute ("no_sanitize_address",
 275       DECL_ATTRIBUTES (callee));
 276 }
 277
 278  /* Decide if we can inline the edge and possibly update
 279    inline_failed reason.
 280    We check whether inlining is possible at all and whether
 281    caller growth limits allow doing so.
 282
 283    if REPORT is true, output reason to the dump file.
 284
 285    if DISREGARD_LIMITS is true, ignore size limits.*/
 286
 287 static bool
 288 can_inline_edge_p (struct cgraph_edge *e, bool report,
 289                    bool disregard_limits = false)
 290 {
 291   bool inlinable = true;
 292   enum availability avail;
 293   cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
 294   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 295   tree callee_tree
 296     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 297   struct function *caller_fun = e->caller->get_fun ();
 298   struct function *callee_fun = callee ? callee->get_fun () : NULL;
 299
 300   gcc_assert (e->inline_failed);
 301
 302   if (!callee || !callee->definition)
 303     {
 304       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 305       inlinable = false;
 306     }
 307   else if (callee->calls_comdat_local)
 308     {
 309       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 310       inlinable = false;
 311     }
 312   else if (!inline_summaries->get (callee)->inlinable
 313            || (caller_fun && fn_contains_cilk_spawn_p (caller_fun)))
 314     {
 315       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 316       inlinable = false;
 317     }
 318   else if (avail <= AVAIL_INTERPOSABLE)
 319     {
 320       e->inline_failed = CIF_OVERWRITABLE;
 321       inlinable = false;
 322     }
 323   else if (e->call_stmt_cannot_inline_p)
 324     {
 325       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 326         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 327       inlinable = false;
 328     }
 329   /* Don't inline if the functions have different EH personalities.  */
 330   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 331            && DECL_FUNCTION_PERSONALITY (callee->decl)
 332            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 333                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 334     {
 335       e->inline_failed = CIF_EH_PERSONALITY;
 336       inlinable = false;
 337     }
 338   /* TM pure functions should not be inlined into non-TM_pure
 339      functions.  */
 340   else if (is_tm_pure (callee->decl)
 341            && !is_tm_pure (e->caller->decl))
 342     {
 343       e->inline_failed = CIF_UNSPECIFIED;
 344       inlinable = false;
 345     }
 346   /* Don't inline if the callee can throw non-call exceptions but the
 347      caller cannot.
 348      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 349      Move the flag into cgraph node or mirror it in the inline summary.  */
 350   else if (callee_fun && callee_fun->can_throw_non_call_exceptions
 351            && !(caller_fun && caller_fun->can_throw_non_call_exceptions))
 352     {
 353       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 354       inlinable = false;
 355     }
 356   /* Check compatibility of target optimization options.  */
 357   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 358                                                 callee->decl))
 359     {
 360       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 361       inlinable = false;
 362     }
 363   /* Don't inline a function with mismatched sanitization attributes. */
 364   else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
 365     {
 366       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 367       inlinable = false;
 368     }
 369   /* Check if caller growth allows the inlining.  */
 370   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 371            && !disregard_limits
 372            && !lookup_attribute ("flatten",
 373                                  DECL_ATTRIBUTES
 374                                    (e->caller->global.inlined_to
 375                                     ? e->caller->global.inlined_to->decl
 376                                     : e->caller->decl))
 377            && !caller_growth_limits (e))
 378     inlinable = false;
 379   /* Don't inline a function with a higher optimization level than the
 380      caller.  FIXME: this is really just tip of iceberg of handling
 381      optimization attribute.  */
 382   else if (caller_tree != callee_tree)
 383     {
 384       if (((opt_for_fn (e->caller->decl, optimize)
 385             > opt_for_fn (e->callee->decl, optimize))
 386             || (opt_for_fn (e->caller->decl, optimize_size)
 387                 != opt_for_fn (e->callee->decl, optimize_size)))
 388           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 389           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 390         {
 391           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 392           inlinable = false;
 393         }
 394     }
 395
 396   if (!inlinable && report)
 397     report_inline_failed_reason (e);
 398   return inlinable;
 399 }
 400
 401
 402 /* Return true if the edge E is inlinable during early inlining.  */
 403
 404 static bool
 405 can_early_inline_edge_p (struct cgraph_edge *e)
 406 {
 407   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 408   /* Early inliner might get called at WPA stage when IPA pass adds new
 409      function.  In this case we can not really do any of early inlining
 410      because function bodies are missing.  */
 411   if (!gimple_has_body_p (callee->decl))
 412     {
 413       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 414       return false;
 415     }
 416   /* In early inliner some of callees may not be in SSA form yet
 417      (i.e. the callgraph is cyclic and we did not process
 418      the callee by early inliner, yet).  We don't have CIF code for this
 419      case; later we will re-do the decision in the real inliner.  */
 420   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 421       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 422     {
 423       if (dump_file)
 424         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 425       return false;
 426     }
 427   if (!can_inline_edge_p (e, true))
 428     return false;
 429   return true;
 430 }
 431
 432
 433 /* Return number of calls in N.  Ignore cheap builtins.  */
 434
 435 static int
 436 num_calls (struct cgraph_node *n)
 437 {
 438   struct cgraph_edge *e;
 439   int num = 0;
 440
 441   for (e = n->callees; e; e = e->next_callee)
 442     if (!is_inexpensive_builtin (e->callee->decl))
 443       num++;
 444   return num;
 445 }
 446
 447
 448 /* Return true if we are interested in inlining small function.  */
 449
 450 static bool
 451 want_early_inline_function_p (struct cgraph_edge *e)
 452 {
 453   bool want_inline = true;
 454   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 455
 456   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 457     ;
 458   /* For AutoFDO, we need to make sure that before profile summary, all
 459      hot paths' IR look exactly the same as profiled binary. As a result,
 460      in einliner, we will disregard size limit and inline those callsites
 461      that are:
 462        * inlined in the profiled binary, and
 463        * the cloned callee has enough samples to be considered "hot".  */
 464   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 465     ;
 466   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 467            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 468     {
 469       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 470       report_inline_failed_reason (e);
 471       want_inline = false;
 472     }
 473   else
 474     {
 475       int growth = estimate_edge_growth (e);
 476       int n;
 477
 478       if (growth <= 0)
 479         ;
 480       else if (!e->maybe_hot_p ()
 481                && growth > 0)
 482         {
 483           if (dump_file)
 484             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 485                      "call is cold and code would grow by %i\n",
 486                      xstrdup_for_dump (e->caller->name ()),
 487                      e->caller->order,
 488                      xstrdup_for_dump (callee->name ()), callee->order,
 489                      growth);
 490           want_inline = false;
 491         }
 492       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 493         {
 494           if (dump_file)
 495             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 496                      "growth %i exceeds --param early-inlining-insns\n",
 497                      xstrdup_for_dump (e->caller->name ()),
 498                      e->caller->order,
 499                      xstrdup_for_dump (callee->name ()), callee->order,
 500                      growth);
 501           want_inline = false;
 502         }
 503       else if ((n = num_calls (callee)) != 0
 504                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 505         {
 506           if (dump_file)
 507             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 508                      "growth %i exceeds --param early-inlining-insns "
 509                      "divided by number of calls\n",
 510                      xstrdup_for_dump (e->caller->name ()),
 511                      e->caller->order,
 512                      xstrdup_for_dump (callee->name ()), callee->order,
 513                      growth);
 514           want_inline = false;
 515         }
 516     }
 517   return want_inline;
 518 }
 519
 520 /* Compute time of the edge->caller + edge->callee execution when inlining
 521    does not happen.  */
 522
 523 inline sreal
 524 compute_uninlined_call_time (struct inline_summary *callee_info,
 525                              struct cgraph_edge *edge)
 526 {
 527   sreal uninlined_call_time = (sreal)callee_info->time
 528                               * MAX (edge->frequency, 1)
 529                               * cgraph_freq_base_rec;
 530   int caller_time = inline_summaries->get (edge->caller->global.inlined_to
 531                                            ? edge->caller->global.inlined_to
 532                                            : edge->caller)->time;
 533   return uninlined_call_time + caller_time;
 534 }
 535
 536 /* Same as compute_uinlined_call_time but compute time when inlining
 537    does happen.  */
 538
 539 inline sreal
 540 compute_inlined_call_time (struct cgraph_edge *edge,
 541                            int edge_time)
 542 {
 543   int caller_time = inline_summaries->get (edge->caller->global.inlined_to
 544                                            ? edge->caller->global.inlined_to
 545                                            : edge->caller)->time;
 546   sreal time = (sreal)caller_time
 547                + ((sreal) (edge_time - inline_edge_summary (edge)->call_stmt_time)
 548                   * MAX (edge->frequency, 1)
 549                   * cgraph_freq_base_rec);
 550   gcc_checking_assert (time >= 0);
 551   return time;
 552 }
 553
 554 /* Return true if the speedup for inlining E is bigger than
 555    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 556
 557 static bool
 558 big_speedup_p (struct cgraph_edge *e)
 559 {
 560   sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee), e);
 561   sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
 562   if (time - inlined_time
 563       > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
 564          * percent_rec)
 565     return true;
 566   return false;
 567 }
 568
 569 /* Return true if we are interested in inlining small function.
 570    When REPORT is true, report reason to dump file.  */
 571
 572 static bool
 573 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 574 {
 575   bool want_inline = true;
 576   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 577
 578   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 579     ;
 580   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 581            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 582     {
 583       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 584       want_inline = false;
 585     }
 586   /* Do fast and conservative check if the function can be good
 587      inline candidate.  At the moment we allow inline hints to
 588      promote non-inline functions to inline and we increase
 589      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 590   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 591            && (!e->count || !e->maybe_hot_p ()))
 592            && inline_summaries->get (callee)->min_size
 593                 - inline_edge_summary (e)->call_stmt_size
 594               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 595     {
 596       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 597       want_inline = false;
 598     }
 599   else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
 600            && inline_summaries->get (callee)->min_size
 601                 - inline_edge_summary (e)->call_stmt_size
 602               > 16 * MAX_INLINE_INSNS_SINGLE)
 603     {
 604       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 605                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 606                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 607       want_inline = false;
 608     }
 609   else
 610     {
 611       int growth = estimate_edge_growth (e);
 612       inline_hints hints = estimate_edge_hints (e);
 613       bool big_speedup = big_speedup_p (e);
 614
 615       if (growth <= 0)
 616         ;
 617       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 618          hints suggests that inlining given function is very profitable.  */
 619       else if (DECL_DECLARED_INLINE_P (callee->decl)
 620                && growth >= MAX_INLINE_INSNS_SINGLE
 621                && ((!big_speedup
 622                     && !(hints & (INLINE_HINT_indirect_call
 623                                   | INLINE_HINT_known_hot
 624                                   | INLINE_HINT_loop_iterations
 625                                   | INLINE_HINT_array_index
 626                                   | INLINE_HINT_loop_stride)))
 627                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 628         {
 629           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 630           want_inline = false;
 631         }
 632       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 633                && !opt_for_fn (e->caller->decl, flag_inline_functions))
 634         {
 635           /* growth_likely_positive is expensive, always test it last.  */
 636           if (growth >= MAX_INLINE_INSNS_SINGLE
 637               || growth_likely_positive (callee, growth))
 638             {
 639               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 640               want_inline = false;
 641             }
 642         }
 643       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 644          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 645          inlining given function is very profitable.  */
 646       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 647                && !big_speedup
 648                && !(hints & INLINE_HINT_known_hot)
 649                && growth >= ((hints & (INLINE_HINT_indirect_call
 650                                        | INLINE_HINT_loop_iterations
 651                                        | INLINE_HINT_array_index
 652                                        | INLINE_HINT_loop_stride))
 653                              ? MAX (MAX_INLINE_INSNS_AUTO,
 654                                     MAX_INLINE_INSNS_SINGLE)
 655                              : MAX_INLINE_INSNS_AUTO))
 656         {
 657           /* growth_likely_positive is expensive, always test it last.  */
 658           if (growth >= MAX_INLINE_INSNS_SINGLE
 659               || growth_likely_positive (callee, growth))
 660             {
 661               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 662               want_inline = false;
 663             }
 664         }
 665       /* If call is cold, do not inline when function body would grow. */
 666       else if (!e->maybe_hot_p ()
 667                && (growth >= MAX_INLINE_INSNS_SINGLE
 668                    || growth_likely_positive (callee, growth)))
 669         {
 670           e->inline_failed = CIF_UNLIKELY_CALL;
 671           want_inline = false;
 672         }
 673     }
 674   if (!want_inline && report)
 675     report_inline_failed_reason (e);
 676   return want_inline;
 677 }
 678
 679 /* EDGE is self recursive edge.
 680    We hand two cases - when function A is inlining into itself
 681    or when function A is being inlined into another inliner copy of function
 682    A within function B.
 683
 684    In first case OUTER_NODE points to the toplevel copy of A, while
 685    in the second case OUTER_NODE points to the outermost copy of A in B.
 686
 687    In both cases we want to be extra selective since
 688    inlining the call will just introduce new recursive calls to appear.  */
 689
 690 static bool
 691 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 692                                    struct cgraph_node *outer_node,
 693                                    bool peeling,
 694                                    int depth)
 695 {
 696   char const *reason = NULL;
 697   bool want_inline = true;
 698   int caller_freq = CGRAPH_FREQ_BASE;
 699   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 700
 701   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 702     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 703
 704   if (!edge->maybe_hot_p ())
 705     {
 706       reason = "recursive call is cold";
 707       want_inline = false;
 708     }
 709   else if (max_count && !outer_node->count)
 710     {
 711       reason = "not executed in profile";
 712       want_inline = false;
 713     }
 714   else if (depth > max_depth)
 715     {
 716       reason = "--param max-inline-recursive-depth exceeded.";
 717       want_inline = false;
 718     }
 719
 720   if (outer_node->global.inlined_to)
 721     caller_freq = outer_node->callers->frequency;
 722
 723   if (!caller_freq)
 724     {
 725       reason = "function is inlined and unlikely";
 726       want_inline = false;
 727     }
 728
 729   if (!want_inline)
 730     ;
 731   /* Inlining of self recursive function into copy of itself within other function
 732      is transformation similar to loop peeling.
 733
 734      Peeling is profitable if we can inline enough copies to make probability
 735      of actual call to the self recursive function very small.  Be sure that
 736      the probability of recursion is small.
 737
 738      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 739      This way the expected number of recision is at most max_depth.  */
 740   else if (peeling)
 741     {
 742       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 743                                          / max_depth);
 744       int i;
 745       for (i = 1; i < depth; i++)
 746         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 747       if (max_count
 748           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 749               >= max_prob))
 750         {
 751           reason = "profile of recursive call is too large";
 752           want_inline = false;
 753         }
 754       if (!max_count
 755           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 756               >= max_prob))
 757         {
 758           reason = "frequency of recursive call is too large";
 759           want_inline = false;
 760         }
 761     }
 762   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 763      depth is large.  We reduce function call overhead and increase chances that
 764      things fit in hardware return predictor.
 765
 766      Recursive inlining might however increase cost of stack frame setup
 767      actually slowing down functions whose recursion tree is wide rather than
 768      deep.
 769
 770      Deciding reliably on when to do recursive inlining without profile feedback
 771      is tricky.  For now we disable recursive inlining when probability of self
 772      recursion is low.
 773
 774      Recursive inlining of self recursive call within loop also results in large loop
 775      depths that generally optimize badly.  We may want to throttle down inlining
 776      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 777      methods.  */
 778   else
 779     {
 780       if (max_count
 781           && (edge->count * 100 / outer_node->count
 782               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 783         {
 784           reason = "profile of recursive call is too small";
 785           want_inline = false;
 786         }
 787       else if (!max_count
 788                && (edge->frequency * 100 / caller_freq
 789                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 790         {
 791           reason = "frequency of recursive call is too small";
 792           want_inline = false;
 793         }
 794     }
 795   if (!want_inline && dump_file)
 796     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 797   return want_inline;
 798 }
 799
 800 /* Return true when NODE has uninlinable caller;
 801    set HAS_HOT_CALL if it has hot call.
 802    Worker for cgraph_for_node_and_aliases.  */
 803
 804 static bool
 805 check_callers (struct cgraph_node *node, void *has_hot_call)
 806 {
 807   struct cgraph_edge *e;
 808    for (e = node->callers; e; e = e->next_caller)
 809      {
 810        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once))
 811          return true;
 812        if (!can_inline_edge_p (e, true))
 813          return true;
 814        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 815          *(bool *)has_hot_call = true;
 816      }
 817   return false;
 818 }
 819
 820 /* If NODE has a caller, return true.  */
 821
 822 static bool
 823 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 824 {
 825   if (node->callers)
 826     return true;
 827   return false;
 828 }
 829
 830 /* Decide if inlining NODE would reduce unit size by eliminating
 831    the offline copy of function.
 832    When COLD is true the cold calls are considered, too.  */
 833
 834 static bool
 835 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 836 {
 837   bool has_hot_call = false;
 838
 839   if (node->ultimate_alias_target () != node)
 840     return false;
 841   /* Already inlined?  */
 842   if (node->global.inlined_to)
 843     return false;
 844   /* Does it have callers?  */
 845   if (!node->call_for_symbol_thunks_and_aliases (has_caller_p, NULL, true))
 846     return false;
 847   /* Inlining into all callers would increase size?  */
 848   if (estimate_growth (node) > 0)
 849     return false;
 850   /* All inlines must be possible.  */
 851   if (node->call_for_symbol_thunks_and_aliases (check_callers, &has_hot_call,
 852                                                 true))
 853     return false;
 854   if (!cold && !has_hot_call)
 855     return false;
 856   return true;
 857 }
 858
 859 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 860
 861 /* Return relative time improvement for inlining EDGE in range
 862    as value NUMERATOR/DENOMINATOR.  */
 863
 864 static inline void
 865 relative_time_benefit (struct inline_summary *callee_info,
 866                        struct cgraph_edge *edge,
 867                        int edge_time,
 868                        sreal *numerator,
 869                        sreal *denominator)
 870 {
 871   /* Inlining into extern inline function is not a win.  */
 872   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 873                      ? edge->caller->global.inlined_to->decl
 874                      : edge->caller->decl))
 875     {
 876       *numerator = (sreal) 1;
 877       *denominator = (sreal) 1024;
 878       return;
 879     }
 880
 881   sreal uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 882   sreal inlined_call_time = compute_inlined_call_time (edge, edge_time);
 883
 884   /* Compute relative time benefit, i.e. how much the call becomes faster.
 885      ??? perhaps computing how much the caller+calle together become faster
 886      would lead to more realistic results.  */
 887   if (uninlined_call_time == (sreal) 0)
 888     uninlined_call_time = 1;
 889
 890   /* Avoid zeros, these are not useful later in calculations.  */
 891   if (uninlined_call_time == inlined_call_time)
 892     *numerator = ((sreal) 1)>>8;
 893   else
 894     *numerator = uninlined_call_time - inlined_call_time;
 895   *denominator = uninlined_call_time;
 896 #ifdef ENABLE_CHECKING
 897   gcc_checking_assert (*numerator >= 0);
 898   gcc_checking_assert (*denominator >= 0);
 899 #endif
 900 }
 901
 902 /* A cost model driving the inlining heuristics in a way so the edges with
 903    smallest badness are inlined first.  After each inlining is performed
 904    the costs of all caller edges of nodes affected are recomputed so the
 905    metrics may accurately depend on values such as number of inlinable callers
 906    of the function or function body size.  */
 907
 908 static sreal
 909 edge_badness (struct cgraph_edge *edge, bool dump)
 910 {
 911   sreal badness;
 912   int growth, edge_time;
 913   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
 914   struct inline_summary *callee_info = inline_summaries->get (callee);
 915   inline_hints hints;
 916
 917   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 918     return sreal::min ();
 919
 920   growth = estimate_edge_growth (edge);
 921   edge_time = estimate_edge_time (edge);
 922   hints = estimate_edge_hints (edge);
 923   gcc_checking_assert (edge_time >= 0);
 924   gcc_checking_assert (edge_time <= callee_info->time);
 925   gcc_checking_assert (growth <= callee_info->size);
 926
 927   if (dump)
 928     {
 929       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 930                xstrdup_for_dump (edge->caller->name ()),
 931                edge->caller->order,
 932                xstrdup_for_dump (callee->name ()),
 933                edge->callee->order);
 934       fprintf (dump_file, "      size growth %i, time %i ",
 935                growth,
 936                edge_time);
 937       dump_inline_hints (dump_file, hints);
 938       if (big_speedup_p (edge))
 939         fprintf (dump_file, " big_speedup");
 940       fprintf (dump_file, "\n");
 941     }
 942
 943   /* Always prefer inlining saving code size.  */
 944   if (growth <= 0)
 945     {
 946       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
 947       if (dump)
 948         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
 949                  growth);
 950     }
 951
 952   /* When profiling is available, compute badness as:
 953
 954                 edge_count * relative_time_benefit
 955      goodness = -------------------------------------------
 956                 growth_of_caller
 957      badness = - goodness
 958
 959     The fraction is upside down, because on edge counts and time beneits
 960     the bounds are known. Edge growth is essentially unlimited.  */
 961
 962   else if (max_count)
 963     {
 964       sreal numerator, denominator;
 965       relative_time_benefit (callee_info, edge, edge_time, &numerator,
 966                              &denominator);
 967
 968       if (edge->count)
 969         numerator *= edge->count;
 970       denominator *= growth;
 971
 972       badness = - numerator / denominator;
 973
 974       if (dump)
 975         {
 976           sreal num,den;
 977           relative_time_benefit (callee_info, edge, edge_time, &num, &den);
 978           fprintf (dump_file,
 979                    "      %f: profile info. count %"PRId64
 980                    " * Relative benefit %f / growth %i\n",
 981                    badness.to_double (), (int64_t)edge->count,
 982                    (num / den * 100).to_double (), growth);
 983         }
 984     }
 985
 986   /* When function local profile is available. Compute badness as:
 987
 988                  relative_time_benefit
 989      goodness =  ---------------------------------
 990                  growth_of_caller * overall_growth
 991
 992      badness = - goodness
 993
 994      compensated by the inline hints.
 995   */
 996   /* TODO: We ought suport mixing units where some functions are profiled
 997      and some not.  */
 998   else if (flag_guess_branch_prob)
 999     {
1000       sreal numerator, denominator;
1001       relative_time_benefit (callee_info, edge, edge_time, &numerator,
1002                              &denominator);
1003       denominator *= growth;
1004       if (callee_info->growth > 0)
1005         denominator *= callee_info->growth;
1006
1007       badness = - numerator / denominator;
1008
1009       if (dump)
1010         {
1011           sreal num,den;
1012           relative_time_benefit (callee_info, edge, edge_time, &num, &den);
1013           fprintf (dump_file,
1014                    "      %f: guessed profile. frequency %f,"
1015                    " benefit %f%%, time w/o inlining %f, time w inlining %f"
1016                    " overall growth %i (current) %i (original)\n",
1017                    badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE,
1018                    (num/den).to_double () * 100,
1019                    compute_uninlined_call_time (callee_info, edge).to_double (),
1020                    compute_inlined_call_time (edge, edge_time).to_double (),
1021                    estimate_growth (callee),
1022                    callee_info->growth);
1023         }
1024     }
1025   /* When function local profile is not available or it does not give
1026      useful information (ie frequency is zero), base the cost on
1027      loop nest and overall size growth, so we optimize for overall number
1028      of functions fully inlined in program.  */
1029   else
1030     {
1031       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1032       badness = growth;
1033
1034       /* Decrease badness if call is nested.  */
1035       if (badness > 0)
1036         badness = badness >> nest;
1037       else
1038         badness = badness << nest;
1039       if (dump)
1040         fprintf (dump_file, "      %f: no profile. nest %i\n", badness.to_double (),
1041                  nest);
1042     }
1043   gcc_checking_assert (badness != 0);
1044
1045   if (edge->recursive_p ())
1046     badness = badness.shift (badness > 0 ? 4 : -4);
1047   if ((hints & (INLINE_HINT_indirect_call
1048                 | INLINE_HINT_loop_iterations
1049                 | INLINE_HINT_array_index
1050                 | INLINE_HINT_loop_stride))
1051       || callee_info->growth <= 0)
1052     badness = badness.shift (badness > 0 ? -2 : 2);
1053   if (hints & (INLINE_HINT_same_scc))
1054     badness = badness.shift (badness > 0 ? 3 : -3);
1055   else if (hints & (INLINE_HINT_in_scc))
1056     badness = badness.shift (badness > 0 ? 2 : -2);
1057   else if (hints & (INLINE_HINT_cross_module))
1058     badness = badness.shift (badness > 0 ? 1 : -1);
1059   if ((hints & INLINE_HINT_declared_inline))
1060     badness = badness.shift (badness > 0 ? -3 : 3);
1061   if (dump)
1062     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1063   return badness;
1064 }
1065
1066 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1067 static inline void
1068 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1069 {
1070   sreal badness = edge_badness (edge, false);
1071   if (edge->aux)
1072     {
1073       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1074       gcc_checking_assert (n->get_data () == edge);
1075
1076       /* fibonacci_heap::replace_key does busy updating of the
1077          heap that is unnecesarily expensive.
1078          We do lazy increases: after extracting minimum if the key
1079          turns out to be out of date, it is re-inserted into heap
1080          with correct value.  */
1081       if (badness < n->get_key ())
1082         {
1083           if (dump_file && (dump_flags & TDF_DETAILS))
1084             {
1085               fprintf (dump_file,
1086                        "  decreasing badness %s/%i -> %s/%i, %f"
1087                        " to %f\n",
1088                        xstrdup_for_dump (edge->caller->name ()),
1089                        edge->caller->order,
1090                        xstrdup_for_dump (edge->callee->name ()),
1091                        edge->callee->order,
1092                        n->get_key ().to_double (),
1093                        badness.to_double ());
1094             }
1095           heap->decrease_key (n, badness);
1096         }
1097     }
1098   else
1099     {
1100        if (dump_file && (dump_flags & TDF_DETAILS))
1101          {
1102            fprintf (dump_file,
1103                     "  enqueuing call %s/%i -> %s/%i, badness %f\n",
1104                     xstrdup_for_dump (edge->caller->name ()),
1105                     edge->caller->order,
1106                     xstrdup_for_dump (edge->callee->name ()),
1107                     edge->callee->order,
1108                     badness.to_double ());
1109          }
1110       edge->aux = heap->insert (badness, edge);
1111     }
1112 }
1113
1114
1115 /* NODE was inlined.
1116    All caller edges needs to be resetted because
1117    size estimates change. Similarly callees needs reset
1118    because better context may be known.  */
1119
1120 static void
1121 reset_edge_caches (struct cgraph_node *node)
1122 {
1123   struct cgraph_edge *edge;
1124   struct cgraph_edge *e = node->callees;
1125   struct cgraph_node *where = node;
1126   struct ipa_ref *ref;
1127
1128   if (where->global.inlined_to)
1129     where = where->global.inlined_to;
1130
1131   /* WHERE body size has changed, the cached growth is invalid.  */
1132   reset_node_growth_cache (where);
1133
1134   for (edge = where->callers; edge; edge = edge->next_caller)
1135     if (edge->inline_failed)
1136       reset_edge_growth_cache (edge);
1137
1138   FOR_EACH_ALIAS (where, ref)
1139     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1140
1141   if (!e)
1142     return;
1143
1144   while (true)
1145     if (!e->inline_failed && e->callee->callees)
1146       e = e->callee->callees;
1147     else
1148       {
1149         if (e->inline_failed)
1150           reset_edge_growth_cache (e);
1151         if (e->next_callee)
1152           e = e->next_callee;
1153         else
1154           {
1155             do
1156               {
1157                 if (e->caller == node)
1158                   return;
1159                 e = e->caller->callers;
1160               }
1161             while (!e->next_callee);
1162             e = e->next_callee;
1163           }
1164       }
1165 }
1166
1167 /* Recompute HEAP nodes for each of caller of NODE.
1168    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1169    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1170    it is inlinable. Otherwise check all edges.  */
1171
1172 static void
1173 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1174                     bitmap updated_nodes,
1175                     struct cgraph_edge *check_inlinablity_for)
1176 {
1177   struct cgraph_edge *edge;
1178   struct ipa_ref *ref;
1179
1180   if ((!node->alias && !inline_summaries->get (node)->inlinable)
1181       || node->global.inlined_to)
1182     return;
1183   if (!bitmap_set_bit (updated_nodes, node->uid))
1184     return;
1185
1186   FOR_EACH_ALIAS (node, ref)
1187     {
1188       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1189       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1190     }
1191
1192   for (edge = node->callers; edge; edge = edge->next_caller)
1193     if (edge->inline_failed)
1194       {
1195         if (!check_inlinablity_for
1196             || check_inlinablity_for == edge)
1197           {
1198             if (can_inline_edge_p (edge, false)
1199                 && want_inline_small_function_p (edge, false))
1200               update_edge_key (heap, edge);
1201             else if (edge->aux)
1202               {
1203                 report_inline_failed_reason (edge);
1204                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1205                 edge->aux = NULL;
1206               }
1207           }
1208         else if (edge->aux)
1209           update_edge_key (heap, edge);
1210       }
1211 }
1212
1213 /* Recompute HEAP nodes for each uninlined call in NODE.
1214    This is used when we know that edge badnesses are going only to increase
1215    (we introduced new call site) and thus all we need is to insert newly
1216    created edges into heap.  */
1217
1218 static void
1219 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1220                     bitmap updated_nodes)
1221 {
1222   struct cgraph_edge *e = node->callees;
1223
1224   if (!e)
1225     return;
1226   while (true)
1227     if (!e->inline_failed && e->callee->callees)
1228       e = e->callee->callees;
1229     else
1230       {
1231         enum availability avail;
1232         struct cgraph_node *callee;
1233         /* We do not reset callee growth cache here.  Since we added a new call,
1234            growth chould have just increased and consequentely badness metric
1235            don't need updating.  */
1236         if (e->inline_failed
1237             && (callee = e->callee->ultimate_alias_target (&avail))
1238             && inline_summaries->get (callee)->inlinable
1239             && avail >= AVAIL_AVAILABLE
1240             && !bitmap_bit_p (updated_nodes, callee->uid))
1241           {
1242             if (can_inline_edge_p (e, false)
1243                 && want_inline_small_function_p (e, false))
1244               update_edge_key (heap, e);
1245             else if (e->aux)
1246               {
1247                 report_inline_failed_reason (e);
1248                 heap->delete_node ((edge_heap_node_t *) e->aux);
1249                 e->aux = NULL;
1250               }
1251           }
1252         if (e->next_callee)
1253           e = e->next_callee;
1254         else
1255           {
1256             do
1257               {
1258                 if (e->caller == node)
1259                   return;
1260                 e = e->caller->callers;
1261               }
1262             while (!e->next_callee);
1263             e = e->next_callee;
1264           }
1265       }
1266 }
1267
1268 /* Enqueue all recursive calls from NODE into priority queue depending on
1269    how likely we want to recursively inline the call.  */
1270
1271 static void
1272 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1273                         edge_heap_t *heap)
1274 {
1275   struct cgraph_edge *e;
1276   enum availability avail;
1277
1278   for (e = where->callees; e; e = e->next_callee)
1279     if (e->callee == node
1280         || (e->callee->ultimate_alias_target (&avail) == node
1281             && avail > AVAIL_INTERPOSABLE))
1282       {
1283         /* When profile feedback is available, prioritize by expected number
1284            of calls.  */
1285         heap->insert (!max_count ? -e->frequency
1286                       : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1287                       e);
1288       }
1289   for (e = where->callees; e; e = e->next_callee)
1290     if (!e->inline_failed)
1291       lookup_recursive_calls (node, e->callee, heap);
1292 }
1293
1294 /* Decide on recursive inlining: in the case function has recursive calls,
1295    inline until body size reaches given argument.  If any new indirect edges
1296    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1297    is NULL.  */
1298
1299 static bool
1300 recursive_inlining (struct cgraph_edge *edge,
1301                     vec<cgraph_edge *> *new_edges)
1302 {
1303   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1304   edge_heap_t heap (sreal::min ());
1305   struct cgraph_node *node;
1306   struct cgraph_edge *e;
1307   struct cgraph_node *master_clone = NULL, *next;
1308   int depth = 0;
1309   int n = 0;
1310
1311   node = edge->caller;
1312   if (node->global.inlined_to)
1313     node = node->global.inlined_to;
1314
1315   if (DECL_DECLARED_INLINE_P (node->decl))
1316     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1317
1318   /* Make sure that function is small enough to be considered for inlining.  */
1319   if (estimate_size_after_inlining (node, edge)  >= limit)
1320     return false;
1321   lookup_recursive_calls (node, node, &heap);
1322   if (heap.empty ())
1323     return false;
1324
1325   if (dump_file)
1326     fprintf (dump_file,
1327              "  Performing recursive inlining on %s\n",
1328              node->name ());
1329
1330   /* Do the inlining and update list of recursive call during process.  */
1331   while (!heap.empty ())
1332     {
1333       struct cgraph_edge *curr = heap.extract_min ();
1334       struct cgraph_node *cnode, *dest = curr->callee;
1335
1336       if (!can_inline_edge_p (curr, true))
1337         continue;
1338
1339       /* MASTER_CLONE is produced in the case we already started modified
1340          the function. Be sure to redirect edge to the original body before
1341          estimating growths otherwise we will be seeing growths after inlining
1342          the already modified body.  */
1343       if (master_clone)
1344         {
1345           curr->redirect_callee (master_clone);
1346           reset_edge_growth_cache (curr);
1347         }
1348
1349       if (estimate_size_after_inlining (node, curr) > limit)
1350         {
1351           curr->redirect_callee (dest);
1352           reset_edge_growth_cache (curr);
1353           break;
1354         }
1355
1356       depth = 1;
1357       for (cnode = curr->caller;
1358            cnode->global.inlined_to; cnode = cnode->callers->caller)
1359         if (node->decl
1360             == curr->callee->ultimate_alias_target ()->decl)
1361           depth++;
1362
1363       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1364         {
1365           curr->redirect_callee (dest);
1366           reset_edge_growth_cache (curr);
1367           continue;
1368         }
1369
1370       if (dump_file)
1371         {
1372           fprintf (dump_file,
1373                    "   Inlining call of depth %i", depth);
1374           if (node->count)
1375             {
1376               fprintf (dump_file, " called approx. %.2f times per call",
1377                        (double)curr->count / node->count);
1378             }
1379           fprintf (dump_file, "\n");
1380         }
1381       if (!master_clone)
1382         {
1383           /* We need original clone to copy around.  */
1384           master_clone = node->create_clone (node->decl, node->count,
1385             CGRAPH_FREQ_BASE, false, vNULL,
1386             true, NULL, NULL);
1387           for (e = master_clone->callees; e; e = e->next_callee)
1388             if (!e->inline_failed)
1389               clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1390           curr->redirect_callee (master_clone);
1391           reset_edge_growth_cache (curr);
1392         }
1393
1394       inline_call (curr, false, new_edges, &overall_size, true);
1395       lookup_recursive_calls (node, curr->callee, &heap);
1396       n++;
1397     }
1398
1399   if (!heap.empty () && dump_file)
1400     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1401
1402   if (!master_clone)
1403     return false;
1404
1405   if (dump_file)
1406     fprintf (dump_file,
1407              "\n   Inlined %i times, "
1408              "body grown from size %i to %i, time %i to %i\n", n,
1409              inline_summaries->get (master_clone)->size, inline_summaries->get (node)->size,
1410              inline_summaries->get (master_clone)->time, inline_summaries->get (node)->time);
1411
1412   /* Remove master clone we used for inlining.  We rely that clones inlined
1413      into master clone gets queued just before master clone so we don't
1414      need recursion.  */
1415   for (node = symtab->first_function (); node != master_clone;
1416        node = next)
1417     {
1418       next = symtab->next_function (node);
1419       if (node->global.inlined_to == master_clone)
1420         node->remove ();
1421     }
1422   master_clone->remove ();
1423   return true;
1424 }
1425
1426
1427 /* Given whole compilation unit estimate of INSNS, compute how large we can
1428    allow the unit to grow.  */
1429
1430 static int
1431 compute_max_insns (int insns)
1432 {
1433   int max_insns = insns;
1434   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1435     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1436
1437   return ((int64_t) max_insns
1438           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1439 }
1440
1441
1442 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1443
1444 static void
1445 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1446 {
1447   while (new_edges.length () > 0)
1448     {
1449       struct cgraph_edge *edge = new_edges.pop ();
1450
1451       gcc_assert (!edge->aux);
1452       if (edge->inline_failed
1453           && can_inline_edge_p (edge, true)
1454           && want_inline_small_function_p (edge, true))
1455         edge->aux = heap->insert (edge_badness (edge, false), edge);
1456     }
1457 }
1458
1459 /* Remove EDGE from the fibheap.  */
1460
1461 static void
1462 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1463 {
1464   if (e->callee)
1465     reset_node_growth_cache (e->callee);
1466   if (e->aux)
1467     {
1468       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1469       e->aux = NULL;
1470     }
1471 }
1472
1473 /* Return true if speculation of edge E seems useful.
1474    If ANTICIPATE_INLINING is true, be conservative and hope that E
1475    may get inlined.  */
1476
1477 bool
1478 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1479 {
1480   enum availability avail;
1481   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail);
1482   struct cgraph_edge *direct, *indirect;
1483   struct ipa_ref *ref;
1484
1485   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1486
1487   if (!e->maybe_hot_p ())
1488     return false;
1489
1490   /* See if IP optimizations found something potentially useful about the
1491      function.  For now we look only for CONST/PURE flags.  Almost everything
1492      else we propagate is useless.  */
1493   if (avail >= AVAIL_AVAILABLE)
1494     {
1495       int ecf_flags = flags_from_decl_or_type (target->decl);
1496       if (ecf_flags & ECF_CONST)
1497         {
1498           e->speculative_call_info (direct, indirect, ref);
1499           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1500             return true;
1501         }
1502       else if (ecf_flags & ECF_PURE)
1503         {
1504           e->speculative_call_info (direct, indirect, ref);
1505           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1506             return true;
1507         }
1508     }
1509   /* If we did not managed to inline the function nor redirect
1510      to an ipa-cp clone (that are seen by having local flag set),
1511      it is probably pointless to inline it unless hardware is missing
1512      indirect call predictor.  */
1513   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1514     return false;
1515   /* For overwritable targets there is not much to do.  */
1516   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1517     return false;
1518   /* OK, speculation seems interesting.  */
1519   return true;
1520 }
1521
1522 /* We know that EDGE is not going to be inlined.
1523    See if we can remove speculation.  */
1524
1525 static void
1526 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1527 {
1528   if (edge->speculative && !speculation_useful_p (edge, false))
1529     {
1530       struct cgraph_node *node = edge->caller;
1531       struct cgraph_node *where = node->global.inlined_to
1532                                   ? node->global.inlined_to : node;
1533       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1534
1535       spec_rem += edge->count;
1536       edge->resolve_speculation ();
1537       reset_edge_caches (where);
1538       inline_update_overall_summary (where);
1539       update_caller_keys (edge_heap, where,
1540                           updated_nodes, NULL);
1541       update_callee_keys (edge_heap, where,
1542                           updated_nodes);
1543       BITMAP_FREE (updated_nodes);
1544     }
1545 }
1546
1547 /* We use greedy algorithm for inlining of small functions:
1548    All inline candidates are put into prioritized heap ordered in
1549    increasing badness.
1550
1551    The inlining of small functions is bounded by unit growth parameters.  */
1552
1553 static void
1554 inline_small_functions (void)
1555 {
1556   struct cgraph_node *node;
1557   struct cgraph_edge *edge;
1558   edge_heap_t edge_heap (sreal::min ());
1559   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1560   int min_size, max_size;
1561   auto_vec<cgraph_edge *> new_indirect_edges;
1562   int initial_size = 0;
1563   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1564   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1565   new_indirect_edges.create (8);
1566
1567   edge_removal_hook_holder
1568     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1569
1570   /* Compute overall unit size and other global parameters used by badness
1571      metrics.  */
1572
1573   max_count = 0;
1574   ipa_reduced_postorder (order, true, true, NULL);
1575   free (order);
1576
1577   FOR_EACH_DEFINED_FUNCTION (node)
1578     if (!node->global.inlined_to)
1579       {
1580         if (node->has_gimple_body_p ()
1581             || node->thunk.thunk_p)
1582           {
1583             struct inline_summary *info = inline_summaries->get (node);
1584             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1585
1586             /* Do not account external functions, they will be optimized out
1587                if not inlined.  Also only count the non-cold portion of program.  */
1588             if (!DECL_EXTERNAL (node->decl)
1589                 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
1590               initial_size += info->size;
1591             info->growth = estimate_growth (node);
1592             if (dfs && dfs->next_cycle)
1593               {
1594                 struct cgraph_node *n2;
1595                 int id = dfs->scc_no + 1;
1596                 for (n2 = node; n2;
1597                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1598                   {
1599                     struct inline_summary *info2 = inline_summaries->get (n2);
1600                     if (info2->scc_no)
1601                       break;
1602                     info2->scc_no = id;
1603                   }
1604               }
1605           }
1606
1607         for (edge = node->callers; edge; edge = edge->next_caller)
1608           if (max_count < edge->count)
1609             max_count = edge->count;
1610       }
1611   ipa_free_postorder_info ();
1612   initialize_growth_caches ();
1613
1614   if (dump_file)
1615     fprintf (dump_file,
1616              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1617              initial_size);
1618
1619   overall_size = initial_size;
1620   max_size = compute_max_insns (overall_size);
1621   min_size = overall_size;
1622
1623   /* Populate the heap with all edges we might inline.  */
1624
1625   FOR_EACH_DEFINED_FUNCTION (node)
1626     {
1627       bool update = false;
1628       struct cgraph_edge *next;
1629
1630       if (dump_file)
1631         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1632                  node->name (), node->order);
1633
1634       for (edge = node->callees; edge; edge = next)
1635         {
1636           next = edge->next_callee;
1637           if (edge->inline_failed
1638               && !edge->aux
1639               && can_inline_edge_p (edge, true)
1640               && want_inline_small_function_p (edge, true)
1641               && edge->inline_failed)
1642             {
1643               gcc_assert (!edge->aux);
1644               update_edge_key (&edge_heap, edge);
1645             }
1646           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1647             {
1648               edge->resolve_speculation ();
1649               update = true;
1650             }
1651         }
1652       if (update)
1653         {
1654           struct cgraph_node *where = node->global.inlined_to
1655                                       ? node->global.inlined_to : node;
1656           inline_update_overall_summary (where);
1657           reset_node_growth_cache (where);
1658           reset_edge_caches (where);
1659           update_caller_keys (&edge_heap, where,
1660                               updated_nodes, NULL);
1661           bitmap_clear (updated_nodes);
1662         }
1663     }
1664
1665   gcc_assert (in_lto_p
1666               || !max_count
1667               || (profile_info && flag_branch_probabilities));
1668
1669   while (!edge_heap.empty ())
1670     {
1671       int old_size = overall_size;
1672       struct cgraph_node *where, *callee;
1673       sreal badness = edge_heap.min_key ();
1674       sreal current_badness;
1675       int growth;
1676
1677       edge = edge_heap.extract_min ();
1678       gcc_assert (edge->aux);
1679       edge->aux = NULL;
1680       if (!edge->inline_failed || !edge->callee->analyzed)
1681         continue;
1682
1683 #ifdef ENABLE_CHECKING
1684       /* Be sure that caches are maintained consistent.  */
1685       sreal cached_badness = edge_badness (edge, false);
1686       reset_edge_growth_cache (edge);
1687       reset_node_growth_cache (edge->callee);
1688
1689       /* When updating the edge costs, we only decrease badness in the keys.
1690          Increases of badness are handled lazilly; when we see key with out
1691          of date value on it, we re-insert it now.  */
1692       current_badness = edge_badness (edge, false);
1693       gcc_assert (cached_badness == current_badness);
1694       gcc_assert (current_badness >= badness);
1695 #else
1696       current_badness = edge_badness (edge, false);
1697 #endif
1698       if (current_badness != badness)
1699         {
1700           if (edge_heap.min () && badness > edge_heap.min_key ())
1701             {
1702               edge->aux = edge_heap.insert (current_badness, edge);
1703               continue;
1704             }
1705           else
1706             badness = current_badness;
1707         }
1708
1709       if (!can_inline_edge_p (edge, true))
1710         {
1711           resolve_noninline_speculation (&edge_heap, edge);
1712           continue;
1713         }
1714
1715       callee = edge->callee->ultimate_alias_target ();
1716       growth = estimate_edge_growth (edge);
1717       if (dump_file)
1718         {
1719           fprintf (dump_file,
1720                    "\nConsidering %s/%i with %i size\n",
1721                    callee->name (), callee->order,
1722                    inline_summaries->get (callee)->size);
1723           fprintf (dump_file,
1724                    " to be inlined into %s/%i in %s:%i\n"
1725                    " Estimated badness is %f, frequency %.2f.\n",
1726                    edge->caller->name (), edge->caller->order,
1727                    edge->call_stmt ? "unknown"
1728                    : gimple_filename ((const_gimple) edge->call_stmt),
1729                    edge->call_stmt ? -1
1730                    : gimple_lineno ((const_gimple) edge->call_stmt),
1731                    badness.to_double (),
1732                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1733           if (edge->count)
1734             fprintf (dump_file," Called %"PRId64"x\n",
1735                      edge->count);
1736           if (dump_flags & TDF_DETAILS)
1737             edge_badness (edge, true);
1738         }
1739
1740       if (overall_size + growth > max_size
1741           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1742         {
1743           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1744           report_inline_failed_reason (edge);
1745           resolve_noninline_speculation (&edge_heap, edge);
1746           continue;
1747         }
1748
1749       if (!want_inline_small_function_p (edge, true))
1750         {
1751           resolve_noninline_speculation (&edge_heap, edge);
1752           continue;
1753         }
1754
1755       /* Heuristics for inlining small functions work poorly for
1756          recursive calls where we do effects similar to loop unrolling.
1757          When inlining such edge seems profitable, leave decision on
1758          specific inliner.  */
1759       if (edge->recursive_p ())
1760         {
1761           where = edge->caller;
1762           if (where->global.inlined_to)
1763             where = where->global.inlined_to;
1764           if (!recursive_inlining (edge,
1765                                    opt_for_fn (edge->caller->decl,
1766                                                flag_indirect_inlining)
1767                                    ? &new_indirect_edges : NULL))
1768             {
1769               edge->inline_failed = CIF_RECURSIVE_INLINING;
1770               resolve_noninline_speculation (&edge_heap, edge);
1771               continue;
1772             }
1773           reset_edge_caches (where);
1774           /* Recursive inliner inlines all recursive calls of the function
1775              at once. Consequently we need to update all callee keys.  */
1776           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
1777             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1778           update_callee_keys (&edge_heap, where, updated_nodes);
1779           bitmap_clear (updated_nodes);
1780         }
1781       else
1782         {
1783           struct cgraph_node *outer_node = NULL;
1784           int depth = 0;
1785
1786           /* Consider the case where self recursive function A is inlined
1787              into B.  This is desired optimization in some cases, since it
1788              leads to effect similar of loop peeling and we might completely
1789              optimize out the recursive call.  However we must be extra
1790              selective.  */
1791
1792           where = edge->caller;
1793           while (where->global.inlined_to)
1794             {
1795               if (where->decl == callee->decl)
1796                 outer_node = where, depth++;
1797               where = where->callers->caller;
1798             }
1799           if (outer_node
1800               && !want_inline_self_recursive_call_p (edge, outer_node,
1801                                                      true, depth))
1802             {
1803               edge->inline_failed
1804                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1805                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1806               resolve_noninline_speculation (&edge_heap, edge);
1807               continue;
1808             }
1809           else if (depth && dump_file)
1810             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1811
1812           gcc_checking_assert (!callee->global.inlined_to);
1813           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1814           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1815
1816           reset_edge_caches (edge->callee);
1817           reset_node_growth_cache (callee);
1818
1819           update_callee_keys (&edge_heap, where, updated_nodes);
1820         }
1821       where = edge->caller;
1822       if (where->global.inlined_to)
1823         where = where->global.inlined_to;
1824
1825       /* Our profitability metric can depend on local properties
1826          such as number of inlinable calls and size of the function body.
1827          After inlining these properties might change for the function we
1828          inlined into (since it's body size changed) and for the functions
1829          called by function we inlined (since number of it inlinable callers
1830          might change).  */
1831       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
1832       bitmap_clear (updated_nodes);
1833
1834       if (dump_file)
1835         {
1836           fprintf (dump_file,
1837                    " Inlined into %s which now has time %i and size %i,"
1838                    "net change of %+i.\n",
1839                    edge->caller->name (),
1840                    inline_summaries->get (edge->caller)->time,
1841                    inline_summaries->get (edge->caller)->size,
1842                    overall_size - old_size);
1843         }
1844       if (min_size > overall_size)
1845         {
1846           min_size = overall_size;
1847           max_size = compute_max_insns (min_size);
1848
1849           if (dump_file)
1850             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1851         }
1852     }
1853
1854   free_growth_caches ();
1855   if (dump_file)
1856     fprintf (dump_file,
1857              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1858              initial_size, overall_size,
1859              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1860   BITMAP_FREE (updated_nodes);
1861   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
1862 }
1863
1864 /* Flatten NODE.  Performed both during early inlining and
1865    at IPA inlining time.  */
1866
1867 static void
1868 flatten_function (struct cgraph_node *node, bool early)
1869 {
1870   struct cgraph_edge *e;
1871
1872   /* We shouldn't be called recursively when we are being processed.  */
1873   gcc_assert (node->aux == NULL);
1874
1875   node->aux = (void *) node;
1876
1877   for (e = node->callees; e; e = e->next_callee)
1878     {
1879       struct cgraph_node *orig_callee;
1880       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
1881
1882       /* We've hit cycle?  It is time to give up.  */
1883       if (callee->aux)
1884         {
1885           if (dump_file)
1886             fprintf (dump_file,
1887                      "Not inlining %s into %s to avoid cycle.\n",
1888                      xstrdup_for_dump (callee->name ()),
1889                      xstrdup_for_dump (e->caller->name ()));
1890           e->inline_failed = CIF_RECURSIVE_INLINING;
1891           continue;
1892         }
1893
1894       /* When the edge is already inlined, we just need to recurse into
1895          it in order to fully flatten the leaves.  */
1896       if (!e->inline_failed)
1897         {
1898           flatten_function (callee, early);
1899           continue;
1900         }
1901
1902       /* Flatten attribute needs to be processed during late inlining. For
1903          extra code quality we however do flattening during early optimization,
1904          too.  */
1905       if (!early
1906           ? !can_inline_edge_p (e, true)
1907           : !can_early_inline_edge_p (e))
1908         continue;
1909
1910       if (e->recursive_p ())
1911         {
1912           if (dump_file)
1913             fprintf (dump_file, "Not inlining: recursive call.\n");
1914           continue;
1915         }
1916
1917       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1918           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1919         {
1920           if (dump_file)
1921             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1922           continue;
1923         }
1924
1925       /* Inline the edge and flatten the inline clone.  Avoid
1926          recursing through the original node if the node was cloned.  */
1927       if (dump_file)
1928         fprintf (dump_file, " Inlining %s into %s.\n",
1929                  xstrdup_for_dump (callee->name ()),
1930                  xstrdup_for_dump (e->caller->name ()));
1931       orig_callee = callee;
1932       inline_call (e, true, NULL, NULL, false);
1933       if (e->callee != orig_callee)
1934         orig_callee->aux = (void *) node;
1935       flatten_function (e->callee, early);
1936       if (e->callee != orig_callee)
1937         orig_callee->aux = NULL;
1938     }
1939
1940   node->aux = NULL;
1941   if (!node->global.inlined_to)
1942     inline_update_overall_summary (node);
1943 }
1944
1945 /* Count number of callers of NODE and store it into DATA (that
1946    points to int.  Worker for cgraph_for_node_and_aliases.  */
1947
1948 static bool
1949 sum_callers (struct cgraph_node *node, void *data)
1950 {
1951   struct cgraph_edge *e;
1952   int *num_calls = (int *)data;
1953
1954   for (e = node->callers; e; e = e->next_caller)
1955     (*num_calls)++;
1956   return false;
1957 }
1958
1959 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1960    DATA points to number of calls originally found so we avoid infinite
1961    recursion.  */
1962
1963 static bool
1964 inline_to_all_callers (struct cgraph_node *node, void *data)
1965 {
1966   int *num_calls = (int *)data;
1967   bool callee_removed = false;
1968
1969   while (node->callers && !node->global.inlined_to)
1970     {
1971       struct cgraph_node *caller = node->callers->caller;
1972
1973       if (dump_file)
1974         {
1975           fprintf (dump_file,
1976                    "\nInlining %s size %i.\n",
1977                    node->name (),
1978                    inline_summaries->get (node)->size);
1979           fprintf (dump_file,
1980                    " Called once from %s %i insns.\n",
1981                    node->callers->caller->name (),
1982                    inline_summaries->get (node->callers->caller)->size);
1983         }
1984
1985       inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
1986       if (dump_file)
1987         fprintf (dump_file,
1988                  " Inlined into %s which now has %i size\n",
1989                  caller->name (),
1990                  inline_summaries->get (caller)->size);
1991       if (!(*num_calls)--)
1992         {
1993           if (dump_file)
1994             fprintf (dump_file, "New calls found; giving up.\n");
1995           return callee_removed;
1996         }
1997       if (callee_removed)
1998         return true;
1999     }
2000   return false;
2001 }
2002
2003 /* Output overall time estimate.  */
2004 static void
2005 dump_overall_stats (void)
2006 {
2007   int64_t sum_weighted = 0, sum = 0;
2008   struct cgraph_node *node;
2009
2010   FOR_EACH_DEFINED_FUNCTION (node)
2011     if (!node->global.inlined_to
2012         && !node->alias)
2013       {
2014         int time = inline_summaries->get (node)->time;
2015         sum += time;
2016         sum_weighted += time * node->count;
2017       }
2018   fprintf (dump_file, "Overall time estimate: "
2019            "%"PRId64" weighted by profile: "
2020            "%"PRId64"\n", sum, sum_weighted);
2021 }
2022
2023 /* Output some useful stats about inlining.  */
2024
2025 static void
2026 dump_inline_stats (void)
2027 {
2028   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2029   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2030   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2031   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2032   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2033   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2034   int64_t reason[CIF_N_REASONS][3];
2035   int i;
2036   struct cgraph_node *node;
2037
2038   memset (reason, 0, sizeof (reason));
2039   FOR_EACH_DEFINED_FUNCTION (node)
2040   {
2041     struct cgraph_edge *e;
2042     for (e = node->callees; e; e = e->next_callee)
2043       {
2044         if (e->inline_failed)
2045           {
2046             reason[(int) e->inline_failed][0] += e->count;
2047             reason[(int) e->inline_failed][1] += e->frequency;
2048             reason[(int) e->inline_failed][2] ++;
2049             if (DECL_VIRTUAL_P (e->callee->decl))
2050               {
2051                 if (e->indirect_inlining_edge)
2052                   noninlined_virt_indir_cnt += e->count;
2053                 else
2054                   noninlined_virt_cnt += e->count;
2055               }
2056             else
2057               {
2058                 if (e->indirect_inlining_edge)
2059                   noninlined_indir_cnt += e->count;
2060                 else
2061                   noninlined_cnt += e->count;
2062               }
2063           }
2064         else
2065           {
2066             if (e->speculative)
2067               {
2068                 if (DECL_VIRTUAL_P (e->callee->decl))
2069                   inlined_speculative_ply += e->count;
2070                 else
2071                   inlined_speculative += e->count;
2072               }
2073             else if (DECL_VIRTUAL_P (e->callee->decl))
2074               {
2075                 if (e->indirect_inlining_edge)
2076                   inlined_virt_indir_cnt += e->count;
2077                 else
2078                   inlined_virt_cnt += e->count;
2079               }
2080             else
2081               {
2082                 if (e->indirect_inlining_edge)
2083                   inlined_indir_cnt += e->count;
2084                 else
2085                   inlined_cnt += e->count;
2086               }
2087           }
2088       }
2089     for (e = node->indirect_calls; e; e = e->next_callee)
2090       if (e->indirect_info->polymorphic)
2091         indirect_poly_cnt += e->count;
2092       else
2093         indirect_cnt += e->count;
2094   }
2095   if (max_count)
2096     {
2097       fprintf (dump_file,
2098                "Inlined %"PRId64 " + speculative "
2099                "%"PRId64 " + speculative polymorphic "
2100                "%"PRId64 " + previously indirect "
2101                "%"PRId64 " + virtual "
2102                "%"PRId64 " + virtual and previously indirect "
2103                "%"PRId64 "\n" "Not inlined "
2104                "%"PRId64 " + previously indirect "
2105                "%"PRId64 " + virtual "
2106                "%"PRId64 " + virtual and previously indirect "
2107                "%"PRId64 " + stil indirect "
2108                "%"PRId64 " + still indirect polymorphic "
2109                "%"PRId64 "\n", inlined_cnt,
2110                inlined_speculative, inlined_speculative_ply,
2111                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2112                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2113                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2114       fprintf (dump_file,
2115                "Removed speculations %"PRId64 "\n",
2116                spec_rem);
2117     }
2118   dump_overall_stats ();
2119   fprintf (dump_file, "\nWhy inlining failed?\n");
2120   for (i = 0; i < CIF_N_REASONS; i++)
2121     if (reason[i][2])
2122       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
2123                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2124                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2125 }
2126
2127 /* Decide on the inlining.  We do so in the topological order to avoid
2128    expenses on updating data structures.  */
2129
2130 static unsigned int
2131 ipa_inline (void)
2132 {
2133   struct cgraph_node *node;
2134   int nnodes;
2135   struct cgraph_node **order;
2136   int i;
2137   int cold;
2138   bool remove_functions = false;
2139
2140   if (!optimize)
2141     return 0;
2142
2143   cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
2144   percent_rec = (sreal) 1 / (sreal) 100;
2145
2146   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2147
2148   if (in_lto_p && optimize)
2149     ipa_update_after_lto_read ();
2150
2151   if (dump_file)
2152     dump_inline_summaries (dump_file);
2153
2154   nnodes = ipa_reverse_postorder (order);
2155
2156   FOR_EACH_FUNCTION (node)
2157     node->aux = 0;
2158
2159   if (dump_file)
2160     fprintf (dump_file, "\nFlattening functions:\n");
2161
2162   /* In the first pass handle functions to be flattened.  Do this with
2163      a priority so none of our later choices will make this impossible.  */
2164   for (i = nnodes - 1; i >= 0; i--)
2165     {
2166       node = order[i];
2167
2168       /* Handle nodes to be flattened.
2169          Ideally when processing callees we stop inlining at the
2170          entry of cycles, possibly cloning that entry point and
2171          try to flatten itself turning it into a self-recursive
2172          function.  */
2173       if (lookup_attribute ("flatten",
2174                             DECL_ATTRIBUTES (node->decl)) != NULL)
2175         {
2176           if (dump_file)
2177             fprintf (dump_file,
2178                      "Flattening %s\n", node->name ());
2179           flatten_function (node, false);
2180         }
2181     }
2182   if (dump_file)
2183     dump_overall_stats ();
2184
2185   inline_small_functions ();
2186
2187   gcc_assert (symtab->state == IPA_SSA);
2188   symtab->state = IPA_SSA_AFTER_INLINING;
2189   /* Do first after-inlining removal.  We want to remove all "stale" extern
2190      inline functions and virtual functions so we really know what is called
2191      once.  */
2192   symtab->remove_unreachable_nodes (dump_file);
2193   free (order);
2194
2195   /* Inline functions with a property that after inlining into all callers the
2196      code size will shrink because the out-of-line copy is eliminated.
2197      We do this regardless on the callee size as long as function growth limits
2198      are met.  */
2199   if (dump_file)
2200     fprintf (dump_file,
2201              "\nDeciding on functions to be inlined into all callers and "
2202              "removing useless speculations:\n");
2203
2204   /* Inlining one function called once has good chance of preventing
2205      inlining other function into the same callee.  Ideally we should
2206      work in priority order, but probably inlining hot functions first
2207      is good cut without the extra pain of maintaining the queue.
2208
2209      ??? this is not really fitting the bill perfectly: inlining function
2210      into callee often leads to better optimization of callee due to
2211      increased context for optimization.
2212      For example if main() function calls a function that outputs help
2213      and then function that does the main optmization, we should inline
2214      the second with priority even if both calls are cold by themselves.
2215
2216      We probably want to implement new predicate replacing our use of
2217      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2218      to be hot.  */
2219   for (cold = 0; cold <= 1; cold ++)
2220     {
2221       FOR_EACH_DEFINED_FUNCTION (node)
2222         {
2223           struct cgraph_edge *edge, *next;
2224           bool update=false;
2225
2226           for (edge = node->callees; edge; edge = next)
2227             {
2228               next = edge->next_callee;
2229               if (edge->speculative && !speculation_useful_p (edge, false))
2230                 {
2231                   edge->resolve_speculation ();
2232                   spec_rem += edge->count;
2233                   update = true;
2234                   remove_functions = true;
2235                 }
2236             }
2237           if (update)
2238             {
2239               struct cgraph_node *where = node->global.inlined_to
2240                                           ? node->global.inlined_to : node;
2241               reset_node_growth_cache (where);
2242               reset_edge_caches (where);
2243               inline_update_overall_summary (where);
2244             }
2245           if (want_inline_function_to_all_callers_p (node, cold))
2246             {
2247               int num_calls = 0;
2248               node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls,
2249                                                       true);
2250               while (node->call_for_symbol_thunks_and_aliases
2251                        (inline_to_all_callers, &num_calls, true))
2252                 ;
2253               remove_functions = true;
2254             }
2255         }
2256     }
2257
2258   /* Free ipa-prop structures if they are no longer needed.  */
2259   if (optimize)
2260     ipa_free_all_structures_after_iinln ();
2261
2262   if (dump_file)
2263     {
2264       fprintf (dump_file,
2265                "\nInlined %i calls, eliminated %i functions\n\n",
2266                ncalls_inlined, nfunctions_inlined);
2267       dump_inline_stats ();
2268     }
2269
2270   if (dump_file)
2271     dump_inline_summaries (dump_file);
2272   /* In WPA we use inline summaries for partitioning process.  */
2273   if (!flag_wpa)
2274     inline_free_summary ();
2275   return remove_functions ? TODO_remove_functions : 0;
2276 }
2277
2278 /* Inline always-inline function calls in NODE.  */
2279
2280 static bool
2281 inline_always_inline_functions (struct cgraph_node *node)
2282 {
2283   struct cgraph_edge *e;
2284   bool inlined = false;
2285
2286   for (e = node->callees; e; e = e->next_callee)
2287     {
2288       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2289       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2290         continue;
2291
2292       if (e->recursive_p ())
2293         {
2294           if (dump_file)
2295             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2296                      e->callee->name ());
2297           e->inline_failed = CIF_RECURSIVE_INLINING;
2298           continue;
2299         }
2300
2301       if (!can_early_inline_edge_p (e))
2302         {
2303           /* Set inlined to true if the callee is marked "always_inline" but
2304              is not inlinable.  This will allow flagging an error later in
2305              expand_call_inline in tree-inline.c.  */
2306           if (lookup_attribute ("always_inline",
2307                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2308             inlined = true;
2309           continue;
2310         }
2311
2312       if (dump_file)
2313         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2314                  xstrdup_for_dump (e->callee->name ()),
2315                  xstrdup_for_dump (e->caller->name ()));
2316       inline_call (e, true, NULL, NULL, false);
2317       inlined = true;
2318     }
2319   if (inlined)
2320     inline_update_overall_summary (node);
2321
2322   return inlined;
2323 }
2324
2325 /* Decide on the inlining.  We do so in the topological order to avoid
2326    expenses on updating data structures.  */
2327
2328 static bool
2329 early_inline_small_functions (struct cgraph_node *node)
2330 {
2331   struct cgraph_edge *e;
2332   bool inlined = false;
2333
2334   for (e = node->callees; e; e = e->next_callee)
2335     {
2336       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2337       if (!inline_summaries->get (callee)->inlinable
2338           || !e->inline_failed)
2339         continue;
2340
2341       /* Do not consider functions not declared inline.  */
2342       if (!DECL_DECLARED_INLINE_P (callee->decl)
2343           && !opt_for_fn (node->decl, flag_inline_small_functions)
2344           && !opt_for_fn (node->decl, flag_inline_functions))
2345         continue;
2346
2347       if (dump_file)
2348         fprintf (dump_file, "Considering inline candidate %s.\n",
2349                  callee->name ());
2350
2351       if (!can_early_inline_edge_p (e))
2352         continue;
2353
2354       if (e->recursive_p ())
2355         {
2356           if (dump_file)
2357             fprintf (dump_file, "  Not inlining: recursive call.\n");
2358           continue;
2359         }
2360
2361       if (!want_early_inline_function_p (e))
2362         continue;
2363
2364       if (dump_file)
2365         fprintf (dump_file, " Inlining %s into %s.\n",
2366                  xstrdup_for_dump (callee->name ()),
2367                  xstrdup_for_dump (e->caller->name ()));
2368       inline_call (e, true, NULL, NULL, true);
2369       inlined = true;
2370     }
2371
2372   return inlined;
2373 }
2374
2375 unsigned int
2376 early_inliner (function *fun)
2377 {
2378   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2379   struct cgraph_edge *edge;
2380   unsigned int todo = 0;
2381   int iterations = 0;
2382   bool inlined = false;
2383
2384   if (seen_error ())
2385     return 0;
2386
2387   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2388      happens when some pass decides to construct new function and
2389      cgraph_add_new_function calls lowering passes and early optimization on
2390      it.  This may confuse ourself when early inliner decide to inline call to
2391      function clone, because function clones don't have parameter list in
2392      ipa-prop matching their signature.  */
2393   if (ipa_node_params_sum)
2394     return 0;
2395
2396 #ifdef ENABLE_CHECKING
2397   node->verify ();
2398 #endif
2399   node->remove_all_references ();
2400
2401   /* Even when not optimizing or not inlining inline always-inline
2402      functions.  */
2403   inlined = inline_always_inline_functions (node);
2404
2405   if (!optimize
2406       || flag_no_inline
2407       || !flag_early_inlining
2408       /* Never inline regular functions into always-inline functions
2409          during incremental inlining.  This sucks as functions calling
2410          always inline functions will get less optimized, but at the
2411          same time inlining of functions calling always inline
2412          function into an always inline function might introduce
2413          cycles of edges to be always inlined in the callgraph.
2414
2415          We might want to be smarter and just avoid this type of inlining.  */
2416       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2417     ;
2418   else if (lookup_attribute ("flatten",
2419                              DECL_ATTRIBUTES (node->decl)) != NULL)
2420     {
2421       /* When the function is marked to be flattened, recursively inline
2422          all calls in it.  */
2423       if (dump_file)
2424         fprintf (dump_file,
2425                  "Flattening %s\n", node->name ());
2426       flatten_function (node, true);
2427       inlined = true;
2428     }
2429   else
2430     {
2431       /* We iterate incremental inlining to get trivial cases of indirect
2432          inlining.  */
2433       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2434              && early_inline_small_functions (node))
2435         {
2436           timevar_push (TV_INTEGRATION);
2437           todo |= optimize_inline_calls (current_function_decl);
2438
2439           /* Technically we ought to recompute inline parameters so the new
2440              iteration of early inliner works as expected.  We however have
2441              values approximately right and thus we only need to update edge
2442              info that might be cleared out for newly discovered edges.  */
2443           for (edge = node->callees; edge; edge = edge->next_callee)
2444             {
2445               /* We have no summary for new bound store calls yet.  */
2446               if (inline_edge_summary_vec.length () > (unsigned)edge->uid)
2447                 {
2448                   struct inline_edge_summary *es = inline_edge_summary (edge);
2449                   es->call_stmt_size
2450                     = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2451                   es->call_stmt_time
2452                     = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2453                 }
2454               if (edge->callee->decl
2455                   && !gimple_check_call_matching_types (
2456                       edge->call_stmt, edge->callee->decl, false))
2457                 edge->call_stmt_cannot_inline_p = true;
2458             }
2459           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2460             inline_update_overall_summary (node);
2461           timevar_pop (TV_INTEGRATION);
2462           iterations++;
2463           inlined = false;
2464         }
2465       if (dump_file)
2466         fprintf (dump_file, "Iterations: %i\n", iterations);
2467     }
2468
2469   if (inlined)
2470     {
2471       timevar_push (TV_INTEGRATION);
2472       todo |= optimize_inline_calls (current_function_decl);
2473       timevar_pop (TV_INTEGRATION);
2474     }
2475
2476   fun->always_inline_functions_inlined = true;
2477
2478   return todo;
2479 }
2480
2481 /* Do inlining of small functions.  Doing so early helps profiling and other
2482    passes to be somewhat more effective and avoids some code duplication in
2483    later real inlining pass for testcases with very many function calls.  */
2484
2485 namespace {
2486
2487 const pass_data pass_data_early_inline =
2488 {
2489   GIMPLE_PASS, /* type */
2490   "einline", /* name */
2491   OPTGROUP_INLINE, /* optinfo_flags */
2492   TV_EARLY_INLINING, /* tv_id */
2493   PROP_ssa, /* properties_required */
2494   0, /* properties_provided */
2495   0, /* properties_destroyed */
2496   0, /* todo_flags_start */
2497   0, /* todo_flags_finish */
2498 };
2499
2500 class pass_early_inline : public gimple_opt_pass
2501 {
2502 public:
2503   pass_early_inline (gcc::context *ctxt)
2504     : gimple_opt_pass (pass_data_early_inline, ctxt)
2505   {}
2506
2507   /* opt_pass methods: */
2508   virtual unsigned int execute (function *);
2509
2510 }; // class pass_early_inline
2511
2512 unsigned int
2513 pass_early_inline::execute (function *fun)
2514 {
2515   return early_inliner (fun);
2516 }
2517
2518 } // anon namespace
2519
2520 gimple_opt_pass *
2521 make_pass_early_inline (gcc::context *ctxt)
2522 {
2523   return new pass_early_inline (ctxt);
2524 }
2525
2526 namespace {
2527
2528 const pass_data pass_data_ipa_inline =
2529 {
2530   IPA_PASS, /* type */
2531   "inline", /* name */
2532   OPTGROUP_INLINE, /* optinfo_flags */
2533   TV_IPA_INLINING, /* tv_id */
2534   0, /* properties_required */
2535   0, /* properties_provided */
2536   0, /* properties_destroyed */
2537   0, /* todo_flags_start */
2538   ( TODO_dump_symtab ), /* todo_flags_finish */
2539 };
2540
2541 class pass_ipa_inline : public ipa_opt_pass_d
2542 {
2543 public:
2544   pass_ipa_inline (gcc::context *ctxt)
2545     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2546                       inline_generate_summary, /* generate_summary */
2547                       inline_write_summary, /* write_summary */
2548                       inline_read_summary, /* read_summary */
2549                       NULL, /* write_optimization_summary */
2550                       NULL, /* read_optimization_summary */
2551                       NULL, /* stmt_fixup */
2552                       0, /* function_transform_todo_flags_start */
2553                       inline_transform, /* function_transform */
2554                       NULL) /* variable_transform */
2555   {}
2556
2557   /* opt_pass methods: */
2558   virtual unsigned int execute (function *) { return ipa_inline (); }
2559
2560 }; // class pass_ipa_inline
2561
2562 } // anon namespace
2563
2564 ipa_opt_pass_d *
2565 make_pass_ipa_inline (gcc::context *ctxt)
2566 {
2567   return new pass_ipa_inline (ctxt);
2568 }