gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks that a particular inlining is allowed by
  28       the limits specified by the user (allowed function growth, stack
  29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and the results of early inlining add a lot of optimization
  51          opportunities for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allows doing so.
  76
  77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          a special purpose recursive inliner is executed on the function when
  80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "trans-mem.h"
  98 #include "calls.h"
  99 #include "tree-inline.h"
 100 #include "langhooks.h"
 101 #include "flags.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "params.h"
 105 #include "intl.h"
 106 #include "tree-pass.h"
 107 #include "coverage.h"
 108 #include "rtl.h"
 109 #include "bitmap.h"
 110 #include "profile.h"
 111 #include "predict.h"
 112 #include "vec.h"
 113 #include "hashtab.h"
 114 #include "hash-set.h"
 115 #include "machmode.h"
 116 #include "hard-reg-set.h"
 117 #include "input.h"
 118 #include "function.h"
 119 #include "basic-block.h"
 120 #include "tree-ssa-alias.h"
 121 #include "internal-fn.h"
 122 #include "gimple-expr.h"
 123 #include "is-a.h"
 124 #include "gimple.h"
 125 #include "gimple-ssa.h"
 126 #include "hash-map.h"
 127 #include "plugin-api.h"
 128 #include "ipa-ref.h"
 129 #include "cgraph.h"
 130 #include "alloc-pool.h"
 131 #include "symbol-summary.h"
 132 #include "ipa-prop.h"
 133 #include "except.h"
 134 #include "target.h"
 135 #include "ipa-inline.h"
 136 #include "ipa-utils.h"
 137 #include "sreal.h"
 138 #include "auto-profile.h"
 139 #include "cilk.h"
 140 #include "builtins.h"
 141 #include "fibonacci_heap.h"
 142
 143 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 144 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 145
 146 /* Statistics we collect about inlining algorithm.  */
 147 static int overall_size;
 148 static gcov_type max_count;
 149 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 150 static gcov_type spec_rem;
 151
 152 /* Return false when inlining edge E would lead to violating
 153    limits on function unit growth or stack usage growth.
 154
 155    The relative function body growth limit is present generally
 156    to avoid problems with non-linear behavior of the compiler.
 157    To allow inlining huge functions into tiny wrapper, the limit
 158    is always based on the bigger of the two functions considered.
 159
 160    For stack growth limits we always base the growth in stack usage
 161    of the callers.  We want to prevent applications from segfaulting
 162    on stack overflow when functions with huge stack frames gets
 163    inlined. */
 164
 165 static bool
 166 caller_growth_limits (struct cgraph_edge *e)
 167 {
 168   struct cgraph_node *to = e->caller;
 169   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 170   int newsize;
 171   int limit = 0;
 172   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 173   inline_summary *info, *what_info, *outer_info = inline_summaries->get (to);
 174
 175   /* Look for function e->caller is inlined to.  While doing
 176      so work out the largest function body on the way.  As
 177      described above, we want to base our function growth
 178      limits based on that.  Not on the self size of the
 179      outer function, not on the self size of inline code
 180      we immediately inline to.  This is the most relaxed
 181      interpretation of the rule "do not grow large functions
 182      too much in order to prevent compiler from exploding".  */
 183   while (true)
 184     {
 185       info = inline_summaries->get (to);
 186       if (limit < info->self_size)
 187         limit = info->self_size;
 188       if (stack_size_limit < info->estimated_self_stack_size)
 189         stack_size_limit = info->estimated_self_stack_size;
 190       if (to->global.inlined_to)
 191         to = to->callers->caller;
 192       else
 193         break;
 194     }
 195
 196   what_info = inline_summaries->get (what);
 197
 198   if (limit < what_info->self_size)
 199     limit = what_info->self_size;
 200
 201   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 202
 203   /* Check the size after inlining against the function limits.  But allow
 204      the function to shrink if it went over the limits by forced inlining.  */
 205   newsize = estimate_size_after_inlining (to, e);
 206   if (newsize >= info->size
 207       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 208       && newsize > limit)
 209     {
 210       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 211       return false;
 212     }
 213
 214   if (!what_info->estimated_stack_size)
 215     return true;
 216
 217   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 218      due to large i/o datastructures used by the Fortran front-end.
 219      We ought to ignore this limit when we know that the edge is executed
 220      on every invocation of the caller (i.e. its call statement dominates
 221      exit block).  We do not track this information, yet.  */
 222   stack_size_limit += ((gcov_type)stack_size_limit
 223                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 224
 225   inlined_stack = (outer_info->stack_frame_offset
 226                    + outer_info->estimated_self_stack_size
 227                    + what_info->estimated_stack_size);
 228   /* Check new stack consumption with stack consumption at the place
 229      stack is used.  */
 230   if (inlined_stack > stack_size_limit
 231       /* If function already has large stack usage from sibling
 232          inline call, we can inline, too.
 233          This bit overoptimistically assume that we are good at stack
 234          packing.  */
 235       && inlined_stack > info->estimated_stack_size
 236       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 237     {
 238       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 239       return false;
 240     }
 241   return true;
 242 }
 243
 244 /* Dump info about why inlining has failed.  */
 245
 246 static void
 247 report_inline_failed_reason (struct cgraph_edge *e)
 248 {
 249   if (dump_file)
 250     {
 251       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 252                xstrdup_for_dump (e->caller->name ()), e->caller->order,
 253                xstrdup_for_dump (e->callee->name ()), e->callee->order,
 254                cgraph_inline_failed_string (e->inline_failed));
 255     }
 256 }
 257
 258  /* Decide whether sanitizer-related attributes allow inlining. */
 259
 260 static bool
 261 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 262 {
 263   /* Don't care if sanitizer is disabled */
 264   if (!(flag_sanitize & SANITIZE_ADDRESS))
 265     return true;
 266
 267   if (!caller || !callee)
 268     return true;
 269
 270   return !!lookup_attribute ("no_sanitize_address",
 271       DECL_ATTRIBUTES (caller)) ==
 272       !!lookup_attribute ("no_sanitize_address",
 273       DECL_ATTRIBUTES (callee));
 274 }
 275
 276  /* Decide if we can inline the edge and possibly update
 277    inline_failed reason.
 278    We check whether inlining is possible at all and whether
 279    caller growth limits allow doing so.
 280
 281    if REPORT is true, output reason to the dump file.
 282
 283    if DISREGARD_LIMITS is true, ignore size limits.*/
 284
 285 static bool
 286 can_inline_edge_p (struct cgraph_edge *e, bool report,
 287                    bool disregard_limits = false)
 288 {
 289   bool inlinable = true;
 290   enum availability avail;
 291   cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
 292   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 293   tree callee_tree
 294     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 295   struct function *caller_fun = e->caller->get_fun ();
 296   struct function *callee_fun = callee ? callee->get_fun () : NULL;
 297
 298   gcc_assert (e->inline_failed);
 299
 300   if (!callee || !callee->definition)
 301     {
 302       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 303       inlinable = false;
 304     }
 305   else if (callee->calls_comdat_local)
 306     {
 307       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 308       inlinable = false;
 309     }
 310   else if (!inline_summaries->get (callee)->inlinable
 311            || (caller_fun && fn_contains_cilk_spawn_p (caller_fun)))
 312     {
 313       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 314       inlinable = false;
 315     }
 316   else if (avail <= AVAIL_INTERPOSABLE)
 317     {
 318       e->inline_failed = CIF_OVERWRITABLE;
 319       inlinable = false;
 320     }
 321   else if (e->call_stmt_cannot_inline_p)
 322     {
 323       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 324         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 325       inlinable = false;
 326     }
 327   /* Don't inline if the functions have different EH personalities.  */
 328   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 329            && DECL_FUNCTION_PERSONALITY (callee->decl)
 330            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 331                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 332     {
 333       e->inline_failed = CIF_EH_PERSONALITY;
 334       inlinable = false;
 335     }
 336   /* TM pure functions should not be inlined into non-TM_pure
 337      functions.  */
 338   else if (is_tm_pure (callee->decl)
 339            && !is_tm_pure (e->caller->decl))
 340     {
 341       e->inline_failed = CIF_UNSPECIFIED;
 342       inlinable = false;
 343     }
 344   /* Don't inline if the callee can throw non-call exceptions but the
 345      caller cannot.
 346      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 347      Move the flag into cgraph node or mirror it in the inline summary.  */
 348   else if (callee_fun && callee_fun->can_throw_non_call_exceptions
 349            && !(caller_fun && caller_fun->can_throw_non_call_exceptions))
 350     {
 351       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 352       inlinable = false;
 353     }
 354   /* Check compatibility of target optimization options.  */
 355   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 356                                                 callee->decl))
 357     {
 358       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 359       inlinable = false;
 360     }
 361   /* Don't inline a function with mismatched sanitization attributes. */
 362   else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
 363     {
 364       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 365       inlinable = false;
 366     }
 367   /* Check if caller growth allows the inlining.  */
 368   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 369            && !disregard_limits
 370            && !lookup_attribute ("flatten",
 371                                  DECL_ATTRIBUTES
 372                                    (e->caller->global.inlined_to
 373                                     ? e->caller->global.inlined_to->decl
 374                                     : e->caller->decl))
 375            && !caller_growth_limits (e))
 376     inlinable = false;
 377   /* Don't inline a function with a higher optimization level than the
 378      caller.  FIXME: this is really just tip of iceberg of handling
 379      optimization attribute.  */
 380   else if (caller_tree != callee_tree)
 381     {
 382       if (((opt_for_fn (e->caller->decl, optimize)
 383             > opt_for_fn (e->callee->decl, optimize))
 384             || (opt_for_fn (e->caller->decl, optimize_size)
 385                 != opt_for_fn (e->callee->decl, optimize_size)))
 386           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 387           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 388         {
 389           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 390           inlinable = false;
 391         }
 392     }
 393
 394   if (!inlinable && report)
 395     report_inline_failed_reason (e);
 396   return inlinable;
 397 }
 398
 399
 400 /* Return true if the edge E is inlinable during early inlining.  */
 401
 402 static bool
 403 can_early_inline_edge_p (struct cgraph_edge *e)
 404 {
 405   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 406   /* Early inliner might get called at WPA stage when IPA pass adds new
 407      function.  In this case we can not really do any of early inlining
 408      because function bodies are missing.  */
 409   if (!gimple_has_body_p (callee->decl))
 410     {
 411       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 412       return false;
 413     }
 414   /* In early inliner some of callees may not be in SSA form yet
 415      (i.e. the callgraph is cyclic and we did not process
 416      the callee by early inliner, yet).  We don't have CIF code for this
 417      case; later we will re-do the decision in the real inliner.  */
 418   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 419       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 420     {
 421       if (dump_file)
 422         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 423       return false;
 424     }
 425   if (!can_inline_edge_p (e, true))
 426     return false;
 427   return true;
 428 }
 429
 430
 431 /* Return number of calls in N.  Ignore cheap builtins.  */
 432
 433 static int
 434 num_calls (struct cgraph_node *n)
 435 {
 436   struct cgraph_edge *e;
 437   int num = 0;
 438
 439   for (e = n->callees; e; e = e->next_callee)
 440     if (!is_inexpensive_builtin (e->callee->decl))
 441       num++;
 442   return num;
 443 }
 444
 445
 446 /* Return true if we are interested in inlining small function.  */
 447
 448 static bool
 449 want_early_inline_function_p (struct cgraph_edge *e)
 450 {
 451   bool want_inline = true;
 452   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 453
 454   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 455     ;
 456   /* For AutoFDO, we need to make sure that before profile summary, all
 457      hot paths' IR look exactly the same as profiled binary. As a result,
 458      in einliner, we will disregard size limit and inline those callsites
 459      that are:
 460        * inlined in the profiled binary, and
 461        * the cloned callee has enough samples to be considered "hot".  */
 462   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 463     ;
 464   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 465            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 466     {
 467       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 468       report_inline_failed_reason (e);
 469       want_inline = false;
 470     }
 471   else
 472     {
 473       int growth = estimate_edge_growth (e);
 474       int n;
 475
 476       if (growth <= 0)
 477         ;
 478       else if (!e->maybe_hot_p ()
 479                && growth > 0)
 480         {
 481           if (dump_file)
 482             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 483                      "call is cold and code would grow by %i\n",
 484                      xstrdup_for_dump (e->caller->name ()),
 485                      e->caller->order,
 486                      xstrdup_for_dump (callee->name ()), callee->order,
 487                      growth);
 488           want_inline = false;
 489         }
 490       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 491         {
 492           if (dump_file)
 493             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 494                      "growth %i exceeds --param early-inlining-insns\n",
 495                      xstrdup_for_dump (e->caller->name ()),
 496                      e->caller->order,
 497                      xstrdup_for_dump (callee->name ()), callee->order,
 498                      growth);
 499           want_inline = false;
 500         }
 501       else if ((n = num_calls (callee)) != 0
 502                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 503         {
 504           if (dump_file)
 505             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 506                      "growth %i exceeds --param early-inlining-insns "
 507                      "divided by number of calls\n",
 508                      xstrdup_for_dump (e->caller->name ()),
 509                      e->caller->order,
 510                      xstrdup_for_dump (callee->name ()), callee->order,
 511                      growth);
 512           want_inline = false;
 513         }
 514     }
 515   return want_inline;
 516 }
 517
 518 /* Compute time of the edge->caller + edge->callee execution when inlining
 519    does not happen.  */
 520
 521 inline gcov_type
 522 compute_uninlined_call_time (struct inline_summary *callee_info,
 523                              struct cgraph_edge *edge)
 524 {
 525   gcov_type uninlined_call_time =
 526     RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
 527           CGRAPH_FREQ_BASE);
 528   gcov_type caller_time = inline_summaries->get (edge->caller->global.inlined_to
 529                                           ? edge->caller->global.inlined_to
 530                                           : edge->caller)->time;
 531   return uninlined_call_time + caller_time;
 532 }
 533
 534 /* Same as compute_uinlined_call_time but compute time when inlining
 535    does happen.  */
 536
 537 inline gcov_type
 538 compute_inlined_call_time (struct cgraph_edge *edge,
 539                            int edge_time)
 540 {
 541   gcov_type caller_time = inline_summaries->get (edge->caller->global.inlined_to
 542                                           ? edge->caller->global.inlined_to
 543                                           : edge->caller)->time;
 544   gcov_type time = (caller_time
 545                     + RDIV (((gcov_type) edge_time
 546                              - inline_edge_summary (edge)->call_stmt_time)
 547                     * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 548   /* Possible one roundoff error, but watch for overflows.  */
 549   gcc_checking_assert (time >= INT_MIN / 2);
 550   if (time < 0)
 551     time = 0;
 552   return time;
 553 }
 554
 555 /* Return true if the speedup for inlining E is bigger than
 556    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 557
 558 static bool
 559 big_speedup_p (struct cgraph_edge *e)
 560 {
 561   gcov_type time = compute_uninlined_call_time (inline_summaries->get (e->callee),
 562                                                 e);
 563   gcov_type inlined_time = compute_inlined_call_time (e,
 564                                                       estimate_edge_time (e));
 565   if (time - inlined_time
 566       > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
 567     return true;
 568   return false;
 569 }
 570
 571 /* Return true if we are interested in inlining small function.
 572    When REPORT is true, report reason to dump file.  */
 573
 574 static bool
 575 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 576 {
 577   bool want_inline = true;
 578   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 579
 580   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 581     ;
 582   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 583            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 584     {
 585       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 586       want_inline = false;
 587     }
 588   /* Do fast and conservative check if the function can be good
 589      inline candidate.  At the moment we allow inline hints to
 590      promote non-inline functions to inline and we increase
 591      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 592   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 593            && (!e->count || !e->maybe_hot_p ()))
 594            && inline_summaries->get (callee)->min_size
 595                 - inline_edge_summary (e)->call_stmt_size
 596               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 597     {
 598       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 599       want_inline = false;
 600     }
 601   else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
 602            && inline_summaries->get (callee)->min_size
 603                 - inline_edge_summary (e)->call_stmt_size
 604               > 16 * MAX_INLINE_INSNS_SINGLE)
 605     {
 606       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 607                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 608                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 609       want_inline = false;
 610     }
 611   else
 612     {
 613       int growth = estimate_edge_growth (e);
 614       inline_hints hints = estimate_edge_hints (e);
 615       bool big_speedup = big_speedup_p (e);
 616
 617       if (growth <= 0)
 618         ;
 619       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 620          hints suggests that inlining given function is very profitable.  */
 621       else if (DECL_DECLARED_INLINE_P (callee->decl)
 622                && growth >= MAX_INLINE_INSNS_SINGLE
 623                && ((!big_speedup
 624                     && !(hints & (INLINE_HINT_indirect_call
 625                                   | INLINE_HINT_known_hot
 626                                   | INLINE_HINT_loop_iterations
 627                                   | INLINE_HINT_array_index
 628                                   | INLINE_HINT_loop_stride)))
 629                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 630         {
 631           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 632           want_inline = false;
 633         }
 634       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 635                && !opt_for_fn (e->caller->decl, flag_inline_functions))
 636         {
 637           /* growth_likely_positive is expensive, always test it last.  */
 638           if (growth >= MAX_INLINE_INSNS_SINGLE
 639               || growth_likely_positive (callee, growth))
 640             {
 641               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 642               want_inline = false;
 643             }
 644         }
 645       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 646          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 647          inlining given function is very profitable.  */
 648       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 649                && !big_speedup
 650                && !(hints & INLINE_HINT_known_hot)
 651                && growth >= ((hints & (INLINE_HINT_indirect_call
 652                                        | INLINE_HINT_loop_iterations
 653                                        | INLINE_HINT_array_index
 654                                        | INLINE_HINT_loop_stride))
 655                              ? MAX (MAX_INLINE_INSNS_AUTO,
 656                                     MAX_INLINE_INSNS_SINGLE)
 657                              : MAX_INLINE_INSNS_AUTO))
 658         {
 659           /* growth_likely_positive is expensive, always test it last.  */
 660           if (growth >= MAX_INLINE_INSNS_SINGLE
 661               || growth_likely_positive (callee, growth))
 662             {
 663               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 664               want_inline = false;
 665             }
 666         }
 667       /* If call is cold, do not inline when function body would grow. */
 668       else if (!e->maybe_hot_p ()
 669                && (growth >= MAX_INLINE_INSNS_SINGLE
 670                    || growth_likely_positive (callee, growth)))
 671         {
 672           e->inline_failed = CIF_UNLIKELY_CALL;
 673           want_inline = false;
 674         }
 675     }
 676   if (!want_inline && report)
 677     report_inline_failed_reason (e);
 678   return want_inline;
 679 }
 680
 681 /* EDGE is self recursive edge.
 682    We hand two cases - when function A is inlining into itself
 683    or when function A is being inlined into another inliner copy of function
 684    A within function B.
 685
 686    In first case OUTER_NODE points to the toplevel copy of A, while
 687    in the second case OUTER_NODE points to the outermost copy of A in B.
 688
 689    In both cases we want to be extra selective since
 690    inlining the call will just introduce new recursive calls to appear.  */
 691
 692 static bool
 693 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 694                                    struct cgraph_node *outer_node,
 695                                    bool peeling,
 696                                    int depth)
 697 {
 698   char const *reason = NULL;
 699   bool want_inline = true;
 700   int caller_freq = CGRAPH_FREQ_BASE;
 701   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 702
 703   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 704     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 705
 706   if (!edge->maybe_hot_p ())
 707     {
 708       reason = "recursive call is cold";
 709       want_inline = false;
 710     }
 711   else if (max_count && !outer_node->count)
 712     {
 713       reason = "not executed in profile";
 714       want_inline = false;
 715     }
 716   else if (depth > max_depth)
 717     {
 718       reason = "--param max-inline-recursive-depth exceeded.";
 719       want_inline = false;
 720     }
 721
 722   if (outer_node->global.inlined_to)
 723     caller_freq = outer_node->callers->frequency;
 724
 725   if (!caller_freq)
 726     {
 727       reason = "function is inlined and unlikely";
 728       want_inline = false;
 729     }
 730
 731   if (!want_inline)
 732     ;
 733   /* Inlining of self recursive function into copy of itself within other function
 734      is transformation similar to loop peeling.
 735
 736      Peeling is profitable if we can inline enough copies to make probability
 737      of actual call to the self recursive function very small.  Be sure that
 738      the probability of recursion is small.
 739
 740      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 741      This way the expected number of recision is at most max_depth.  */
 742   else if (peeling)
 743     {
 744       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 745                                          / max_depth);
 746       int i;
 747       for (i = 1; i < depth; i++)
 748         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 749       if (max_count
 750           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 751               >= max_prob))
 752         {
 753           reason = "profile of recursive call is too large";
 754           want_inline = false;
 755         }
 756       if (!max_count
 757           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 758               >= max_prob))
 759         {
 760           reason = "frequency of recursive call is too large";
 761           want_inline = false;
 762         }
 763     }
 764   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 765      depth is large.  We reduce function call overhead and increase chances that
 766      things fit in hardware return predictor.
 767
 768      Recursive inlining might however increase cost of stack frame setup
 769      actually slowing down functions whose recursion tree is wide rather than
 770      deep.
 771
 772      Deciding reliably on when to do recursive inlining without profile feedback
 773      is tricky.  For now we disable recursive inlining when probability of self
 774      recursion is low.
 775
 776      Recursive inlining of self recursive call within loop also results in large loop
 777      depths that generally optimize badly.  We may want to throttle down inlining
 778      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 779      methods.  */
 780   else
 781     {
 782       if (max_count
 783           && (edge->count * 100 / outer_node->count
 784               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 785         {
 786           reason = "profile of recursive call is too small";
 787           want_inline = false;
 788         }
 789       else if (!max_count
 790                && (edge->frequency * 100 / caller_freq
 791                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 792         {
 793           reason = "frequency of recursive call is too small";
 794           want_inline = false;
 795         }
 796     }
 797   if (!want_inline && dump_file)
 798     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 799   return want_inline;
 800 }
 801
 802 /* Return true when NODE has uninlinable caller;
 803    set HAS_HOT_CALL if it has hot call.
 804    Worker for cgraph_for_node_and_aliases.  */
 805
 806 static bool
 807 check_callers (struct cgraph_node *node, void *has_hot_call)
 808 {
 809   struct cgraph_edge *e;
 810    for (e = node->callers; e; e = e->next_caller)
 811      {
 812        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once))
 813          return true;
 814        if (!can_inline_edge_p (e, true))
 815          return true;
 816        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 817          *(bool *)has_hot_call = true;
 818      }
 819   return false;
 820 }
 821
 822 /* If NODE has a caller, return true.  */
 823
 824 static bool
 825 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 826 {
 827   if (node->callers)
 828     return true;
 829   return false;
 830 }
 831
 832 /* Decide if inlining NODE would reduce unit size by eliminating
 833    the offline copy of function.
 834    When COLD is true the cold calls are considered, too.  */
 835
 836 static bool
 837 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 838 {
 839   bool has_hot_call = false;
 840
 841   if (node->ultimate_alias_target () != node)
 842     return false;
 843   /* Already inlined?  */
 844   if (node->global.inlined_to)
 845     return false;
 846   /* Does it have callers?  */
 847   if (!node->call_for_symbol_thunks_and_aliases (has_caller_p, NULL, true))
 848     return false;
 849   /* Inlining into all callers would increase size?  */
 850   if (estimate_growth (node) > 0)
 851     return false;
 852   /* All inlines must be possible.  */
 853   if (node->call_for_symbol_thunks_and_aliases (check_callers, &has_hot_call,
 854                                                 true))
 855     return false;
 856   if (!cold && !has_hot_call)
 857     return false;
 858   return true;
 859 }
 860
 861 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 862
 863 /* Return relative time improvement for inlining EDGE in range
 864    1...RELATIVE_TIME_BENEFIT_RANGE  */
 865
 866 static inline int
 867 relative_time_benefit (struct inline_summary *callee_info,
 868                        struct cgraph_edge *edge,
 869                        int edge_time)
 870 {
 871   gcov_type relbenefit;
 872   gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 873   gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
 874
 875   /* Inlining into extern inline function is not a win.  */
 876   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 877                      ? edge->caller->global.inlined_to->decl
 878                      : edge->caller->decl))
 879     return 1;
 880
 881   /* Watch overflows.  */
 882   gcc_checking_assert (uninlined_call_time >= 0);
 883   gcc_checking_assert (inlined_call_time >= 0);
 884   gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 885
 886   /* Compute relative time benefit, i.e. how much the call becomes faster.
 887      ??? perhaps computing how much the caller+calle together become faster
 888      would lead to more realistic results.  */
 889   if (!uninlined_call_time)
 890     uninlined_call_time = 1;
 891   relbenefit =
 892     RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
 893           uninlined_call_time);
 894   relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
 895   gcc_checking_assert (relbenefit >= 0);
 896   relbenefit = MAX (relbenefit, 1);
 897   return relbenefit;
 898 }
 899
 900
 901 /* A cost model driving the inlining heuristics in a way so the edges with
 902    smallest badness are inlined first.  After each inlining is performed
 903    the costs of all caller edges of nodes affected are recomputed so the
 904    metrics may accurately depend on values such as number of inlinable callers
 905    of the function or function body size.  */
 906
 907 static sreal
 908 edge_badness (struct cgraph_edge *edge, bool dump)
 909 {
 910   sreal badness;
 911   int growth, edge_time;
 912   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
 913   struct inline_summary *callee_info = inline_summaries->get (callee);
 914   inline_hints hints;
 915
 916   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 917     return INT_MIN;
 918
 919   growth = estimate_edge_growth (edge);
 920   edge_time = estimate_edge_time (edge);
 921   hints = estimate_edge_hints (edge);
 922   gcc_checking_assert (edge_time >= 0);
 923   gcc_checking_assert (edge_time <= callee_info->time);
 924   gcc_checking_assert (growth <= callee_info->size);
 925
 926   if (dump)
 927     {
 928       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 929                xstrdup_for_dump (edge->caller->name ()),
 930                edge->caller->order,
 931                xstrdup_for_dump (callee->name ()),
 932                edge->callee->order);
 933       fprintf (dump_file, "      size growth %i, time %i ",
 934                growth,
 935                edge_time);
 936       dump_inline_hints (dump_file, hints);
 937       if (big_speedup_p (edge))
 938         fprintf (dump_file, " big_speedup");
 939       fprintf (dump_file, "\n");
 940     }
 941
 942   /* Always prefer inlining saving code size.  */
 943   if (growth <= 0)
 944     {
 945       badness = INT_MIN / 2 + growth;
 946       if (dump)
 947         fprintf (dump_file, "      %"PRId64": Growth %d <= 0\n", badness.to_int (),
 948                  growth);
 949     }
 950
 951   /* When profiling is available, compute badness as:
 952
 953                 relative_edge_count * relative_time_benefit
 954      goodness = -------------------------------------------
 955                 growth_f_caller
 956      badness = -goodness
 957
 958     The fraction is upside down, because on edge counts and time beneits
 959     the bounds are known. Edge growth is essentially unlimited.  */
 960
 961   else if (max_count)
 962     {
 963       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
 964       /* Capping edge->count to max_count. edge->count can be larger than
 965          max_count if an inline adds new edges which increase max_count
 966          after max_count is computed.  */
 967       gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
 968
 969       sreal relbenefit_real (relbenefit, 0);
 970       sreal growth_real (growth, 0);
 971
 972       /* relative_edge_count.  */
 973       sreal tmp (edge_count, 0);
 974       tmp /= max_count_real;
 975
 976       /* relative_time_benefit.  */
 977       tmp *= relbenefit_real;
 978       tmp /= max_relbenefit_real;
 979
 980       /* growth_f_caller.  */
 981       tmp *= half_int_min_real;
 982       tmp /=  growth_real;
 983
 984       badness = -1 * tmp.to_int ();
 985
 986       if (dump)
 987         {
 988           fprintf (dump_file,
 989                    "      %"PRId64" (relative %f): profile info. Relative count %f%s"
 990                    " * Relative benefit %f\n",
 991                    badness.to_int (), (double) badness.to_int () / INT_MIN,
 992                    (double) edge_count / max_count,
 993                    edge->count > max_count ? " (capped to max_count)" : "",
 994                    relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 995         }
 996     }
 997
 998   /* When function local profile is available. Compute badness as:
 999
1000                  relative_time_benefit
1001      goodness =  ---------------------------------
1002                  growth_of_caller * overall_growth
1003
1004      badness = - goodness
1005
1006      compensated by the inline hints.
1007   */
1008   /* TODO: We ought suport mixing units where some functions are profiled
1009      and some not.  */
1010   else if (flag_guess_branch_prob)
1011     {
1012       badness = (relative_time_benefit (callee_info, edge, edge_time)
1013                  * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
1014       badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
1015       gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
1016       if ((hints & (INLINE_HINT_indirect_call
1017                     | INLINE_HINT_loop_iterations
1018                     | INLINE_HINT_array_index
1019                     | INLINE_HINT_loop_stride))
1020           || callee_info->growth <= 0)
1021         badness *= 8;
1022       if (hints & (INLINE_HINT_same_scc))
1023         badness /= 16;
1024       else if (hints & (INLINE_HINT_in_scc))
1025         badness /= 8;
1026       else if (hints & (INLINE_HINT_cross_module))
1027         badness /= 2;
1028       gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
1029       if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
1030         badness *= 16;
1031       if (dump)
1032         {
1033           fprintf (dump_file,
1034                    "      %"PRId64": guessed profile. frequency %f,"
1035                    " benefit %f%%, time w/o inlining %i, time w inlining %i"
1036                    " overall growth %i (current) %i (original)\n",
1037                    badness.to_int (), (double)edge->frequency / CGRAPH_FREQ_BASE,
1038                    relative_time_benefit (callee_info, edge, edge_time) * 100.0
1039                    / RELATIVE_TIME_BENEFIT_RANGE,
1040                    (int)compute_uninlined_call_time (callee_info, edge),
1041                    (int)compute_inlined_call_time (edge, edge_time),
1042                    estimate_growth (callee),
1043                    callee_info->growth);
1044         }
1045     }
1046   /* When function local profile is not available or it does not give
1047      useful information (ie frequency is zero), base the cost on
1048      loop nest and overall size growth, so we optimize for overall number
1049      of functions fully inlined in program.  */
1050   else
1051     {
1052       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1053       badness = growth * 256;
1054
1055       /* Decrease badness if call is nested.  */
1056       if (badness > 0)
1057         badness = badness >> nest;
1058       else
1059         {
1060           badness = badness << nest;
1061         }
1062       if (dump)
1063         fprintf (dump_file, "      %"PRId64": no profile. nest %i\n", badness.to_int (),
1064                  nest);
1065     }
1066
1067   /* Ensure that we did not overflow in all the fixed point math above.  */
1068   gcc_assert (badness >= INT_MIN);
1069   gcc_assert (badness <= INT_MAX - 1);
1070   /* Make recursive inlining happen always after other inlining is done.  */
1071   if (edge->recursive_p ())
1072     return badness + 1;
1073   else
1074     return badness;
1075 }
1076
1077 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1078 static inline void
1079 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1080 {
1081   sreal badness = edge_badness (edge, false);
1082   if (edge->aux)
1083     {
1084       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1085       gcc_checking_assert (n->get_data () == edge);
1086
1087       /* fibonacci_heap::replace_key only decrease the keys.
1088          When we increase the key we do not update heap
1089          and instead re-insert the element once it becomes
1090          a minimum of heap.  */
1091       if (badness < n->get_key ())
1092         {
1093           if (dump_file && (dump_flags & TDF_DETAILS))
1094             {
1095               fprintf (dump_file,
1096                        "  decreasing badness %s/%i -> %s/%i, %"PRId64
1097                        " to %"PRId64"\n",
1098                        xstrdup_for_dump (edge->caller->name ()),
1099                        edge->caller->order,
1100                        xstrdup_for_dump (edge->callee->name ()),
1101                        edge->callee->order,
1102                        n->get_key ().to_int (),
1103                        badness.to_int ());
1104             }
1105           heap->decrease_key (n, badness);
1106           gcc_checking_assert (n->get_key () == badness);
1107         }
1108     }
1109   else
1110     {
1111        if (dump_file && (dump_flags & TDF_DETAILS))
1112          {
1113            fprintf (dump_file,
1114                     "  enqueuing call %s/%i -> %s/%i, badness %"PRId64"\n",
1115                     xstrdup_for_dump (edge->caller->name ()),
1116                     edge->caller->order,
1117                     xstrdup_for_dump (edge->callee->name ()),
1118                     edge->callee->order,
1119                     badness.to_int ());
1120          }
1121       edge->aux = heap->insert (badness, edge);
1122     }
1123 }
1124
1125
1126 /* NODE was inlined.
1127    All caller edges needs to be resetted because
1128    size estimates change. Similarly callees needs reset
1129    because better context may be known.  */
1130
1131 static void
1132 reset_edge_caches (struct cgraph_node *node)
1133 {
1134   struct cgraph_edge *edge;
1135   struct cgraph_edge *e = node->callees;
1136   struct cgraph_node *where = node;
1137   struct ipa_ref *ref;
1138
1139   if (where->global.inlined_to)
1140     where = where->global.inlined_to;
1141
1142   /* WHERE body size has changed, the cached growth is invalid.  */
1143   reset_node_growth_cache (where);
1144
1145   for (edge = where->callers; edge; edge = edge->next_caller)
1146     if (edge->inline_failed)
1147       reset_edge_growth_cache (edge);
1148
1149   FOR_EACH_ALIAS (where, ref)
1150     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1151
1152   if (!e)
1153     return;
1154
1155   while (true)
1156     if (!e->inline_failed && e->callee->callees)
1157       e = e->callee->callees;
1158     else
1159       {
1160         if (e->inline_failed)
1161           reset_edge_growth_cache (e);
1162         if (e->next_callee)
1163           e = e->next_callee;
1164         else
1165           {
1166             do
1167               {
1168                 if (e->caller == node)
1169                   return;
1170                 e = e->caller->callers;
1171               }
1172             while (!e->next_callee);
1173             e = e->next_callee;
1174           }
1175       }
1176 }
1177
1178 /* Recompute HEAP nodes for each of caller of NODE.
1179    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1180    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1181    it is inlinable. Otherwise check all edges.  */
1182
1183 static void
1184 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1185                     bitmap updated_nodes,
1186                     struct cgraph_edge *check_inlinablity_for)
1187 {
1188   struct cgraph_edge *edge;
1189   struct ipa_ref *ref;
1190
1191   if ((!node->alias && !inline_summaries->get (node)->inlinable)
1192       || node->global.inlined_to)
1193     return;
1194   if (!bitmap_set_bit (updated_nodes, node->uid))
1195     return;
1196
1197   FOR_EACH_ALIAS (node, ref)
1198     {
1199       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1200       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1201     }
1202
1203   for (edge = node->callers; edge; edge = edge->next_caller)
1204     if (edge->inline_failed)
1205       {
1206         if (!check_inlinablity_for
1207             || check_inlinablity_for == edge)
1208           {
1209             if (can_inline_edge_p (edge, false)
1210                 && want_inline_small_function_p (edge, false))
1211               update_edge_key (heap, edge);
1212             else if (edge->aux)
1213               {
1214                 report_inline_failed_reason (edge);
1215                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1216                 edge->aux = NULL;
1217               }
1218           }
1219         else if (edge->aux)
1220           update_edge_key (heap, edge);
1221       }
1222 }
1223
1224 /* Recompute HEAP nodes for each uninlined call in NODE.
1225    This is used when we know that edge badnesses are going only to increase
1226    (we introduced new call site) and thus all we need is to insert newly
1227    created edges into heap.  */
1228
1229 static void
1230 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1231                     bitmap updated_nodes)
1232 {
1233   struct cgraph_edge *e = node->callees;
1234
1235   if (!e)
1236     return;
1237   while (true)
1238     if (!e->inline_failed && e->callee->callees)
1239       e = e->callee->callees;
1240     else
1241       {
1242         enum availability avail;
1243         struct cgraph_node *callee;
1244         /* We do not reset callee growth cache here.  Since we added a new call,
1245            growth chould have just increased and consequentely badness metric
1246            don't need updating.  */
1247         if (e->inline_failed
1248             && (callee = e->callee->ultimate_alias_target (&avail))
1249             && inline_summaries->get (callee)->inlinable
1250             && avail >= AVAIL_AVAILABLE
1251             && !bitmap_bit_p (updated_nodes, callee->uid))
1252           {
1253             if (can_inline_edge_p (e, false)
1254                 && want_inline_small_function_p (e, false))
1255               update_edge_key (heap, e);
1256             else if (e->aux)
1257               {
1258                 report_inline_failed_reason (e);
1259                 heap->delete_node ((edge_heap_node_t *) e->aux);
1260                 e->aux = NULL;
1261               }
1262           }
1263         if (e->next_callee)
1264           e = e->next_callee;
1265         else
1266           {
1267             do
1268               {
1269                 if (e->caller == node)
1270                   return;
1271                 e = e->caller->callers;
1272               }
1273             while (!e->next_callee);
1274             e = e->next_callee;
1275           }
1276       }
1277 }
1278
1279 /* Enqueue all recursive calls from NODE into priority queue depending on
1280    how likely we want to recursively inline the call.  */
1281
1282 static void
1283 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1284                         edge_heap_t *heap)
1285 {
1286   struct cgraph_edge *e;
1287   enum availability avail;
1288
1289   for (e = where->callees; e; e = e->next_callee)
1290     if (e->callee == node
1291         || (e->callee->ultimate_alias_target (&avail) == node
1292             && avail > AVAIL_INTERPOSABLE))
1293       {
1294         /* When profile feedback is available, prioritize by expected number
1295            of calls.  */
1296         heap->insert (!max_count ? -e->frequency
1297                       : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1298                       e);
1299       }
1300   for (e = where->callees; e; e = e->next_callee)
1301     if (!e->inline_failed)
1302       lookup_recursive_calls (node, e->callee, heap);
1303 }
1304
1305 /* Decide on recursive inlining: in the case function has recursive calls,
1306    inline until body size reaches given argument.  If any new indirect edges
1307    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1308    is NULL.  */
1309
1310 static bool
1311 recursive_inlining (struct cgraph_edge *edge,
1312                     vec<cgraph_edge *> *new_edges)
1313 {
1314   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1315   edge_heap_t heap (sreal::min ());
1316   struct cgraph_node *node;
1317   struct cgraph_edge *e;
1318   struct cgraph_node *master_clone = NULL, *next;
1319   int depth = 0;
1320   int n = 0;
1321
1322   node = edge->caller;
1323   if (node->global.inlined_to)
1324     node = node->global.inlined_to;
1325
1326   if (DECL_DECLARED_INLINE_P (node->decl))
1327     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1328
1329   /* Make sure that function is small enough to be considered for inlining.  */
1330   if (estimate_size_after_inlining (node, edge)  >= limit)
1331     return false;
1332   lookup_recursive_calls (node, node, &heap);
1333   if (heap.empty ())
1334     return false;
1335
1336   if (dump_file)
1337     fprintf (dump_file,
1338              "  Performing recursive inlining on %s\n",
1339              node->name ());
1340
1341   /* Do the inlining and update list of recursive call during process.  */
1342   while (!heap.empty ())
1343     {
1344       struct cgraph_edge *curr = heap.extract_min ();
1345       struct cgraph_node *cnode, *dest = curr->callee;
1346
1347       if (!can_inline_edge_p (curr, true))
1348         continue;
1349
1350       /* MASTER_CLONE is produced in the case we already started modified
1351          the function. Be sure to redirect edge to the original body before
1352          estimating growths otherwise we will be seeing growths after inlining
1353          the already modified body.  */
1354       if (master_clone)
1355         {
1356           curr->redirect_callee (master_clone);
1357           reset_edge_growth_cache (curr);
1358         }
1359
1360       if (estimate_size_after_inlining (node, curr) > limit)
1361         {
1362           curr->redirect_callee (dest);
1363           reset_edge_growth_cache (curr);
1364           break;
1365         }
1366
1367       depth = 1;
1368       for (cnode = curr->caller;
1369            cnode->global.inlined_to; cnode = cnode->callers->caller)
1370         if (node->decl
1371             == curr->callee->ultimate_alias_target ()->decl)
1372           depth++;
1373
1374       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1375         {
1376           curr->redirect_callee (dest);
1377           reset_edge_growth_cache (curr);
1378           continue;
1379         }
1380
1381       if (dump_file)
1382         {
1383           fprintf (dump_file,
1384                    "   Inlining call of depth %i", depth);
1385           if (node->count)
1386             {
1387               fprintf (dump_file, " called approx. %.2f times per call",
1388                        (double)curr->count / node->count);
1389             }
1390           fprintf (dump_file, "\n");
1391         }
1392       if (!master_clone)
1393         {
1394           /* We need original clone to copy around.  */
1395           master_clone = node->create_clone (node->decl, node->count,
1396             CGRAPH_FREQ_BASE, false, vNULL,
1397             true, NULL, NULL);
1398           for (e = master_clone->callees; e; e = e->next_callee)
1399             if (!e->inline_failed)
1400               clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1401           curr->redirect_callee (master_clone);
1402           reset_edge_growth_cache (curr);
1403         }
1404
1405       inline_call (curr, false, new_edges, &overall_size, true);
1406       lookup_recursive_calls (node, curr->callee, &heap);
1407       n++;
1408     }
1409
1410   if (!heap.empty () && dump_file)
1411     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1412
1413   if (!master_clone)
1414     return false;
1415
1416   if (dump_file)
1417     fprintf (dump_file,
1418              "\n   Inlined %i times, "
1419              "body grown from size %i to %i, time %i to %i\n", n,
1420              inline_summaries->get (master_clone)->size, inline_summaries->get (node)->size,
1421              inline_summaries->get (master_clone)->time, inline_summaries->get (node)->time);
1422
1423   /* Remove master clone we used for inlining.  We rely that clones inlined
1424      into master clone gets queued just before master clone so we don't
1425      need recursion.  */
1426   for (node = symtab->first_function (); node != master_clone;
1427        node = next)
1428     {
1429       next = symtab->next_function (node);
1430       if (node->global.inlined_to == master_clone)
1431         node->remove ();
1432     }
1433   master_clone->remove ();
1434   return true;
1435 }
1436
1437
1438 /* Given whole compilation unit estimate of INSNS, compute how large we can
1439    allow the unit to grow.  */
1440
1441 static int
1442 compute_max_insns (int insns)
1443 {
1444   int max_insns = insns;
1445   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1446     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1447
1448   return ((int64_t) max_insns
1449           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1450 }
1451
1452
1453 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1454
1455 static void
1456 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1457 {
1458   while (new_edges.length () > 0)
1459     {
1460       struct cgraph_edge *edge = new_edges.pop ();
1461
1462       gcc_assert (!edge->aux);
1463       if (edge->inline_failed
1464           && can_inline_edge_p (edge, true)
1465           && want_inline_small_function_p (edge, true))
1466         edge->aux = heap->insert (edge_badness (edge, false), edge);
1467     }
1468 }
1469
1470 /* Remove EDGE from the fibheap.  */
1471
1472 static void
1473 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1474 {
1475   if (e->callee)
1476     reset_node_growth_cache (e->callee);
1477   if (e->aux)
1478     {
1479       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1480       e->aux = NULL;
1481     }
1482 }
1483
1484 /* Return true if speculation of edge E seems useful.
1485    If ANTICIPATE_INLINING is true, be conservative and hope that E
1486    may get inlined.  */
1487
1488 bool
1489 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1490 {
1491   enum availability avail;
1492   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail);
1493   struct cgraph_edge *direct, *indirect;
1494   struct ipa_ref *ref;
1495
1496   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1497
1498   if (!e->maybe_hot_p ())
1499     return false;
1500
1501   /* See if IP optimizations found something potentially useful about the
1502      function.  For now we look only for CONST/PURE flags.  Almost everything
1503      else we propagate is useless.  */
1504   if (avail >= AVAIL_AVAILABLE)
1505     {
1506       int ecf_flags = flags_from_decl_or_type (target->decl);
1507       if (ecf_flags & ECF_CONST)
1508         {
1509           e->speculative_call_info (direct, indirect, ref);
1510           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1511             return true;
1512         }
1513       else if (ecf_flags & ECF_PURE)
1514         {
1515           e->speculative_call_info (direct, indirect, ref);
1516           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1517             return true;
1518         }
1519     }
1520   /* If we did not managed to inline the function nor redirect
1521      to an ipa-cp clone (that are seen by having local flag set),
1522      it is probably pointless to inline it unless hardware is missing
1523      indirect call predictor.  */
1524   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1525     return false;
1526   /* For overwritable targets there is not much to do.  */
1527   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1528     return false;
1529   /* OK, speculation seems interesting.  */
1530   return true;
1531 }
1532
1533 /* We know that EDGE is not going to be inlined.
1534    See if we can remove speculation.  */
1535
1536 static void
1537 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1538 {
1539   if (edge->speculative && !speculation_useful_p (edge, false))
1540     {
1541       struct cgraph_node *node = edge->caller;
1542       struct cgraph_node *where = node->global.inlined_to
1543                                   ? node->global.inlined_to : node;
1544       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1545
1546       spec_rem += edge->count;
1547       edge->resolve_speculation ();
1548       reset_edge_caches (where);
1549       inline_update_overall_summary (where);
1550       update_caller_keys (edge_heap, where,
1551                           updated_nodes, NULL);
1552       update_callee_keys (edge_heap, where,
1553                           updated_nodes);
1554       BITMAP_FREE (updated_nodes);
1555     }
1556 }
1557
1558 /* We use greedy algorithm for inlining of small functions:
1559    All inline candidates are put into prioritized heap ordered in
1560    increasing badness.
1561
1562    The inlining of small functions is bounded by unit growth parameters.  */
1563
1564 static void
1565 inline_small_functions (void)
1566 {
1567   struct cgraph_node *node;
1568   struct cgraph_edge *edge;
1569   edge_heap_t edge_heap (sreal::min ());
1570   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1571   int min_size, max_size;
1572   auto_vec<cgraph_edge *> new_indirect_edges;
1573   int initial_size = 0;
1574   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1575   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1576   new_indirect_edges.create (8);
1577
1578   edge_removal_hook_holder
1579     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1580
1581   /* Compute overall unit size and other global parameters used by badness
1582      metrics.  */
1583
1584   max_count = 0;
1585   ipa_reduced_postorder (order, true, true, NULL);
1586   free (order);
1587
1588   FOR_EACH_DEFINED_FUNCTION (node)
1589     if (!node->global.inlined_to)
1590       {
1591         if (node->has_gimple_body_p ()
1592             || node->thunk.thunk_p)
1593           {
1594             struct inline_summary *info = inline_summaries->get (node);
1595             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1596
1597             /* Do not account external functions, they will be optimized out
1598                if not inlined.  Also only count the non-cold portion of program.  */
1599             if (!DECL_EXTERNAL (node->decl)
1600                 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
1601               initial_size += info->size;
1602             info->growth = estimate_growth (node);
1603             if (dfs && dfs->next_cycle)
1604               {
1605                 struct cgraph_node *n2;
1606                 int id = dfs->scc_no + 1;
1607                 for (n2 = node; n2;
1608                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1609                   {
1610                     struct inline_summary *info2 = inline_summaries->get (n2);
1611                     if (info2->scc_no)
1612                       break;
1613                     info2->scc_no = id;
1614                   }
1615               }
1616           }
1617
1618         for (edge = node->callers; edge; edge = edge->next_caller)
1619           if (max_count < edge->count)
1620             max_count = edge->count;
1621       }
1622   max_count_real = sreal (max_count, 0);
1623   max_relbenefit_real = sreal (RELATIVE_TIME_BENEFIT_RANGE, 0);
1624   half_int_min_real = sreal (INT_MAX / 2, 0);
1625   ipa_free_postorder_info ();
1626   initialize_growth_caches ();
1627
1628   if (dump_file)
1629     fprintf (dump_file,
1630              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1631              initial_size);
1632
1633   overall_size = initial_size;
1634   max_size = compute_max_insns (overall_size);
1635   min_size = overall_size;
1636
1637   /* Populate the heap with all edges we might inline.  */
1638
1639   FOR_EACH_DEFINED_FUNCTION (node)
1640     {
1641       bool update = false;
1642       struct cgraph_edge *next;
1643
1644       if (dump_file)
1645         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1646                  node->name (), node->order);
1647
1648       for (edge = node->callees; edge; edge = next)
1649         {
1650           next = edge->next_callee;
1651           if (edge->inline_failed
1652               && !edge->aux
1653               && can_inline_edge_p (edge, true)
1654               && want_inline_small_function_p (edge, true)
1655               && edge->inline_failed)
1656             {
1657               gcc_assert (!edge->aux);
1658               update_edge_key (&edge_heap, edge);
1659             }
1660           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1661             {
1662               edge->resolve_speculation ();
1663               update = true;
1664             }
1665         }
1666       if (update)
1667         {
1668           struct cgraph_node *where = node->global.inlined_to
1669                                       ? node->global.inlined_to : node;
1670           inline_update_overall_summary (where);
1671           reset_node_growth_cache (where);
1672           reset_edge_caches (where);
1673           update_caller_keys (&edge_heap, where,
1674                               updated_nodes, NULL);
1675           bitmap_clear (updated_nodes);
1676         }
1677     }
1678
1679   gcc_assert (in_lto_p
1680               || !max_count
1681               || (profile_info && flag_branch_probabilities));
1682
1683   while (!edge_heap.empty ())
1684     {
1685       int old_size = overall_size;
1686       struct cgraph_node *where, *callee;
1687       sreal badness = edge_heap.min_key ();
1688       sreal current_badness;
1689       sreal cached_badness;
1690       int growth;
1691
1692       edge = edge_heap.extract_min ();
1693       gcc_assert (edge->aux);
1694       edge->aux = NULL;
1695       if (!edge->inline_failed || !edge->callee->analyzed)
1696         continue;
1697
1698       /* Be sure that caches are maintained consistent.
1699          We can not make this ENABLE_CHECKING only because it cause different
1700          updates of the fibheap queue.  */
1701       cached_badness = edge_badness (edge, false);
1702       reset_edge_growth_cache (edge);
1703       reset_node_growth_cache (edge->callee);
1704
1705       /* When updating the edge costs, we only decrease badness in the keys.
1706          Increases of badness are handled lazilly; when we see key with out
1707          of date value on it, we re-insert it now.  */
1708       current_badness = edge_badness (edge, false);
1709       gcc_assert (cached_badness == current_badness);
1710       gcc_assert (current_badness >= badness);
1711       if (current_badness != badness)
1712         {
1713           edge->aux = edge_heap.insert (current_badness, edge);
1714           continue;
1715         }
1716
1717       if (!can_inline_edge_p (edge, true))
1718         {
1719           resolve_noninline_speculation (&edge_heap, edge);
1720           continue;
1721         }
1722
1723       callee = edge->callee->ultimate_alias_target ();
1724       growth = estimate_edge_growth (edge);
1725       if (dump_file)
1726         {
1727           fprintf (dump_file,
1728                    "\nConsidering %s/%i with %i size\n",
1729                    callee->name (), callee->order,
1730                    inline_summaries->get (callee)->size);
1731           fprintf (dump_file,
1732                    " to be inlined into %s/%i in %s:%i\n"
1733                    " Estimated badness is %"PRId64", frequency %.2f.\n",
1734                    edge->caller->name (), edge->caller->order,
1735                    edge->call_stmt ? "unknown"
1736                    : gimple_filename ((const_gimple) edge->call_stmt),
1737                    edge->call_stmt ? -1
1738                    : gimple_lineno ((const_gimple) edge->call_stmt),
1739                    badness.to_int (),
1740                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1741           if (edge->count)
1742             fprintf (dump_file," Called %"PRId64"x\n",
1743                      edge->count);
1744           if (dump_flags & TDF_DETAILS)
1745             edge_badness (edge, true);
1746         }
1747
1748       if (overall_size + growth > max_size
1749           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1750         {
1751           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1752           report_inline_failed_reason (edge);
1753           resolve_noninline_speculation (&edge_heap, edge);
1754           continue;
1755         }
1756
1757       if (!want_inline_small_function_p (edge, true))
1758         {
1759           resolve_noninline_speculation (&edge_heap, edge);
1760           continue;
1761         }
1762
1763       /* Heuristics for inlining small functions work poorly for
1764          recursive calls where we do effects similar to loop unrolling.
1765          When inlining such edge seems profitable, leave decision on
1766          specific inliner.  */
1767       if (edge->recursive_p ())
1768         {
1769           where = edge->caller;
1770           if (where->global.inlined_to)
1771             where = where->global.inlined_to;
1772           if (!recursive_inlining (edge,
1773                                    opt_for_fn (edge->caller->decl,
1774                                                flag_indirect_inlining)
1775                                    ? &new_indirect_edges : NULL))
1776             {
1777               edge->inline_failed = CIF_RECURSIVE_INLINING;
1778               resolve_noninline_speculation (&edge_heap, edge);
1779               continue;
1780             }
1781           reset_edge_caches (where);
1782           /* Recursive inliner inlines all recursive calls of the function
1783              at once. Consequently we need to update all callee keys.  */
1784           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
1785             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1786           update_callee_keys (&edge_heap, where, updated_nodes);
1787           bitmap_clear (updated_nodes);
1788         }
1789       else
1790         {
1791           struct cgraph_node *outer_node = NULL;
1792           int depth = 0;
1793
1794           /* Consider the case where self recursive function A is inlined
1795              into B.  This is desired optimization in some cases, since it
1796              leads to effect similar of loop peeling and we might completely
1797              optimize out the recursive call.  However we must be extra
1798              selective.  */
1799
1800           where = edge->caller;
1801           while (where->global.inlined_to)
1802             {
1803               if (where->decl == callee->decl)
1804                 outer_node = where, depth++;
1805               where = where->callers->caller;
1806             }
1807           if (outer_node
1808               && !want_inline_self_recursive_call_p (edge, outer_node,
1809                                                      true, depth))
1810             {
1811               edge->inline_failed
1812                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1813                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1814               resolve_noninline_speculation (&edge_heap, edge);
1815               continue;
1816             }
1817           else if (depth && dump_file)
1818             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1819
1820           gcc_checking_assert (!callee->global.inlined_to);
1821           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1822           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1823
1824           reset_edge_caches (edge->callee);
1825           reset_node_growth_cache (callee);
1826
1827           update_callee_keys (&edge_heap, where, updated_nodes);
1828         }
1829       where = edge->caller;
1830       if (where->global.inlined_to)
1831         where = where->global.inlined_to;
1832
1833       /* Our profitability metric can depend on local properties
1834          such as number of inlinable calls and size of the function body.
1835          After inlining these properties might change for the function we
1836          inlined into (since it's body size changed) and for the functions
1837          called by function we inlined (since number of it inlinable callers
1838          might change).  */
1839       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
1840       bitmap_clear (updated_nodes);
1841
1842       if (dump_file)
1843         {
1844           fprintf (dump_file,
1845                    " Inlined into %s which now has time %i and size %i,"
1846                    "net change of %+i.\n",
1847                    edge->caller->name (),
1848                    inline_summaries->get (edge->caller)->time,
1849                    inline_summaries->get (edge->caller)->size,
1850                    overall_size - old_size);
1851         }
1852       if (min_size > overall_size)
1853         {
1854           min_size = overall_size;
1855           max_size = compute_max_insns (min_size);
1856
1857           if (dump_file)
1858             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1859         }
1860     }
1861
1862   free_growth_caches ();
1863   if (dump_file)
1864     fprintf (dump_file,
1865              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1866              initial_size, overall_size,
1867              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1868   BITMAP_FREE (updated_nodes);
1869   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
1870 }
1871
1872 /* Flatten NODE.  Performed both during early inlining and
1873    at IPA inlining time.  */
1874
1875 static void
1876 flatten_function (struct cgraph_node *node, bool early)
1877 {
1878   struct cgraph_edge *e;
1879
1880   /* We shouldn't be called recursively when we are being processed.  */
1881   gcc_assert (node->aux == NULL);
1882
1883   node->aux = (void *) node;
1884
1885   for (e = node->callees; e; e = e->next_callee)
1886     {
1887       struct cgraph_node *orig_callee;
1888       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
1889
1890       /* We've hit cycle?  It is time to give up.  */
1891       if (callee->aux)
1892         {
1893           if (dump_file)
1894             fprintf (dump_file,
1895                      "Not inlining %s into %s to avoid cycle.\n",
1896                      xstrdup_for_dump (callee->name ()),
1897                      xstrdup_for_dump (e->caller->name ()));
1898           e->inline_failed = CIF_RECURSIVE_INLINING;
1899           continue;
1900         }
1901
1902       /* When the edge is already inlined, we just need to recurse into
1903          it in order to fully flatten the leaves.  */
1904       if (!e->inline_failed)
1905         {
1906           flatten_function (callee, early);
1907           continue;
1908         }
1909
1910       /* Flatten attribute needs to be processed during late inlining. For
1911          extra code quality we however do flattening during early optimization,
1912          too.  */
1913       if (!early
1914           ? !can_inline_edge_p (e, true)
1915           : !can_early_inline_edge_p (e))
1916         continue;
1917
1918       if (e->recursive_p ())
1919         {
1920           if (dump_file)
1921             fprintf (dump_file, "Not inlining: recursive call.\n");
1922           continue;
1923         }
1924
1925       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1926           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1927         {
1928           if (dump_file)
1929             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1930           continue;
1931         }
1932
1933       /* Inline the edge and flatten the inline clone.  Avoid
1934          recursing through the original node if the node was cloned.  */
1935       if (dump_file)
1936         fprintf (dump_file, " Inlining %s into %s.\n",
1937                  xstrdup_for_dump (callee->name ()),
1938                  xstrdup_for_dump (e->caller->name ()));
1939       orig_callee = callee;
1940       inline_call (e, true, NULL, NULL, false);
1941       if (e->callee != orig_callee)
1942         orig_callee->aux = (void *) node;
1943       flatten_function (e->callee, early);
1944       if (e->callee != orig_callee)
1945         orig_callee->aux = NULL;
1946     }
1947
1948   node->aux = NULL;
1949   if (!node->global.inlined_to)
1950     inline_update_overall_summary (node);
1951 }
1952
1953 /* Count number of callers of NODE and store it into DATA (that
1954    points to int.  Worker for cgraph_for_node_and_aliases.  */
1955
1956 static bool
1957 sum_callers (struct cgraph_node *node, void *data)
1958 {
1959   struct cgraph_edge *e;
1960   int *num_calls = (int *)data;
1961
1962   for (e = node->callers; e; e = e->next_caller)
1963     (*num_calls)++;
1964   return false;
1965 }
1966
1967 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1968    DATA points to number of calls originally found so we avoid infinite
1969    recursion.  */
1970
1971 static bool
1972 inline_to_all_callers (struct cgraph_node *node, void *data)
1973 {
1974   int *num_calls = (int *)data;
1975   bool callee_removed = false;
1976
1977   while (node->callers && !node->global.inlined_to)
1978     {
1979       struct cgraph_node *caller = node->callers->caller;
1980
1981       if (dump_file)
1982         {
1983           fprintf (dump_file,
1984                    "\nInlining %s size %i.\n",
1985                    node->name (),
1986                    inline_summaries->get (node)->size);
1987           fprintf (dump_file,
1988                    " Called once from %s %i insns.\n",
1989                    node->callers->caller->name (),
1990                    inline_summaries->get (node->callers->caller)->size);
1991         }
1992
1993       inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
1994       if (dump_file)
1995         fprintf (dump_file,
1996                  " Inlined into %s which now has %i size\n",
1997                  caller->name (),
1998                  inline_summaries->get (caller)->size);
1999       if (!(*num_calls)--)
2000         {
2001           if (dump_file)
2002             fprintf (dump_file, "New calls found; giving up.\n");
2003           return callee_removed;
2004         }
2005       if (callee_removed)
2006         return true;
2007     }
2008   return false;
2009 }
2010
2011 /* Output overall time estimate.  */
2012 static void
2013 dump_overall_stats (void)
2014 {
2015   int64_t sum_weighted = 0, sum = 0;
2016   struct cgraph_node *node;
2017
2018   FOR_EACH_DEFINED_FUNCTION (node)
2019     if (!node->global.inlined_to
2020         && !node->alias)
2021       {
2022         int time = inline_summaries->get (node)->time;
2023         sum += time;
2024         sum_weighted += time * node->count;
2025       }
2026   fprintf (dump_file, "Overall time estimate: "
2027            "%"PRId64" weighted by profile: "
2028            "%"PRId64"\n", sum, sum_weighted);
2029 }
2030
2031 /* Output some useful stats about inlining.  */
2032
2033 static void
2034 dump_inline_stats (void)
2035 {
2036   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2037   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2038   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2039   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2040   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2041   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2042   int64_t reason[CIF_N_REASONS][3];
2043   int i;
2044   struct cgraph_node *node;
2045
2046   memset (reason, 0, sizeof (reason));
2047   FOR_EACH_DEFINED_FUNCTION (node)
2048   {
2049     struct cgraph_edge *e;
2050     for (e = node->callees; e; e = e->next_callee)
2051       {
2052         if (e->inline_failed)
2053           {
2054             reason[(int) e->inline_failed][0] += e->count;
2055             reason[(int) e->inline_failed][1] += e->frequency;
2056             reason[(int) e->inline_failed][2] ++;
2057             if (DECL_VIRTUAL_P (e->callee->decl))
2058               {
2059                 if (e->indirect_inlining_edge)
2060                   noninlined_virt_indir_cnt += e->count;
2061                 else
2062                   noninlined_virt_cnt += e->count;
2063               }
2064             else
2065               {
2066                 if (e->indirect_inlining_edge)
2067                   noninlined_indir_cnt += e->count;
2068                 else
2069                   noninlined_cnt += e->count;
2070               }
2071           }
2072         else
2073           {
2074             if (e->speculative)
2075               {
2076                 if (DECL_VIRTUAL_P (e->callee->decl))
2077                   inlined_speculative_ply += e->count;
2078                 else
2079                   inlined_speculative += e->count;
2080               }
2081             else if (DECL_VIRTUAL_P (e->callee->decl))
2082               {
2083                 if (e->indirect_inlining_edge)
2084                   inlined_virt_indir_cnt += e->count;
2085                 else
2086                   inlined_virt_cnt += e->count;
2087               }
2088             else
2089               {
2090                 if (e->indirect_inlining_edge)
2091                   inlined_indir_cnt += e->count;
2092                 else
2093                   inlined_cnt += e->count;
2094               }
2095           }
2096       }
2097     for (e = node->indirect_calls; e; e = e->next_callee)
2098       if (e->indirect_info->polymorphic)
2099         indirect_poly_cnt += e->count;
2100       else
2101         indirect_cnt += e->count;
2102   }
2103   if (max_count)
2104     {
2105       fprintf (dump_file,
2106                "Inlined %"PRId64 " + speculative "
2107                "%"PRId64 " + speculative polymorphic "
2108                "%"PRId64 " + previously indirect "
2109                "%"PRId64 " + virtual "
2110                "%"PRId64 " + virtual and previously indirect "
2111                "%"PRId64 "\n" "Not inlined "
2112                "%"PRId64 " + previously indirect "
2113                "%"PRId64 " + virtual "
2114                "%"PRId64 " + virtual and previously indirect "
2115                "%"PRId64 " + stil indirect "
2116                "%"PRId64 " + still indirect polymorphic "
2117                "%"PRId64 "\n", inlined_cnt,
2118                inlined_speculative, inlined_speculative_ply,
2119                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2120                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2121                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2122       fprintf (dump_file,
2123                "Removed speculations %"PRId64 "\n",
2124                spec_rem);
2125     }
2126   dump_overall_stats ();
2127   fprintf (dump_file, "\nWhy inlining failed?\n");
2128   for (i = 0; i < CIF_N_REASONS; i++)
2129     if (reason[i][2])
2130       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
2131                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2132                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2133 }
2134
2135 /* Decide on the inlining.  We do so in the topological order to avoid
2136    expenses on updating data structures.  */
2137
2138 static unsigned int
2139 ipa_inline (void)
2140 {
2141   struct cgraph_node *node;
2142   int nnodes;
2143   struct cgraph_node **order;
2144   int i;
2145   int cold;
2146   bool remove_functions = false;
2147
2148   if (!optimize)
2149     return 0;
2150
2151   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2152
2153   if (in_lto_p && optimize)
2154     ipa_update_after_lto_read ();
2155
2156   if (dump_file)
2157     dump_inline_summaries (dump_file);
2158
2159   nnodes = ipa_reverse_postorder (order);
2160
2161   FOR_EACH_FUNCTION (node)
2162     node->aux = 0;
2163
2164   if (dump_file)
2165     fprintf (dump_file, "\nFlattening functions:\n");
2166
2167   /* In the first pass handle functions to be flattened.  Do this with
2168      a priority so none of our later choices will make this impossible.  */
2169   for (i = nnodes - 1; i >= 0; i--)
2170     {
2171       node = order[i];
2172
2173       /* Handle nodes to be flattened.
2174          Ideally when processing callees we stop inlining at the
2175          entry of cycles, possibly cloning that entry point and
2176          try to flatten itself turning it into a self-recursive
2177          function.  */
2178       if (lookup_attribute ("flatten",
2179                             DECL_ATTRIBUTES (node->decl)) != NULL)
2180         {
2181           if (dump_file)
2182             fprintf (dump_file,
2183                      "Flattening %s\n", node->name ());
2184           flatten_function (node, false);
2185         }
2186     }
2187   if (dump_file)
2188     dump_overall_stats ();
2189
2190   inline_small_functions ();
2191
2192   gcc_assert (symtab->state == IPA_SSA);
2193   symtab->state = IPA_SSA_AFTER_INLINING;
2194   /* Do first after-inlining removal.  We want to remove all "stale" extern
2195      inline functions and virtual functions so we really know what is called
2196      once.  */
2197   symtab->remove_unreachable_nodes (dump_file);
2198   free (order);
2199
2200   /* Inline functions with a property that after inlining into all callers the
2201      code size will shrink because the out-of-line copy is eliminated.
2202      We do this regardless on the callee size as long as function growth limits
2203      are met.  */
2204   if (dump_file)
2205     fprintf (dump_file,
2206              "\nDeciding on functions to be inlined into all callers and "
2207              "removing useless speculations:\n");
2208
2209   /* Inlining one function called once has good chance of preventing
2210      inlining other function into the same callee.  Ideally we should
2211      work in priority order, but probably inlining hot functions first
2212      is good cut without the extra pain of maintaining the queue.
2213
2214      ??? this is not really fitting the bill perfectly: inlining function
2215      into callee often leads to better optimization of callee due to
2216      increased context for optimization.
2217      For example if main() function calls a function that outputs help
2218      and then function that does the main optmization, we should inline
2219      the second with priority even if both calls are cold by themselves.
2220
2221      We probably want to implement new predicate replacing our use of
2222      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2223      to be hot.  */
2224   for (cold = 0; cold <= 1; cold ++)
2225     {
2226       FOR_EACH_DEFINED_FUNCTION (node)
2227         {
2228           struct cgraph_edge *edge, *next;
2229           bool update=false;
2230
2231           for (edge = node->callees; edge; edge = next)
2232             {
2233               next = edge->next_callee;
2234               if (edge->speculative && !speculation_useful_p (edge, false))
2235                 {
2236                   edge->resolve_speculation ();
2237                   spec_rem += edge->count;
2238                   update = true;
2239                   remove_functions = true;
2240                 }
2241             }
2242           if (update)
2243             {
2244               struct cgraph_node *where = node->global.inlined_to
2245                                           ? node->global.inlined_to : node;
2246               reset_node_growth_cache (where);
2247               reset_edge_caches (where);
2248               inline_update_overall_summary (where);
2249             }
2250           if (want_inline_function_to_all_callers_p (node, cold))
2251             {
2252               int num_calls = 0;
2253               node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls,
2254                                                       true);
2255               while (node->call_for_symbol_thunks_and_aliases
2256                        (inline_to_all_callers, &num_calls, true))
2257                 ;
2258               remove_functions = true;
2259             }
2260         }
2261     }
2262
2263   /* Free ipa-prop structures if they are no longer needed.  */
2264   if (optimize)
2265     ipa_free_all_structures_after_iinln ();
2266
2267   if (dump_file)
2268     {
2269       fprintf (dump_file,
2270                "\nInlined %i calls, eliminated %i functions\n\n",
2271                ncalls_inlined, nfunctions_inlined);
2272       dump_inline_stats ();
2273     }
2274
2275   if (dump_file)
2276     dump_inline_summaries (dump_file);
2277   /* In WPA we use inline summaries for partitioning process.  */
2278   if (!flag_wpa)
2279     inline_free_summary ();
2280   return remove_functions ? TODO_remove_functions : 0;
2281 }
2282
2283 /* Inline always-inline function calls in NODE.  */
2284
2285 static bool
2286 inline_always_inline_functions (struct cgraph_node *node)
2287 {
2288   struct cgraph_edge *e;
2289   bool inlined = false;
2290
2291   for (e = node->callees; e; e = e->next_callee)
2292     {
2293       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2294       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2295         continue;
2296
2297       if (e->recursive_p ())
2298         {
2299           if (dump_file)
2300             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2301                      e->callee->name ());
2302           e->inline_failed = CIF_RECURSIVE_INLINING;
2303           continue;
2304         }
2305
2306       if (!can_early_inline_edge_p (e))
2307         {
2308           /* Set inlined to true if the callee is marked "always_inline" but
2309              is not inlinable.  This will allow flagging an error later in
2310              expand_call_inline in tree-inline.c.  */
2311           if (lookup_attribute ("always_inline",
2312                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2313             inlined = true;
2314           continue;
2315         }
2316
2317       if (dump_file)
2318         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2319                  xstrdup_for_dump (e->callee->name ()),
2320                  xstrdup_for_dump (e->caller->name ()));
2321       inline_call (e, true, NULL, NULL, false);
2322       inlined = true;
2323     }
2324   if (inlined)
2325     inline_update_overall_summary (node);
2326
2327   return inlined;
2328 }
2329
2330 /* Decide on the inlining.  We do so in the topological order to avoid
2331    expenses on updating data structures.  */
2332
2333 static bool
2334 early_inline_small_functions (struct cgraph_node *node)
2335 {
2336   struct cgraph_edge *e;
2337   bool inlined = false;
2338
2339   for (e = node->callees; e; e = e->next_callee)
2340     {
2341       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2342       if (!inline_summaries->get (callee)->inlinable
2343           || !e->inline_failed)
2344         continue;
2345
2346       /* Do not consider functions not declared inline.  */
2347       if (!DECL_DECLARED_INLINE_P (callee->decl)
2348           && !opt_for_fn (node->decl, flag_inline_small_functions)
2349           && !opt_for_fn (node->decl, flag_inline_functions))
2350         continue;
2351
2352       if (dump_file)
2353         fprintf (dump_file, "Considering inline candidate %s.\n",
2354                  callee->name ());
2355
2356       if (!can_early_inline_edge_p (e))
2357         continue;
2358
2359       if (e->recursive_p ())
2360         {
2361           if (dump_file)
2362             fprintf (dump_file, "  Not inlining: recursive call.\n");
2363           continue;
2364         }
2365
2366       if (!want_early_inline_function_p (e))
2367         continue;
2368
2369       if (dump_file)
2370         fprintf (dump_file, " Inlining %s into %s.\n",
2371                  xstrdup_for_dump (callee->name ()),
2372                  xstrdup_for_dump (e->caller->name ()));
2373       inline_call (e, true, NULL, NULL, true);
2374       inlined = true;
2375     }
2376
2377   return inlined;
2378 }
2379
2380 unsigned int
2381 early_inliner (function *fun)
2382 {
2383   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2384   struct cgraph_edge *edge;
2385   unsigned int todo = 0;
2386   int iterations = 0;
2387   bool inlined = false;
2388
2389   if (seen_error ())
2390     return 0;
2391
2392   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2393      happens when some pass decides to construct new function and
2394      cgraph_add_new_function calls lowering passes and early optimization on
2395      it.  This may confuse ourself when early inliner decide to inline call to
2396      function clone, because function clones don't have parameter list in
2397      ipa-prop matching their signature.  */
2398   if (ipa_node_params_sum)
2399     return 0;
2400
2401 #ifdef ENABLE_CHECKING
2402   node->verify ();
2403 #endif
2404   node->remove_all_references ();
2405
2406   /* Even when not optimizing or not inlining inline always-inline
2407      functions.  */
2408   inlined = inline_always_inline_functions (node);
2409
2410   if (!optimize
2411       || flag_no_inline
2412       || !flag_early_inlining
2413       /* Never inline regular functions into always-inline functions
2414          during incremental inlining.  This sucks as functions calling
2415          always inline functions will get less optimized, but at the
2416          same time inlining of functions calling always inline
2417          function into an always inline function might introduce
2418          cycles of edges to be always inlined in the callgraph.
2419
2420          We might want to be smarter and just avoid this type of inlining.  */
2421       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2422     ;
2423   else if (lookup_attribute ("flatten",
2424                              DECL_ATTRIBUTES (node->decl)) != NULL)
2425     {
2426       /* When the function is marked to be flattened, recursively inline
2427          all calls in it.  */
2428       if (dump_file)
2429         fprintf (dump_file,
2430                  "Flattening %s\n", node->name ());
2431       flatten_function (node, true);
2432       inlined = true;
2433     }
2434   else
2435     {
2436       /* We iterate incremental inlining to get trivial cases of indirect
2437          inlining.  */
2438       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2439              && early_inline_small_functions (node))
2440         {
2441           timevar_push (TV_INTEGRATION);
2442           todo |= optimize_inline_calls (current_function_decl);
2443
2444           /* Technically we ought to recompute inline parameters so the new
2445              iteration of early inliner works as expected.  We however have
2446              values approximately right and thus we only need to update edge
2447              info that might be cleared out for newly discovered edges.  */
2448           for (edge = node->callees; edge; edge = edge->next_callee)
2449             {
2450               /* We have no summary for new bound store calls yet.  */
2451               if (inline_edge_summary_vec.length () > (unsigned)edge->uid)
2452                 {
2453                   struct inline_edge_summary *es = inline_edge_summary (edge);
2454                   es->call_stmt_size
2455                     = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2456                   es->call_stmt_time
2457                     = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2458                 }
2459               if (edge->callee->decl
2460                   && !gimple_check_call_matching_types (
2461                       edge->call_stmt, edge->callee->decl, false))
2462                 edge->call_stmt_cannot_inline_p = true;
2463             }
2464           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2465             inline_update_overall_summary (node);
2466           timevar_pop (TV_INTEGRATION);
2467           iterations++;
2468           inlined = false;
2469         }
2470       if (dump_file)
2471         fprintf (dump_file, "Iterations: %i\n", iterations);
2472     }
2473
2474   if (inlined)
2475     {
2476       timevar_push (TV_INTEGRATION);
2477       todo |= optimize_inline_calls (current_function_decl);
2478       timevar_pop (TV_INTEGRATION);
2479     }
2480
2481   fun->always_inline_functions_inlined = true;
2482
2483   return todo;
2484 }
2485
2486 /* Inline small functions early.  Doing so helps profiling and other passes
2487    to be somewhat more effective and avoids some code duplication in the
2488    later real inlining pass for testcases with very many function calls.  */
2489
2490 namespace {
2491
     /* Static descriptor of the early inliner pass: a GIMPLE pass named
        "einline", filed under the OPTGROUP_INLINE optinfo group and timed
        under TV_EARLY_INLINING.  It requires PROP_ssa, provides and destroys
        no properties, and requests no TODO flags at start or finish.  */
2492 const pass_data pass_data_early_inline =
2493 {
2494   GIMPLE_PASS, /* type */
2495   "einline", /* name */
2496   OPTGROUP_INLINE, /* optinfo_flags */
2497   TV_EARLY_INLINING, /* tv_id */
2498   PROP_ssa, /* properties_required */
2499   0, /* properties_provided */
2500   0, /* properties_destroyed */
2501   0, /* todo_flags_start */
2502   0, /* todo_flags_finish */
2503 };
2504
     /* Pass object for the early inliner.  The class carries no state of its
        own: the constructor only binds pass_data_early_inline to the compiler
        context CTXT, and execute forwards to early_inliner.  */
2505 class pass_early_inline : public gimple_opt_pass
2506 {
2507 public:
2508   pass_early_inline (gcc::context *ctxt)
2509     : gimple_opt_pass (pass_data_early_inline, ctxt)
2510   {}
2511
2512   /* opt_pass methods: */
       /* Declared here and defined out of line just below the class.  */
2513   virtual unsigned int execute (function *);
2514
2515 }; // class pass_early_inline
2516
     /* Run the early inliner on FUN and hand back its result unchanged.
        NOTE(review): the value is presumably a set of TODO flags accumulated
        by early_inliner -- confirm against its definition.  */
2517 unsigned int
2518 pass_early_inline::execute (function *fun)
2519 {
2520   return early_inliner (fun);
2521 }
2522
2523 } // anon namespace
2524
     /* Create a new early inliner pass object bound to the compiler context
        CTXT and return it through its gimple_opt_pass base.  The object is
        allocated with new and nothing in this function releases it.  */

2525 gimple_opt_pass *
2526 make_pass_early_inline (gcc::context *ctxt)
2527 {
2528   return new pass_early_inline (ctxt);
2529 }
2530
2531 namespace {
2532
     /* Static descriptor of the interprocedural inliner pass: an IPA pass
        named "inline", filed under the OPTGROUP_INLINE optinfo group and
        timed under TV_IPA_INLINING.  It requires, provides and destroys no
        properties, and requests TODO_dump_symtab once the pass finishes.  */
2533 const pass_data pass_data_ipa_inline =
2534 {
2535   IPA_PASS, /* type */
2536   "inline", /* name */
2537   OPTGROUP_INLINE, /* optinfo_flags */
2538   TV_IPA_INLINING, /* tv_id */
2539   0, /* properties_required */
2540   0, /* properties_provided */
2541   0, /* properties_destroyed */
2542   0, /* todo_flags_start */
2543   ( TODO_dump_symtab ), /* todo_flags_finish */
2544 };
2545
     /* Pass object for the IPA inliner.  The constructor registers the hooks
        of the pass with ipa_opt_pass_d: inline_generate_summary,
        inline_write_summary and inline_read_summary handle the summaries, and
        inline_transform is the per-function transform.  The optimization
        summary, stmt_fixup and variable_transform hooks are left NULL.  */
2546 class pass_ipa_inline : public ipa_opt_pass_d
2547 {
2548 public:
2549   pass_ipa_inline (gcc::context *ctxt)
2550     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2551                       inline_generate_summary, /* generate_summary */
2552                       inline_write_summary, /* write_summary */
2553                       inline_read_summary, /* read_summary */
2554                       NULL, /* write_optimization_summary */
2555                       NULL, /* read_optimization_summary */
2556                       NULL, /* stmt_fixup */
2557                       0, /* function_transform_todo_flags_start */
2558                       inline_transform, /* function_transform */
2559                       NULL) /* variable_transform */
2560   {}
2561
2562   /* opt_pass methods: */
       /* The function argument is unnamed and unused; ipa_inline takes no
          arguments and its result is returned as is.  */
2563   virtual unsigned int execute (function *) { return ipa_inline (); }
2564
2565 }; // class pass_ipa_inline
2566
2567 } // anon namespace
2568
     /* Create a new IPA inliner pass object bound to the compiler context CTXT
        and return it through its ipa_opt_pass_d base.  The object is allocated
        with new and nothing in this function releases it.  */

2569 ipa_opt_pass_d *
2570 make_pass_ipa_inline (gcc::context *ctxt)
2571 {
2572   return new pass_ipa_inline (ctxt);
2573 }