gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks that a particular inlining is allowed
  28       by the limits specified by the user (allowed function growth, stack
  29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph.  Functions are converted into SSA
  48          form just before this pass and optimized subsequently.  As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and the results of early inlining add a lot of optimization
  51          opportunities for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge.  It performs the following steps:
  72
  73          1) inlining of small functions.  This is implemented by a greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good at inlining recursive calls.  Recursive
  78          calls can be inlined with results similar to loop unrolling.  To do so,
  79          a special purpose recursive inliner is executed on the function when
  80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "hash-set.h"
  97 #include "machmode.h"
  98 #include "vec.h"
  99 #include "double-int.h"
 100 #include "input.h"
 101 #include "alias.h"
 102 #include "symtab.h"
 103 #include "wide-int.h"
 104 #include "inchash.h"
 105 #include "tree.h"
 106 #include "fold-const.h"
 107 #include "trans-mem.h"
 108 #include "calls.h"
 109 #include "tree-inline.h"
 110 #include "langhooks.h"
 111 #include "flags.h"
 112 #include "diagnostic.h"
 113 #include "gimple-pretty-print.h"
 114 #include "params.h"
 115 #include "intl.h"
 116 #include "tree-pass.h"
 117 #include "coverage.h"
 118 #include "rtl.h"
 119 #include "bitmap.h"
 120 #include "profile.h"
 121 #include "predict.h"
 122 #include "hard-reg-set.h"
 123 #include "input.h"
 124 #include "function.h"
 125 #include "basic-block.h"
 126 #include "tree-ssa-alias.h"
 127 #include "internal-fn.h"
 128 #include "gimple-expr.h"
 129 #include "is-a.h"
 130 #include "gimple.h"
 131 #include "gimple-ssa.h"
 132 #include "hash-map.h"
 133 #include "plugin-api.h"
 134 #include "ipa-ref.h"
 135 #include "cgraph.h"
 136 #include "alloc-pool.h"
 137 #include "symbol-summary.h"
 138 #include "ipa-prop.h"
 139 #include "except.h"
 140 #include "target.h"
 141 #include "ipa-inline.h"
 142 #include "ipa-utils.h"
 143 #include "sreal.h"
 144 #include "auto-profile.h"
 145 #include "cilk.h"
 146 #include "builtins.h"
 147 #include "fibonacci_heap.h"
 148
 149 typedef fibonacci_heap <sreal, cgraph_edge> edge_heap_t;
 150 typedef fibonacci_node <sreal, cgraph_edge> edge_heap_node_t;
 151
 152 /* Statistics we collect about inlining algorithm.  */
 153 static int overall_size;
 154 static gcov_type max_count;
 155 static gcov_type spec_rem;
 156
 157 /* Pre-computed constants 1/CGRAPH_FREQ_BASE and 1/100. */
 158 static sreal cgraph_freq_base_rec, percent_rec;
 159
 160 /* Return false when inlining edge E would lead to violating
 161    limits on function unit growth or stack usage growth.
 162
 163    The relative function body growth limit is present generally
 164    to avoid problems with non-linear behavior of the compiler.
 165    To allow inlining huge functions into tiny wrapper, the limit
 166    is always based on the bigger of the two functions considered.
 167
 168    For stack growth limits we always base the growth in stack usage
 169    of the callers.  We want to prevent applications from segfaulting
 170    on stack overflow when functions with huge stack frames gets
 171    inlined. */
 172
 173 static bool
 174 caller_growth_limits (struct cgraph_edge *e)
 175 {
 176   struct cgraph_node *to = e->caller;
 177   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 178   int newsize;
 179   int limit = 0;
 180   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 181   inline_summary *info, *what_info, *outer_info = inline_summaries->get (to);
 182
 183   /* Look for function e->caller is inlined to.  While doing
 184      so work out the largest function body on the way.  As
 185      described above, we want to base our function growth
 186      limits based on that.  Not on the self size of the
 187      outer function, not on the self size of inline code
 188      we immediately inline to.  This is the most relaxed
 189      interpretation of the rule "do not grow large functions
 190      too much in order to prevent compiler from exploding".  */
 191   while (true)
 192     {
 193       info = inline_summaries->get (to);
 194       if (limit < info->self_size)
 195         limit = info->self_size;
 196       if (stack_size_limit < info->estimated_self_stack_size)
 197         stack_size_limit = info->estimated_self_stack_size;
 198       if (to->global.inlined_to)
 199         to = to->callers->caller;
 200       else
 201         break;
 202     }
 203
 204   what_info = inline_summaries->get (what);
 205
 206   if (limit < what_info->self_size)
 207     limit = what_info->self_size;
 208
 209   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 210
 211   /* Check the size after inlining against the function limits.  But allow
 212      the function to shrink if it went over the limits by forced inlining.  */
 213   newsize = estimate_size_after_inlining (to, e);
 214   if (newsize >= info->size
 215       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 216       && newsize > limit)
 217     {
 218       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 219       return false;
 220     }
 221
 222   if (!what_info->estimated_stack_size)
 223     return true;
 224
 225   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 226      due to large i/o datastructures used by the Fortran front-end.
 227      We ought to ignore this limit when we know that the edge is executed
 228      on every invocation of the caller (i.e. its call statement dominates
 229      exit block).  We do not track this information, yet.  */
 230   stack_size_limit += ((gcov_type)stack_size_limit
 231                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 232
 233   inlined_stack = (outer_info->stack_frame_offset
 234                    + outer_info->estimated_self_stack_size
 235                    + what_info->estimated_stack_size);
 236   /* Check new stack consumption with stack consumption at the place
 237      stack is used.  */
 238   if (inlined_stack > stack_size_limit
 239       /* If function already has large stack usage from sibling
 240          inline call, we can inline, too.
 241          This bit overoptimistically assume that we are good at stack
 242          packing.  */
 243       && inlined_stack > info->estimated_stack_size
 244       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 245     {
 246       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 247       return false;
 248     }
 249   return true;
 250 }
 251
 252 /* Dump info about why inlining has failed.  */
 253
 254 static void
 255 report_inline_failed_reason (struct cgraph_edge *e)
 256 {
 257   if (dump_file)
 258     {
 259       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 260                xstrdup_for_dump (e->caller->name ()), e->caller->order,
 261                xstrdup_for_dump (e->callee->name ()), e->callee->order,
 262                cgraph_inline_failed_string (e->inline_failed));
 263     }
 264 }
 265
 266  /* Decide whether sanitizer-related attributes allow inlining. */
 267
 268 static bool
 269 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 270 {
 271   /* Don't care if sanitizer is disabled */
 272   if (!(flag_sanitize & SANITIZE_ADDRESS))
 273     return true;
 274
 275   if (!caller || !callee)
 276     return true;
 277
 278   return !!lookup_attribute ("no_sanitize_address",
 279       DECL_ATTRIBUTES (caller)) ==
 280       !!lookup_attribute ("no_sanitize_address",
 281       DECL_ATTRIBUTES (callee));
 282 }
 283
 284  /* Decide if we can inline the edge and possibly update
 285    inline_failed reason.
 286    We check whether inlining is possible at all and whether
 287    caller growth limits allow doing so.
 288
 289    if REPORT is true, output reason to the dump file.
 290
 291    if DISREGARD_LIMITS is true, ignore size limits.*/
 292
 293 static bool
 294 can_inline_edge_p (struct cgraph_edge *e, bool report,
 295                    bool disregard_limits = false)
 296 {
 297   bool inlinable = true;
 298   enum availability avail;
 299   cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
 300   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 301   tree callee_tree
 302     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 303   struct function *caller_fun = e->caller->get_fun ();
 304   struct function *callee_fun = callee ? callee->get_fun () : NULL;
 305
 306   gcc_assert (e->inline_failed);
 307
 308   if (!callee || !callee->definition)
 309     {
 310       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 311       inlinable = false;
 312     }
 313   else if (callee->calls_comdat_local)
 314     {
 315       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 316       inlinable = false;
 317     }
 318   else if (!inline_summaries->get (callee)->inlinable
 319            || (caller_fun && fn_contains_cilk_spawn_p (caller_fun)))
 320     {
 321       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 322       inlinable = false;
 323     }
 324   else if (avail <= AVAIL_INTERPOSABLE)
 325     {
 326       e->inline_failed = CIF_OVERWRITABLE;
 327       inlinable = false;
 328     }
 329   else if (e->call_stmt_cannot_inline_p)
 330     {
 331       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 332         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 333       inlinable = false;
 334     }
 335   /* Don't inline if the functions have different EH personalities.  */
 336   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 337            && DECL_FUNCTION_PERSONALITY (callee->decl)
 338            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 339                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 340     {
 341       e->inline_failed = CIF_EH_PERSONALITY;
 342       inlinable = false;
 343     }
 344   /* TM pure functions should not be inlined into non-TM_pure
 345      functions.  */
 346   else if (is_tm_pure (callee->decl)
 347            && !is_tm_pure (e->caller->decl))
 348     {
 349       e->inline_failed = CIF_UNSPECIFIED;
 350       inlinable = false;
 351     }
 352   /* Don't inline if the callee can throw non-call exceptions but the
 353      caller cannot.
 354      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 355      Move the flag into cgraph node or mirror it in the inline summary.  */
 356   else if (callee_fun && callee_fun->can_throw_non_call_exceptions
 357            && !(caller_fun && caller_fun->can_throw_non_call_exceptions))
 358     {
 359       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 360       inlinable = false;
 361     }
 362   /* Check compatibility of target optimization options.  */
 363   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 364                                                 callee->decl))
 365     {
 366       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 367       inlinable = false;
 368     }
 369   /* Don't inline a function with mismatched sanitization attributes. */
 370   else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
 371     {
 372       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 373       inlinable = false;
 374     }
 375   /* Check if caller growth allows the inlining.  */
 376   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 377            && !disregard_limits
 378            && !lookup_attribute ("flatten",
 379                                  DECL_ATTRIBUTES
 380                                    (e->caller->global.inlined_to
 381                                     ? e->caller->global.inlined_to->decl
 382                                     : e->caller->decl))
 383            && !caller_growth_limits (e))
 384     inlinable = false;
 385   /* Don't inline a function with a higher optimization level than the
 386      caller.  FIXME: this is really just tip of iceberg of handling
 387      optimization attribute.  */
 388   else if (caller_tree != callee_tree)
 389     {
 390       if (((opt_for_fn (e->caller->decl, optimize)
 391             > opt_for_fn (callee->decl, optimize))
 392             || (opt_for_fn (e->caller->decl, optimize_size)
 393                 != opt_for_fn (callee->decl, optimize_size)))
 394           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 395           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 396         {
 397           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 398           inlinable = false;
 399         }
 400     }
 401
 402   if (!inlinable && report)
 403     report_inline_failed_reason (e);
 404   return inlinable;
 405 }
 406
 407
 408 /* Return true if the edge E is inlinable during early inlining.  */
 409
 410 static bool
 411 can_early_inline_edge_p (struct cgraph_edge *e)
 412 {
 413   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 414   /* Early inliner might get called at WPA stage when IPA pass adds new
 415      function.  In this case we can not really do any of early inlining
 416      because function bodies are missing.  */
 417   if (!gimple_has_body_p (callee->decl))
 418     {
 419       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 420       return false;
 421     }
 422   /* In early inliner some of callees may not be in SSA form yet
 423      (i.e. the callgraph is cyclic and we did not process
 424      the callee by early inliner, yet).  We don't have CIF code for this
 425      case; later we will re-do the decision in the real inliner.  */
 426   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 427       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 428     {
 429       if (dump_file)
 430         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 431       return false;
 432     }
 433   if (!can_inline_edge_p (e, true))
 434     return false;
 435   return true;
 436 }
 437
 438
 439 /* Return number of calls in N.  Ignore cheap builtins.  */
 440
 441 static int
 442 num_calls (struct cgraph_node *n)
 443 {
 444   struct cgraph_edge *e;
 445   int num = 0;
 446
 447   for (e = n->callees; e; e = e->next_callee)
 448     if (!is_inexpensive_builtin (e->callee->decl))
 449       num++;
 450   return num;
 451 }
 452
 453
 454 /* Return true if we are interested in inlining small function.  */
 455
 456 static bool
 457 want_early_inline_function_p (struct cgraph_edge *e)
 458 {
 459   bool want_inline = true;
 460   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 461
 462   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 463     ;
 464   /* For AutoFDO, we need to make sure that before profile summary, all
 465      hot paths' IR look exactly the same as profiled binary. As a result,
 466      in einliner, we will disregard size limit and inline those callsites
 467      that are:
 468        * inlined in the profiled binary, and
 469        * the cloned callee has enough samples to be considered "hot".  */
 470   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 471     ;
 472   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 473            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 474     {
 475       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 476       report_inline_failed_reason (e);
 477       want_inline = false;
 478     }
 479   else
 480     {
 481       int growth = estimate_edge_growth (e);
 482       int n;
 483
 484       if (growth <= 0)
 485         ;
 486       else if (!e->maybe_hot_p ()
 487                && growth > 0)
 488         {
 489           if (dump_file)
 490             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 491                      "call is cold and code would grow by %i\n",
 492                      xstrdup_for_dump (e->caller->name ()),
 493                      e->caller->order,
 494                      xstrdup_for_dump (callee->name ()), callee->order,
 495                      growth);
 496           want_inline = false;
 497         }
 498       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 499         {
 500           if (dump_file)
 501             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 502                      "growth %i exceeds --param early-inlining-insns\n",
 503                      xstrdup_for_dump (e->caller->name ()),
 504                      e->caller->order,
 505                      xstrdup_for_dump (callee->name ()), callee->order,
 506                      growth);
 507           want_inline = false;
 508         }
 509       else if ((n = num_calls (callee)) != 0
 510                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 511         {
 512           if (dump_file)
 513             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 514                      "growth %i exceeds --param early-inlining-insns "
 515                      "divided by number of calls\n",
 516                      xstrdup_for_dump (e->caller->name ()),
 517                      e->caller->order,
 518                      xstrdup_for_dump (callee->name ()), callee->order,
 519                      growth);
 520           want_inline = false;
 521         }
 522     }
 523   return want_inline;
 524 }
 525
 526 /* Compute time of the edge->caller + edge->callee execution when inlining
 527    does not happen.  */
 528
 529 inline sreal
 530 compute_uninlined_call_time (struct inline_summary *callee_info,
 531                              struct cgraph_edge *edge)
 532 {
 533   sreal uninlined_call_time = (sreal)callee_info->time;
 534   cgraph_node *caller = (edge->caller->global.inlined_to
 535                          ? edge->caller->global.inlined_to
 536                          : edge->caller);
 537
 538   if (edge->count && caller->count)
 539     uninlined_call_time *= (sreal)edge->count / caller->count;
 540   if (edge->frequency)
 541     uninlined_call_time *= cgraph_freq_base_rec * edge->frequency;
 542   else
 543     uninlined_call_time = uninlined_call_time >> 11;
 544
 545   int caller_time = inline_summaries->get (caller)->time;
 546   return uninlined_call_time + caller_time;
 547 }
 548
 549 /* Same as compute_uinlined_call_time but compute time when inlining
 550    does happen.  */
 551
 552 inline sreal
 553 compute_inlined_call_time (struct cgraph_edge *edge,
 554                            int edge_time)
 555 {
 556   cgraph_node *caller = (edge->caller->global.inlined_to
 557                          ? edge->caller->global.inlined_to
 558                          : edge->caller);
 559   int caller_time = inline_summaries->get (caller)->time;
 560   sreal time = edge_time;
 561
 562   if (edge->count && caller->count)
 563     time *= (sreal)edge->count / caller->count;
 564   if (edge->frequency)
 565     time *= cgraph_freq_base_rec * edge->frequency;
 566   else
 567     time = time >> 11;
 568
 569   /* This calculation should match one in ipa-inline-analysis.
 570      FIXME: Once ipa-inline-analysis is converted to sreal this can be
 571      simplified.  */
 572   time -= (sreal) ((gcov_type) edge->frequency
 573                    * inline_edge_summary (edge)->call_stmt_time
 574                    * (INLINE_TIME_SCALE / CGRAPH_FREQ_BASE)) / INLINE_TIME_SCALE;
 575   time += caller_time;
 576   if (time <= 0)
 577     time = ((sreal) 1) >> 8;
 578   gcc_checking_assert (time >= 0);
 579   return time;
 580 }
 581
 582 /* Return true if the speedup for inlining E is bigger than
 583    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 584
 585 static bool
 586 big_speedup_p (struct cgraph_edge *e)
 587 {
 588   sreal time = compute_uninlined_call_time (inline_summaries->get (e->callee),
 589                                             e);
 590   sreal inlined_time = compute_inlined_call_time (e, estimate_edge_time (e));
 591
 592   if (time - inlined_time
 593       > (sreal) time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP)
 594          * percent_rec)
 595     return true;
 596   return false;
 597 }
 598
 599 /* Return true if we are interested in inlining small function.
 600    When REPORT is true, report reason to dump file.  */
 601
 602 static bool
 603 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 604 {
 605   bool want_inline = true;
 606   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 607
 608   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 609     ;
 610   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 611            && !opt_for_fn (e->caller->decl, flag_inline_small_functions))
 612     {
 613       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 614       want_inline = false;
 615     }
 616   /* Do fast and conservative check if the function can be good
 617      inline candidate.  At the moment we allow inline hints to
 618      promote non-inline functions to inline and we increase
 619      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 620   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 621            && (!e->count || !e->maybe_hot_p ()))
 622            && inline_summaries->get (callee)->min_size
 623                 - inline_edge_summary (e)->call_stmt_size
 624               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 625     {
 626       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 627       want_inline = false;
 628     }
 629   else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
 630            && inline_summaries->get (callee)->min_size
 631                 - inline_edge_summary (e)->call_stmt_size
 632               > 16 * MAX_INLINE_INSNS_SINGLE)
 633     {
 634       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 635                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 636                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 637       want_inline = false;
 638     }
 639   else
 640     {
 641       int growth = estimate_edge_growth (e);
 642       inline_hints hints = estimate_edge_hints (e);
 643       bool big_speedup = big_speedup_p (e);
 644
 645       if (growth <= 0)
 646         ;
 647       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 648          hints suggests that inlining given function is very profitable.  */
 649       else if (DECL_DECLARED_INLINE_P (callee->decl)
 650                && growth >= MAX_INLINE_INSNS_SINGLE
 651                && ((!big_speedup
 652                     && !(hints & (INLINE_HINT_indirect_call
 653                                   | INLINE_HINT_known_hot
 654                                   | INLINE_HINT_loop_iterations
 655                                   | INLINE_HINT_array_index
 656                                   | INLINE_HINT_loop_stride)))
 657                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 658         {
 659           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 660           want_inline = false;
 661         }
 662       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 663                && !opt_for_fn (e->caller->decl, flag_inline_functions))
 664         {
 665           /* growth_likely_positive is expensive, always test it last.  */
 666           if (growth >= MAX_INLINE_INSNS_SINGLE
 667               || growth_likely_positive (callee, growth))
 668             {
 669               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 670               want_inline = false;
 671             }
 672         }
 673       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 674          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 675          inlining given function is very profitable.  */
 676       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 677                && !big_speedup
 678                && !(hints & INLINE_HINT_known_hot)
 679                && growth >= ((hints & (INLINE_HINT_indirect_call
 680                                        | INLINE_HINT_loop_iterations
 681                                        | INLINE_HINT_array_index
 682                                        | INLINE_HINT_loop_stride))
 683                              ? MAX (MAX_INLINE_INSNS_AUTO,
 684                                     MAX_INLINE_INSNS_SINGLE)
 685                              : MAX_INLINE_INSNS_AUTO))
 686         {
 687           /* growth_likely_positive is expensive, always test it last.  */
 688           if (growth >= MAX_INLINE_INSNS_SINGLE
 689               || growth_likely_positive (callee, growth))
 690             {
 691               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 692               want_inline = false;
 693             }
 694         }
 695       /* If call is cold, do not inline when function body would grow. */
 696       else if (!e->maybe_hot_p ()
 697                && (growth >= MAX_INLINE_INSNS_SINGLE
 698                    || growth_likely_positive (callee, growth)))
 699         {
 700           e->inline_failed = CIF_UNLIKELY_CALL;
 701           want_inline = false;
 702         }
 703     }
 704   if (!want_inline && report)
 705     report_inline_failed_reason (e);
 706   return want_inline;
 707 }
 708
 709 /* EDGE is a self recursive edge.
 710    We handle two cases - when function A is being inlined into itself
 711    or when function A is being inlined into another inlined copy of function
 712    A within function B.
 713
 714    In the first case OUTER_NODE points to the toplevel copy of A, while
 715    in the second case OUTER_NODE points to the outermost copy of A in B.
 716
 717    In both cases we want to be extra selective since
 718    inlining the call will just cause new recursive calls to appear.  */
 719
 720 static bool
 721 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 722                                    struct cgraph_node *outer_node,
 723                                    bool peeling,
 724                                    int depth)
 725 {
 726   char const *reason = NULL;
 727   bool want_inline = true;
 728   int caller_freq = CGRAPH_FREQ_BASE;
 729   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 730
 731   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 732     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 733
 734   if (!edge->maybe_hot_p ())
 735     {
 736       reason = "recursive call is cold";
 737       want_inline = false;
 738     }
 739   else if (max_count && !outer_node->count)
 740     {
 741       reason = "not executed in profile";
 742       want_inline = false;
 743     }
 744   else if (depth > max_depth)
 745     {
 746       reason = "--param max-inline-recursive-depth exceeded.";
 747       want_inline = false;
 748     }
 749
 750   if (outer_node->global.inlined_to)
 751     caller_freq = outer_node->callers->frequency;
 752
 753   if (!caller_freq)
 754     {
 755       reason = "function is inlined and unlikely";
 756       want_inline = false;
 757     }
 758
 759   if (!want_inline)
 760     ;
 761   /* Inlining of self recursive function into copy of itself within other function
 762      is transformation similar to loop peeling.
 763
 764      Peeling is profitable if we can inline enough copies to make probability
 765      of actual call to the self recursive function very small.  Be sure that
 766      the probability of recursion is small.
 767
 768      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 769      This way the expected number of recision is at most max_depth.  */
 770   else if (peeling)
 771     {
 772       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 773                                          / max_depth);
 774       int i;
 775       for (i = 1; i < depth; i++)
 776         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 777       if (max_count
 778           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 779               >= max_prob))
 780         {
 781           reason = "profile of recursive call is too large";
 782           want_inline = false;
 783         }
 784       if (!max_count
 785           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 786               >= max_prob))
 787         {
 788           reason = "frequency of recursive call is too large";
 789           want_inline = false;
 790         }
 791     }
 792   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 793      depth is large.  We reduce function call overhead and increase chances that
 794      things fit in hardware return predictor.
 795
 796      Recursive inlining might however increase cost of stack frame setup
 797      actually slowing down functions whose recursion tree is wide rather than
 798      deep.
 799
 800      Deciding reliably on when to do recursive inlining without profile feedback
 801      is tricky.  For now we disable recursive inlining when probability of self
 802      recursion is low.
 803
 804      Recursive inlining of self recursive call within loop also results in large loop
 805      depths that generally optimize badly.  We may want to throttle down inlining
 806      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 807      methods.  */
 808   else
 809     {
 810       if (max_count
 811           && (edge->count * 100 / outer_node->count
 812               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 813         {
 814           reason = "profile of recursive call is too small";
 815           want_inline = false;
 816         }
 817       else if (!max_count
 818                && (edge->frequency * 100 / caller_freq
 819                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 820         {
 821           reason = "frequency of recursive call is too small";
 822           want_inline = false;
 823         }
 824     }
 825   if (!want_inline && dump_file)
 826     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 827   return want_inline;
 828 }
 829
 830 /* Return true when NODE has uninlinable caller;
 831    set HAS_HOT_CALL if it has hot call.
 832    Worker for cgraph_for_node_and_aliases.  */
 833
 834 static bool
 835 check_callers (struct cgraph_node *node, void *has_hot_call)
 836 {
 837   struct cgraph_edge *e;
 838    for (e = node->callers; e; e = e->next_caller)
 839      {
 840        if (!opt_for_fn (e->caller->decl, flag_inline_functions_called_once))
 841          return true;
 842        if (!can_inline_edge_p (e, true))
 843          return true;
 844        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 845          *(bool *)has_hot_call = true;
 846      }
 847   return false;
 848 }
 849
 850 /* If NODE has a caller, return true.  */
 851
 852 static bool
 853 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 854 {
 855   if (node->callers)
 856     return true;
 857   return false;
 858 }
 859
 860 /* Decide if inlining NODE would reduce unit size by eliminating
 861    the offline copy of function.
 862    When COLD is true the cold calls are considered, too.  */
 863
 864 static bool
 865 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 866 {
 867   bool has_hot_call = false;
 868
 869   if (node->ultimate_alias_target () != node)
 870     return false;
 871   /* Already inlined?  */
 872   if (node->global.inlined_to)
 873     return false;
 874   /* Does it have callers?  */
 875   if (!node->call_for_symbol_thunks_and_aliases (has_caller_p, NULL, true))
 876     return false;
 877   /* Inlining into all callers would increase size?  */
 878   if (estimate_growth (node) > 0)
 879     return false;
 880   /* All inlines must be possible.  */
 881   if (node->call_for_symbol_thunks_and_aliases (check_callers, &has_hot_call,
 882                                                 true))
 883     return false;
 884   if (!cold && !has_hot_call)
 885     return false;
 886   return true;
 887 }
 888
 889 /* A cost model driving the inlining heuristics in a way so the edges with
 890    smallest badness are inlined first.  After each inlining is performed
 891    the costs of all caller edges of nodes affected are recomputed so the
 892    metrics may accurately depend on values such as number of inlinable callers
 893    of the function or function body size.  */
 894
 895 static sreal
 896 edge_badness (struct cgraph_edge *edge, bool dump)
 897 {
 898   sreal badness;
 899   int growth, edge_time;
 900   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
 901   struct inline_summary *callee_info = inline_summaries->get (callee);
 902   inline_hints hints;
 903   cgraph_node *caller = (edge->caller->global.inlined_to
 904                          ? edge->caller->global.inlined_to
 905                          : edge->caller);
 906
 907   growth = estimate_edge_growth (edge);
 908   edge_time = estimate_edge_time (edge);
 909   hints = estimate_edge_hints (edge);
 910   gcc_checking_assert (edge_time >= 0);
 911   gcc_checking_assert (edge_time <= callee_info->time);
 912   gcc_checking_assert (growth <= callee_info->size);
 913
 914   if (dump)
 915     {
 916       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 917                xstrdup_for_dump (edge->caller->name ()),
 918                edge->caller->order,
 919                xstrdup_for_dump (callee->name ()),
 920                edge->callee->order);
 921       fprintf (dump_file, "      size growth %i, time %i ",
 922                growth,
 923                edge_time);
 924       dump_inline_hints (dump_file, hints);
 925       if (big_speedup_p (edge))
 926         fprintf (dump_file, " big_speedup");
 927       fprintf (dump_file, "\n");
 928     }
 929
 930   /* Always prefer inlining saving code size.  */
 931   if (growth <= 0)
 932     {
 933       badness = (sreal) (-SREAL_MIN_SIG + growth) << (SREAL_MAX_EXP / 256);
 934       if (dump)
 935         fprintf (dump_file, "      %f: Growth %d <= 0\n", badness.to_double (),
 936                  growth);
 937     }
 938    /* Inlining into EXTERNAL functions is not going to change anything unless
 939       they are themselves inlined.  */
 940    else if (DECL_EXTERNAL (caller->decl))
 941     {
 942       if (dump)
 943         fprintf (dump_file, "      max: function is external\n");
 944       return sreal::max ();
 945     }
 946   /* When profile is available. Compute badness as:
 947
 948                  time_saved * caller_count
 949      goodness =  ---------------------------------
 950                  growth_of_caller * overall_growth
 951
 952      badness = - goodness
 953
 954      Again use negative value to make calls with profile appear hotter
 955      then calls without.
 956   */
 957   else if (opt_for_fn (caller->decl, flag_guess_branch_prob) || caller->count)
 958     {
 959       sreal numerator, denominator;
 960
 961       numerator = (compute_uninlined_call_time (callee_info, edge)
 962                    - compute_inlined_call_time (edge, edge_time));
 963       if (numerator == 0)
 964         numerator = ((sreal) 1 >> 8);
 965       if (caller->count)
 966         numerator *= caller->count;
 967       else if (opt_for_fn (caller->decl, flag_branch_probabilities))
 968         numerator = numerator >> 11;
 969       denominator = growth;
 970       if (callee_info->growth > 0)
 971         denominator *= callee_info->growth;
 972
 973       badness = - numerator / denominator;
 974
 975       if (dump)
 976         {
 977           fprintf (dump_file,
 978                    "      %f: guessed profile. frequency %f, count %"PRId64
 979                    " caller count %"PRId64
 980                    " time w/o inlining %f, time w inlining %f"
 981                    " overall growth %i (current) %i (original)\n",
 982                    badness.to_double (), (double)edge->frequency / CGRAPH_FREQ_BASE,
 983                    edge->count, caller->count,
 984                    compute_uninlined_call_time (callee_info, edge).to_double (),
 985                    compute_inlined_call_time (edge, edge_time).to_double (),
 986                    estimate_growth (callee),
 987                    callee_info->growth);
 988         }
 989     }
 990   /* When function local profile is not available or it does not give
 991      useful information (ie frequency is zero), base the cost on
 992      loop nest and overall size growth, so we optimize for overall number
 993      of functions fully inlined in program.  */
 994   else
 995     {
 996       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
 997       badness = growth;
 998
 999       /* Decrease badness if call is nested.  */
1000       if (badness > 0)
1001         badness = badness >> nest;
1002       else
1003         badness = badness << nest;
1004       if (dump)
1005         fprintf (dump_file, "      %f: no profile. nest %i\n", badness.to_double (),
1006                  nest);
1007     }
1008   gcc_checking_assert (badness != 0);
1009
1010   if (edge->recursive_p ())
1011     badness = badness.shift (badness > 0 ? 4 : -4);
1012   if ((hints & (INLINE_HINT_indirect_call
1013                 | INLINE_HINT_loop_iterations
1014                 | INLINE_HINT_array_index
1015                 | INLINE_HINT_loop_stride))
1016       || callee_info->growth <= 0)
1017     badness = badness.shift (badness > 0 ? -2 : 2);
1018   if (hints & (INLINE_HINT_same_scc))
1019     badness = badness.shift (badness > 0 ? 3 : -3);
1020   else if (hints & (INLINE_HINT_in_scc))
1021     badness = badness.shift (badness > 0 ? 2 : -2);
1022   else if (hints & (INLINE_HINT_cross_module))
1023     badness = badness.shift (badness > 0 ? 1 : -1);
1024   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1025     badness = badness.shift (badness > 0 ? -4 : 4);
1026   else if ((hints & INLINE_HINT_declared_inline))
1027     badness = badness.shift (badness > 0 ? -3 : 3);
1028   if (dump)
1029     fprintf (dump_file, "      Adjusted by hints %f\n", badness.to_double ());
1030   return badness;
1031 }
1032
1033 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1034 static inline void
1035 update_edge_key (edge_heap_t *heap, struct cgraph_edge *edge)
1036 {
1037   sreal badness = edge_badness (edge, false);
1038   if (edge->aux)
1039     {
1040       edge_heap_node_t *n = (edge_heap_node_t *) edge->aux;
1041       gcc_checking_assert (n->get_data () == edge);
1042
1043       /* fibonacci_heap::replace_key does busy updating of the
1044          heap that is unnecesarily expensive.
1045          We do lazy increases: after extracting minimum if the key
1046          turns out to be out of date, it is re-inserted into heap
1047          with correct value.  */
1048       if (badness < n->get_key ())
1049         {
1050           if (dump_file && (dump_flags & TDF_DETAILS))
1051             {
1052               fprintf (dump_file,
1053                        "  decreasing badness %s/%i -> %s/%i, %f"
1054                        " to %f\n",
1055                        xstrdup_for_dump (edge->caller->name ()),
1056                        edge->caller->order,
1057                        xstrdup_for_dump (edge->callee->name ()),
1058                        edge->callee->order,
1059                        n->get_key ().to_double (),
1060                        badness.to_double ());
1061             }
1062           heap->decrease_key (n, badness);
1063         }
1064     }
1065   else
1066     {
1067        if (dump_file && (dump_flags & TDF_DETAILS))
1068          {
1069            fprintf (dump_file,
1070                     "  enqueuing call %s/%i -> %s/%i, badness %f\n",
1071                     xstrdup_for_dump (edge->caller->name ()),
1072                     edge->caller->order,
1073                     xstrdup_for_dump (edge->callee->name ()),
1074                     edge->callee->order,
1075                     badness.to_double ());
1076          }
1077       edge->aux = heap->insert (badness, edge);
1078     }
1079 }
1080
1081
1082 /* NODE was inlined.
1083    All caller edges needs to be resetted because
1084    size estimates change. Similarly callees needs reset
1085    because better context may be known.  */
1086
1087 static void
1088 reset_edge_caches (struct cgraph_node *node)
1089 {
1090   struct cgraph_edge *edge;
1091   struct cgraph_edge *e = node->callees;
1092   struct cgraph_node *where = node;
1093   struct ipa_ref *ref;
1094
1095   if (where->global.inlined_to)
1096     where = where->global.inlined_to;
1097
1098   for (edge = where->callers; edge; edge = edge->next_caller)
1099     if (edge->inline_failed)
1100       reset_edge_growth_cache (edge);
1101
1102   FOR_EACH_ALIAS (where, ref)
1103     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1104
1105   if (!e)
1106     return;
1107
1108   while (true)
1109     if (!e->inline_failed && e->callee->callees)
1110       e = e->callee->callees;
1111     else
1112       {
1113         if (e->inline_failed)
1114           reset_edge_growth_cache (e);
1115         if (e->next_callee)
1116           e = e->next_callee;
1117         else
1118           {
1119             do
1120               {
1121                 if (e->caller == node)
1122                   return;
1123                 e = e->caller->callers;
1124               }
1125             while (!e->next_callee);
1126             e = e->next_callee;
1127           }
1128       }
1129 }
1130
1131 /* Recompute HEAP nodes for each of caller of NODE.
1132    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1133    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1134    it is inlinable. Otherwise check all edges.  */
1135
1136 static void
1137 update_caller_keys (edge_heap_t *heap, struct cgraph_node *node,
1138                     bitmap updated_nodes,
1139                     struct cgraph_edge *check_inlinablity_for)
1140 {
1141   struct cgraph_edge *edge;
1142   struct ipa_ref *ref;
1143
1144   if ((!node->alias && !inline_summaries->get (node)->inlinable)
1145       || node->global.inlined_to)
1146     return;
1147   if (!bitmap_set_bit (updated_nodes, node->uid))
1148     return;
1149
1150   FOR_EACH_ALIAS (node, ref)
1151     {
1152       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1153       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1154     }
1155
1156   for (edge = node->callers; edge; edge = edge->next_caller)
1157     if (edge->inline_failed)
1158       {
1159         if (!check_inlinablity_for
1160             || check_inlinablity_for == edge)
1161           {
1162             if (can_inline_edge_p (edge, false)
1163                 && want_inline_small_function_p (edge, false))
1164               update_edge_key (heap, edge);
1165             else if (edge->aux)
1166               {
1167                 report_inline_failed_reason (edge);
1168                 heap->delete_node ((edge_heap_node_t *) edge->aux);
1169                 edge->aux = NULL;
1170               }
1171           }
1172         else if (edge->aux)
1173           update_edge_key (heap, edge);
1174       }
1175 }
1176
1177 /* Recompute HEAP nodes for each uninlined call in NODE.
1178    This is used when we know that edge badnesses are going only to increase
1179    (we introduced new call site) and thus all we need is to insert newly
1180    created edges into heap.  */
1181
1182 static void
1183 update_callee_keys (edge_heap_t *heap, struct cgraph_node *node,
1184                     bitmap updated_nodes)
1185 {
1186   struct cgraph_edge *e = node->callees;
1187
1188   if (!e)
1189     return;
1190   while (true)
1191     if (!e->inline_failed && e->callee->callees)
1192       e = e->callee->callees;
1193     else
1194       {
1195         enum availability avail;
1196         struct cgraph_node *callee;
1197         /* We do not reset callee growth cache here.  Since we added a new call,
1198            growth chould have just increased and consequentely badness metric
1199            don't need updating.  */
1200         if (e->inline_failed
1201             && (callee = e->callee->ultimate_alias_target (&avail))
1202             && inline_summaries->get (callee)->inlinable
1203             && avail >= AVAIL_AVAILABLE
1204             && !bitmap_bit_p (updated_nodes, callee->uid))
1205           {
1206             if (can_inline_edge_p (e, false)
1207                 && want_inline_small_function_p (e, false))
1208               update_edge_key (heap, e);
1209             else if (e->aux)
1210               {
1211                 report_inline_failed_reason (e);
1212                 heap->delete_node ((edge_heap_node_t *) e->aux);
1213                 e->aux = NULL;
1214               }
1215           }
1216         if (e->next_callee)
1217           e = e->next_callee;
1218         else
1219           {
1220             do
1221               {
1222                 if (e->caller == node)
1223                   return;
1224                 e = e->caller->callers;
1225               }
1226             while (!e->next_callee);
1227             e = e->next_callee;
1228           }
1229       }
1230 }
1231
1232 /* Enqueue all recursive calls from NODE into priority queue depending on
1233    how likely we want to recursively inline the call.  */
1234
1235 static void
1236 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1237                         edge_heap_t *heap)
1238 {
1239   struct cgraph_edge *e;
1240   enum availability avail;
1241
1242   for (e = where->callees; e; e = e->next_callee)
1243     if (e->callee == node
1244         || (e->callee->ultimate_alias_target (&avail) == node
1245             && avail > AVAIL_INTERPOSABLE))
1246       {
1247         /* When profile feedback is available, prioritize by expected number
1248            of calls.  */
1249         heap->insert (!max_count ? -e->frequency
1250                       : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1251                       e);
1252       }
1253   for (e = where->callees; e; e = e->next_callee)
1254     if (!e->inline_failed)
1255       lookup_recursive_calls (node, e->callee, heap);
1256 }
1257
1258 /* Decide on recursive inlining: in the case function has recursive calls,
1259    inline until body size reaches given argument.  If any new indirect edges
1260    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1261    is NULL.  */
1262
1263 static bool
1264 recursive_inlining (struct cgraph_edge *edge,
1265                     vec<cgraph_edge *> *new_edges)
1266 {
1267   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1268   edge_heap_t heap (sreal::min ());
1269   struct cgraph_node *node;
1270   struct cgraph_edge *e;
1271   struct cgraph_node *master_clone = NULL, *next;
1272   int depth = 0;
1273   int n = 0;
1274
1275   node = edge->caller;
1276   if (node->global.inlined_to)
1277     node = node->global.inlined_to;
1278
1279   if (DECL_DECLARED_INLINE_P (node->decl))
1280     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1281
1282   /* Make sure that function is small enough to be considered for inlining.  */
1283   if (estimate_size_after_inlining (node, edge)  >= limit)
1284     return false;
1285   lookup_recursive_calls (node, node, &heap);
1286   if (heap.empty ())
1287     return false;
1288
1289   if (dump_file)
1290     fprintf (dump_file,
1291              "  Performing recursive inlining on %s\n",
1292              node->name ());
1293
1294   /* Do the inlining and update list of recursive call during process.  */
1295   while (!heap.empty ())
1296     {
1297       struct cgraph_edge *curr = heap.extract_min ();
1298       struct cgraph_node *cnode, *dest = curr->callee;
1299
1300       if (!can_inline_edge_p (curr, true))
1301         continue;
1302
1303       /* MASTER_CLONE is produced in the case we already started modified
1304          the function. Be sure to redirect edge to the original body before
1305          estimating growths otherwise we will be seeing growths after inlining
1306          the already modified body.  */
1307       if (master_clone)
1308         {
1309           curr->redirect_callee (master_clone);
1310           reset_edge_growth_cache (curr);
1311         }
1312
1313       if (estimate_size_after_inlining (node, curr) > limit)
1314         {
1315           curr->redirect_callee (dest);
1316           reset_edge_growth_cache (curr);
1317           break;
1318         }
1319
1320       depth = 1;
1321       for (cnode = curr->caller;
1322            cnode->global.inlined_to; cnode = cnode->callers->caller)
1323         if (node->decl
1324             == curr->callee->ultimate_alias_target ()->decl)
1325           depth++;
1326
1327       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1328         {
1329           curr->redirect_callee (dest);
1330           reset_edge_growth_cache (curr);
1331           continue;
1332         }
1333
1334       if (dump_file)
1335         {
1336           fprintf (dump_file,
1337                    "   Inlining call of depth %i", depth);
1338           if (node->count)
1339             {
1340               fprintf (dump_file, " called approx. %.2f times per call",
1341                        (double)curr->count / node->count);
1342             }
1343           fprintf (dump_file, "\n");
1344         }
1345       if (!master_clone)
1346         {
1347           /* We need original clone to copy around.  */
1348           master_clone = node->create_clone (node->decl, node->count,
1349             CGRAPH_FREQ_BASE, false, vNULL,
1350             true, NULL, NULL);
1351           for (e = master_clone->callees; e; e = e->next_callee)
1352             if (!e->inline_failed)
1353               clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1354           curr->redirect_callee (master_clone);
1355           reset_edge_growth_cache (curr);
1356         }
1357
1358       inline_call (curr, false, new_edges, &overall_size, true);
1359       lookup_recursive_calls (node, curr->callee, &heap);
1360       n++;
1361     }
1362
1363   if (!heap.empty () && dump_file)
1364     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1365
1366   if (!master_clone)
1367     return false;
1368
1369   if (dump_file)
1370     fprintf (dump_file,
1371              "\n   Inlined %i times, "
1372              "body grown from size %i to %i, time %i to %i\n", n,
1373              inline_summaries->get (master_clone)->size, inline_summaries->get (node)->size,
1374              inline_summaries->get (master_clone)->time, inline_summaries->get (node)->time);
1375
1376   /* Remove master clone we used for inlining.  We rely that clones inlined
1377      into master clone gets queued just before master clone so we don't
1378      need recursion.  */
1379   for (node = symtab->first_function (); node != master_clone;
1380        node = next)
1381     {
1382       next = symtab->next_function (node);
1383       if (node->global.inlined_to == master_clone)
1384         node->remove ();
1385     }
1386   master_clone->remove ();
1387   return true;
1388 }
1389
1390
1391 /* Given whole compilation unit estimate of INSNS, compute how large we can
1392    allow the unit to grow.  */
1393
1394 static int
1395 compute_max_insns (int insns)
1396 {
1397   int max_insns = insns;
1398   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1399     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1400
1401   return ((int64_t) max_insns
1402           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1403 }
1404
1405
1406 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1407
1408 static void
1409 add_new_edges_to_heap (edge_heap_t *heap, vec<cgraph_edge *> new_edges)
1410 {
1411   while (new_edges.length () > 0)
1412     {
1413       struct cgraph_edge *edge = new_edges.pop ();
1414
1415       gcc_assert (!edge->aux);
1416       if (edge->inline_failed
1417           && can_inline_edge_p (edge, true)
1418           && want_inline_small_function_p (edge, true))
1419         edge->aux = heap->insert (edge_badness (edge, false), edge);
1420     }
1421 }
1422
1423 /* Remove EDGE from the fibheap.  */
1424
1425 static void
1426 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1427 {
1428   if (e->aux)
1429     {
1430       ((edge_heap_t *)data)->delete_node ((edge_heap_node_t *)e->aux);
1431       e->aux = NULL;
1432     }
1433 }
1434
1435 /* Return true if speculation of edge E seems useful.
1436    If ANTICIPATE_INLINING is true, be conservative and hope that E
1437    may get inlined.  */
1438
1439 bool
1440 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1441 {
1442   enum availability avail;
1443   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail);
1444   struct cgraph_edge *direct, *indirect;
1445   struct ipa_ref *ref;
1446
1447   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1448
1449   if (!e->maybe_hot_p ())
1450     return false;
1451
1452   /* See if IP optimizations found something potentially useful about the
1453      function.  For now we look only for CONST/PURE flags.  Almost everything
1454      else we propagate is useless.  */
1455   if (avail >= AVAIL_AVAILABLE)
1456     {
1457       int ecf_flags = flags_from_decl_or_type (target->decl);
1458       if (ecf_flags & ECF_CONST)
1459         {
1460           e->speculative_call_info (direct, indirect, ref);
1461           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1462             return true;
1463         }
1464       else if (ecf_flags & ECF_PURE)
1465         {
1466           e->speculative_call_info (direct, indirect, ref);
1467           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1468             return true;
1469         }
1470     }
1471   /* If we did not managed to inline the function nor redirect
1472      to an ipa-cp clone (that are seen by having local flag set),
1473      it is probably pointless to inline it unless hardware is missing
1474      indirect call predictor.  */
1475   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1476     return false;
1477   /* For overwritable targets there is not much to do.  */
1478   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1479     return false;
1480   /* OK, speculation seems interesting.  */
1481   return true;
1482 }
1483
1484 /* We know that EDGE is not going to be inlined.
1485    See if we can remove speculation.  */
1486
1487 static void
1488 resolve_noninline_speculation (edge_heap_t *edge_heap, struct cgraph_edge *edge)
1489 {
1490   if (edge->speculative && !speculation_useful_p (edge, false))
1491     {
1492       struct cgraph_node *node = edge->caller;
1493       struct cgraph_node *where = node->global.inlined_to
1494                                   ? node->global.inlined_to : node;
1495       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1496
1497       spec_rem += edge->count;
1498       edge->resolve_speculation ();
1499       reset_edge_caches (where);
1500       inline_update_overall_summary (where);
1501       update_caller_keys (edge_heap, where,
1502                           updated_nodes, NULL);
1503       update_callee_keys (edge_heap, where,
1504                           updated_nodes);
1505       BITMAP_FREE (updated_nodes);
1506     }
1507 }
1508
1509 /* We use greedy algorithm for inlining of small functions:
1510    All inline candidates are put into prioritized heap ordered in
1511    increasing badness.
1512
1513    The inlining of small functions is bounded by unit growth parameters.  */
1514
1515 static void
1516 inline_small_functions (void)
1517 {
1518   struct cgraph_node *node;
1519   struct cgraph_edge *edge;
1520   edge_heap_t edge_heap (sreal::min ());
1521   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1522   int min_size, max_size;
1523   auto_vec<cgraph_edge *> new_indirect_edges;
1524   int initial_size = 0;
1525   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1526   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1527   new_indirect_edges.create (8);
1528
1529   edge_removal_hook_holder
1530     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, &edge_heap);
1531
1532   /* Compute overall unit size and other global parameters used by badness
1533      metrics.  */
1534
1535   max_count = 0;
1536   ipa_reduced_postorder (order, true, true, NULL);
1537   free (order);
1538
1539   FOR_EACH_DEFINED_FUNCTION (node)
1540     if (!node->global.inlined_to)
1541       {
1542         if (node->has_gimple_body_p ()
1543             || node->thunk.thunk_p)
1544           {
1545             struct inline_summary *info = inline_summaries->get (node);
1546             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1547
1548             /* Do not account external functions, they will be optimized out
1549                if not inlined.  Also only count the non-cold portion of program.  */
1550             if (!DECL_EXTERNAL (node->decl)
1551                 && !opt_for_fn (node->decl, optimize_size)
1552                 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
1553               initial_size += info->size;
1554             info->growth = estimate_growth (node);
1555             if (dfs && dfs->next_cycle)
1556               {
1557                 struct cgraph_node *n2;
1558                 int id = dfs->scc_no + 1;
1559                 for (n2 = node; n2;
1560                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1561                   {
1562                     struct inline_summary *info2 = inline_summaries->get (n2);
1563                     if (info2->scc_no)
1564                       break;
1565                     info2->scc_no = id;
1566                   }
1567               }
1568           }
1569
1570         for (edge = node->callers; edge; edge = edge->next_caller)
1571           if (max_count < edge->count)
1572             max_count = edge->count;
1573       }
1574   ipa_free_postorder_info ();
1575   initialize_growth_caches ();
1576
1577   if (dump_file)
1578     fprintf (dump_file,
1579              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1580              initial_size);
1581
1582   overall_size = initial_size;
1583   max_size = compute_max_insns (overall_size);
1584   min_size = overall_size;
1585
1586   /* Populate the heap with all edges we might inline.  */
1587
1588   FOR_EACH_DEFINED_FUNCTION (node)
1589     {
1590       bool update = false;
1591       struct cgraph_edge *next;
1592
1593       if (dump_file)
1594         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1595                  node->name (), node->order);
1596
1597       for (edge = node->callees; edge; edge = next)
1598         {
1599           next = edge->next_callee;
1600           if (edge->inline_failed
1601               && !edge->aux
1602               && can_inline_edge_p (edge, true)
1603               && want_inline_small_function_p (edge, true)
1604               && edge->inline_failed)
1605             {
1606               gcc_assert (!edge->aux);
1607               update_edge_key (&edge_heap, edge);
1608             }
1609           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1610             {
1611               edge->resolve_speculation ();
1612               update = true;
1613             }
1614         }
1615       if (update)
1616         {
1617           struct cgraph_node *where = node->global.inlined_to
1618                                       ? node->global.inlined_to : node;
1619           inline_update_overall_summary (where);
1620           reset_edge_caches (where);
1621           update_caller_keys (&edge_heap, where,
1622                               updated_nodes, NULL);
1623           update_callee_keys (&edge_heap, where,
1624                               updated_nodes);
1625           bitmap_clear (updated_nodes);
1626         }
1627     }
1628
1629   gcc_assert (in_lto_p
1630               || !max_count
1631               || (profile_info && flag_branch_probabilities));
1632
1633   while (!edge_heap.empty ())
1634     {
1635       int old_size = overall_size;
1636       struct cgraph_node *where, *callee;
1637       sreal badness = edge_heap.min_key ();
1638       sreal current_badness;
1639       int growth;
1640
1641       edge = edge_heap.extract_min ();
1642       gcc_assert (edge->aux);
1643       edge->aux = NULL;
1644       if (!edge->inline_failed || !edge->callee->analyzed)
1645         continue;
1646
1647 #ifdef ENABLE_CHECKING
1648       /* Be sure that caches are maintained consistent.  */
1649       sreal cached_badness = edge_badness (edge, false);
1650
1651       int old_size_est = estimate_edge_size (edge);
1652       int old_time_est = estimate_edge_time (edge);
1653       int old_hints_est = estimate_edge_hints (edge);
1654
1655       reset_edge_growth_cache (edge);
1656       gcc_assert (old_size_est == estimate_edge_size (edge));
1657       gcc_assert (old_time_est == estimate_edge_time (edge));
1658       gcc_assert (old_hints_est == estimate_edge_hints (edge));
1659
1660       /* When updating the edge costs, we only decrease badness in the keys.
1661          Increases of badness are handled lazilly; when we see key with out
1662          of date value on it, we re-insert it now.  */
1663       current_badness = edge_badness (edge, false);
1664       /* Disable checking for profile because roundoff errors may cause slight
1665          deviations in the order.  */
1666       gcc_assert (max_count || cached_badness == current_badness);
1667       gcc_assert (current_badness >= badness);
1668 #else
1669       current_badness = edge_badness (edge, false);
1670 #endif
1671       if (current_badness != badness)
1672         {
1673           if (edge_heap.min () && badness > edge_heap.min_key ())
1674             {
1675               edge->aux = edge_heap.insert (current_badness, edge);
1676               continue;
1677             }
1678           else
1679             badness = current_badness;
1680         }
1681
1682       if (!can_inline_edge_p (edge, true))
1683         {
1684           resolve_noninline_speculation (&edge_heap, edge);
1685           continue;
1686         }
1687
1688       callee = edge->callee->ultimate_alias_target ();
1689       growth = estimate_edge_growth (edge);
1690       if (dump_file)
1691         {
1692           fprintf (dump_file,
1693                    "\nConsidering %s/%i with %i size\n",
1694                    callee->name (), callee->order,
1695                    inline_summaries->get (callee)->size);
1696           fprintf (dump_file,
1697                    " to be inlined into %s/%i in %s:%i\n"
1698                    " Estimated badness is %f, frequency %.2f.\n",
1699                    edge->caller->name (), edge->caller->order,
1700                    edge->call_stmt
1701                    ? gimple_filename ((const_gimple) edge->call_stmt)
1702                    : "unknown",
1703                    edge->call_stmt
1704                    ? gimple_lineno ((const_gimple) edge->call_stmt)
1705                    : -1,
1706                    badness.to_double (),
1707                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1708           if (edge->count)
1709             fprintf (dump_file," Called %"PRId64"x\n",
1710                      edge->count);
1711           if (dump_flags & TDF_DETAILS)
1712             edge_badness (edge, true);
1713         }
1714
1715       if (overall_size + growth > max_size
1716           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1717         {
1718           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1719           report_inline_failed_reason (edge);
1720           resolve_noninline_speculation (&edge_heap, edge);
1721           continue;
1722         }
1723
1724       if (!want_inline_small_function_p (edge, true))
1725         {
1726           resolve_noninline_speculation (&edge_heap, edge);
1727           continue;
1728         }
1729
1730       /* Heuristics for inlining small functions work poorly for
1731          recursive calls where we do effects similar to loop unrolling.
1732          When inlining such edge seems profitable, leave decision on
1733          specific inliner.  */
1734       if (edge->recursive_p ())
1735         {
1736           where = edge->caller;
1737           if (where->global.inlined_to)
1738             where = where->global.inlined_to;
1739           if (!recursive_inlining (edge,
1740                                    opt_for_fn (edge->caller->decl,
1741                                                flag_indirect_inlining)
1742                                    ? &new_indirect_edges : NULL))
1743             {
1744               edge->inline_failed = CIF_RECURSIVE_INLINING;
1745               resolve_noninline_speculation (&edge_heap, edge);
1746               continue;
1747             }
1748           reset_edge_caches (where);
1749           /* Recursive inliner inlines all recursive calls of the function
1750              at once. Consequently we need to update all callee keys.  */
1751           if (opt_for_fn (edge->caller->decl, flag_indirect_inlining))
1752             add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1753           update_callee_keys (&edge_heap, where, updated_nodes);
1754           bitmap_clear (updated_nodes);
1755         }
1756       else
1757         {
1758           struct cgraph_node *outer_node = NULL;
1759           int depth = 0;
1760
1761           /* Consider the case where self recursive function A is inlined
1762              into B.  This is desired optimization in some cases, since it
1763              leads to effect similar of loop peeling and we might completely
1764              optimize out the recursive call.  However we must be extra
1765              selective.  */
1766
1767           where = edge->caller;
1768           while (where->global.inlined_to)
1769             {
1770               if (where->decl == callee->decl)
1771                 outer_node = where, depth++;
1772               where = where->callers->caller;
1773             }
1774           if (outer_node
1775               && !want_inline_self_recursive_call_p (edge, outer_node,
1776                                                      true, depth))
1777             {
1778               edge->inline_failed
1779                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1780                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1781               resolve_noninline_speculation (&edge_heap, edge);
1782               continue;
1783             }
1784           else if (depth && dump_file)
1785             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1786
1787           gcc_checking_assert (!callee->global.inlined_to);
1788           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1789           add_new_edges_to_heap (&edge_heap, new_indirect_edges);
1790
1791           reset_edge_caches (edge->callee->function_symbol ());
1792
1793           update_callee_keys (&edge_heap, where, updated_nodes);
1794         }
1795       where = edge->caller;
1796       if (where->global.inlined_to)
1797         where = where->global.inlined_to;
1798
1799       /* Our profitability metric can depend on local properties
1800          such as number of inlinable calls and size of the function body.
1801          After inlining these properties might change for the function we
1802          inlined into (since it's body size changed) and for the functions
1803          called by function we inlined (since number of it inlinable callers
1804          might change).  */
1805       update_caller_keys (&edge_heap, where, updated_nodes, NULL);
1806       /* Offline copy count has possibly changed, recompute if profile is
1807          available.  */
1808       if (max_count)
1809         {
1810           struct cgraph_node *n = cgraph_node::get (edge->callee->decl);
1811           if (n != edge->callee && n->analyzed)
1812             update_callee_keys (&edge_heap, n, updated_nodes);
1813         }
1814       bitmap_clear (updated_nodes);
1815
1816       if (dump_file)
1817         {
1818           fprintf (dump_file,
1819                    " Inlined into %s which now has time %i and size %i,"
1820                    "net change of %+i.\n",
1821                    edge->caller->name (),
1822                    inline_summaries->get (edge->caller)->time,
1823                    inline_summaries->get (edge->caller)->size,
1824                    overall_size - old_size);
1825         }
1826       if (min_size > overall_size)
1827         {
1828           min_size = overall_size;
1829           max_size = compute_max_insns (min_size);
1830
1831           if (dump_file)
1832             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1833         }
1834     }
1835
1836   free_growth_caches ();
1837   if (dump_file)
1838     fprintf (dump_file,
1839              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1840              initial_size, overall_size,
1841              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1842   BITMAP_FREE (updated_nodes);
1843   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
1844 }
1845
1846 /* Flatten NODE.  Performed both during early inlining and
1847    at IPA inlining time.  */
1848
1849 static void
1850 flatten_function (struct cgraph_node *node, bool early)
1851 {
1852   struct cgraph_edge *e;
1853
1854   /* We shouldn't be called recursively when we are being processed.  */
1855   gcc_assert (node->aux == NULL);
1856
1857   node->aux = (void *) node;
1858
1859   for (e = node->callees; e; e = e->next_callee)
1860     {
1861       struct cgraph_node *orig_callee;
1862       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
1863
1864       /* We've hit cycle?  It is time to give up.  */
1865       if (callee->aux)
1866         {
1867           if (dump_file)
1868             fprintf (dump_file,
1869                      "Not inlining %s into %s to avoid cycle.\n",
1870                      xstrdup_for_dump (callee->name ()),
1871                      xstrdup_for_dump (e->caller->name ()));
1872           e->inline_failed = CIF_RECURSIVE_INLINING;
1873           continue;
1874         }
1875
1876       /* When the edge is already inlined, we just need to recurse into
1877          it in order to fully flatten the leaves.  */
1878       if (!e->inline_failed)
1879         {
1880           flatten_function (callee, early);
1881           continue;
1882         }
1883
1884       /* Flatten attribute needs to be processed during late inlining. For
1885          extra code quality we however do flattening during early optimization,
1886          too.  */
1887       if (!early
1888           ? !can_inline_edge_p (e, true)
1889           : !can_early_inline_edge_p (e))
1890         continue;
1891
1892       if (e->recursive_p ())
1893         {
1894           if (dump_file)
1895             fprintf (dump_file, "Not inlining: recursive call.\n");
1896           continue;
1897         }
1898
1899       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1900           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1901         {
1902           if (dump_file)
1903             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1904           continue;
1905         }
1906
1907       /* Inline the edge and flatten the inline clone.  Avoid
1908          recursing through the original node if the node was cloned.  */
1909       if (dump_file)
1910         fprintf (dump_file, " Inlining %s into %s.\n",
1911                  xstrdup_for_dump (callee->name ()),
1912                  xstrdup_for_dump (e->caller->name ()));
1913       orig_callee = callee;
1914       inline_call (e, true, NULL, NULL, false);
1915       if (e->callee != orig_callee)
1916         orig_callee->aux = (void *) node;
1917       flatten_function (e->callee, early);
1918       if (e->callee != orig_callee)
1919         orig_callee->aux = NULL;
1920     }
1921
1922   node->aux = NULL;
1923   if (!node->global.inlined_to)
1924     inline_update_overall_summary (node);
1925 }
1926
1927 /* Count number of callers of NODE and store it into DATA (that
1928    points to int.  Worker for cgraph_for_node_and_aliases.  */
1929
1930 static bool
1931 sum_callers (struct cgraph_node *node, void *data)
1932 {
1933   struct cgraph_edge *e;
1934   int *num_calls = (int *)data;
1935
1936   for (e = node->callers; e; e = e->next_caller)
1937     (*num_calls)++;
1938   return false;
1939 }
1940
1941 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1942    DATA points to number of calls originally found so we avoid infinite
1943    recursion.  */
1944
1945 static bool
1946 inline_to_all_callers (struct cgraph_node *node, void *data)
1947 {
1948   int *num_calls = (int *)data;
1949   bool callee_removed = false;
1950
1951   while (node->callers && !node->global.inlined_to)
1952     {
1953       struct cgraph_node *caller = node->callers->caller;
1954
1955       if (dump_file)
1956         {
1957           fprintf (dump_file,
1958                    "\nInlining %s size %i.\n",
1959                    node->name (),
1960                    inline_summaries->get (node)->size);
1961           fprintf (dump_file,
1962                    " Called once from %s %i insns.\n",
1963                    node->callers->caller->name (),
1964                    inline_summaries->get (node->callers->caller)->size);
1965         }
1966
1967       inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
1968       if (dump_file)
1969         fprintf (dump_file,
1970                  " Inlined into %s which now has %i size\n",
1971                  caller->name (),
1972                  inline_summaries->get (caller)->size);
1973       if (!(*num_calls)--)
1974         {
1975           if (dump_file)
1976             fprintf (dump_file, "New calls found; giving up.\n");
1977           return callee_removed;
1978         }
1979       if (callee_removed)
1980         return true;
1981     }
1982   return false;
1983 }
1984
1985 /* Output overall time estimate.  */
1986 static void
1987 dump_overall_stats (void)
1988 {
1989   int64_t sum_weighted = 0, sum = 0;
1990   struct cgraph_node *node;
1991
1992   FOR_EACH_DEFINED_FUNCTION (node)
1993     if (!node->global.inlined_to
1994         && !node->alias)
1995       {
1996         int time = inline_summaries->get (node)->time;
1997         sum += time;
1998         sum_weighted += time * node->count;
1999       }
2000   fprintf (dump_file, "Overall time estimate: "
2001            "%"PRId64" weighted by profile: "
2002            "%"PRId64"\n", sum, sum_weighted);
2003 }
2004
2005 /* Output some useful stats about inlining.  */
2006
2007 static void
2008 dump_inline_stats (void)
2009 {
2010   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2011   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2012   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2013   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2014   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2015   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2016   int64_t reason[CIF_N_REASONS][3];
2017   int i;
2018   struct cgraph_node *node;
2019
2020   memset (reason, 0, sizeof (reason));
2021   FOR_EACH_DEFINED_FUNCTION (node)
2022   {
2023     struct cgraph_edge *e;
2024     for (e = node->callees; e; e = e->next_callee)
2025       {
2026         if (e->inline_failed)
2027           {
2028             reason[(int) e->inline_failed][0] += e->count;
2029             reason[(int) e->inline_failed][1] += e->frequency;
2030             reason[(int) e->inline_failed][2] ++;
2031             if (DECL_VIRTUAL_P (e->callee->decl))
2032               {
2033                 if (e->indirect_inlining_edge)
2034                   noninlined_virt_indir_cnt += e->count;
2035                 else
2036                   noninlined_virt_cnt += e->count;
2037               }
2038             else
2039               {
2040                 if (e->indirect_inlining_edge)
2041                   noninlined_indir_cnt += e->count;
2042                 else
2043                   noninlined_cnt += e->count;
2044               }
2045           }
2046         else
2047           {
2048             if (e->speculative)
2049               {
2050                 if (DECL_VIRTUAL_P (e->callee->decl))
2051                   inlined_speculative_ply += e->count;
2052                 else
2053                   inlined_speculative += e->count;
2054               }
2055             else if (DECL_VIRTUAL_P (e->callee->decl))
2056               {
2057                 if (e->indirect_inlining_edge)
2058                   inlined_virt_indir_cnt += e->count;
2059                 else
2060                   inlined_virt_cnt += e->count;
2061               }
2062             else
2063               {
2064                 if (e->indirect_inlining_edge)
2065                   inlined_indir_cnt += e->count;
2066                 else
2067                   inlined_cnt += e->count;
2068               }
2069           }
2070       }
2071     for (e = node->indirect_calls; e; e = e->next_callee)
2072       if (e->indirect_info->polymorphic)
2073         indirect_poly_cnt += e->count;
2074       else
2075         indirect_cnt += e->count;
2076   }
2077   if (max_count)
2078     {
2079       fprintf (dump_file,
2080                "Inlined %"PRId64 " + speculative "
2081                "%"PRId64 " + speculative polymorphic "
2082                "%"PRId64 " + previously indirect "
2083                "%"PRId64 " + virtual "
2084                "%"PRId64 " + virtual and previously indirect "
2085                "%"PRId64 "\n" "Not inlined "
2086                "%"PRId64 " + previously indirect "
2087                "%"PRId64 " + virtual "
2088                "%"PRId64 " + virtual and previously indirect "
2089                "%"PRId64 " + stil indirect "
2090                "%"PRId64 " + still indirect polymorphic "
2091                "%"PRId64 "\n", inlined_cnt,
2092                inlined_speculative, inlined_speculative_ply,
2093                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2094                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2095                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2096       fprintf (dump_file,
2097                "Removed speculations %"PRId64 "\n",
2098                spec_rem);
2099     }
2100   dump_overall_stats ();
2101   fprintf (dump_file, "\nWhy inlining failed?\n");
2102   for (i = 0; i < CIF_N_REASONS; i++)
2103     if (reason[i][2])
2104       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
2105                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2106                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2107 }
2108
2109 /* Decide on the inlining.  We do so in the topological order to avoid
2110    expenses on updating data structures.  */
2111
2112 static unsigned int
2113 ipa_inline (void)
2114 {
2115   struct cgraph_node *node;
2116   int nnodes;
2117   struct cgraph_node **order;
2118   int i;
2119   int cold;
2120   bool remove_functions = false;
2121
2122   if (!optimize)
2123     return 0;
2124
2125   cgraph_freq_base_rec = (sreal) 1 / (sreal) CGRAPH_FREQ_BASE;
2126   percent_rec = (sreal) 1 / (sreal) 100;
2127
2128   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2129
2130   if (in_lto_p && optimize)
2131     ipa_update_after_lto_read ();
2132
2133   if (dump_file)
2134     dump_inline_summaries (dump_file);
2135
2136   nnodes = ipa_reverse_postorder (order);
2137
2138   FOR_EACH_FUNCTION (node)
2139     node->aux = 0;
2140
2141   if (dump_file)
2142     fprintf (dump_file, "\nFlattening functions:\n");
2143
2144   /* In the first pass handle functions to be flattened.  Do this with
2145      a priority so none of our later choices will make this impossible.  */
2146   for (i = nnodes - 1; i >= 0; i--)
2147     {
2148       node = order[i];
2149
2150       /* Handle nodes to be flattened.
2151          Ideally when processing callees we stop inlining at the
2152          entry of cycles, possibly cloning that entry point and
2153          try to flatten itself turning it into a self-recursive
2154          function.  */
2155       if (lookup_attribute ("flatten",
2156                             DECL_ATTRIBUTES (node->decl)) != NULL)
2157         {
2158           if (dump_file)
2159             fprintf (dump_file,
2160                      "Flattening %s\n", node->name ());
2161           flatten_function (node, false);
2162         }
2163     }
2164   if (dump_file)
2165     dump_overall_stats ();
2166
2167   inline_small_functions ();
2168
2169   gcc_assert (symtab->state == IPA_SSA);
2170   symtab->state = IPA_SSA_AFTER_INLINING;
2171   /* Do first after-inlining removal.  We want to remove all "stale" extern
2172      inline functions and virtual functions so we really know what is called
2173      once.  */
2174   symtab->remove_unreachable_nodes (dump_file);
2175   free (order);
2176
2177   /* Inline functions with a property that after inlining into all callers the
2178      code size will shrink because the out-of-line copy is eliminated.
2179      We do this regardless on the callee size as long as function growth limits
2180      are met.  */
2181   if (dump_file)
2182     fprintf (dump_file,
2183              "\nDeciding on functions to be inlined into all callers and "
2184              "removing useless speculations:\n");
2185
2186   /* Inlining one function called once has good chance of preventing
2187      inlining other function into the same callee.  Ideally we should
2188      work in priority order, but probably inlining hot functions first
2189      is good cut without the extra pain of maintaining the queue.
2190
2191      ??? this is not really fitting the bill perfectly: inlining function
2192      into callee often leads to better optimization of callee due to
2193      increased context for optimization.
2194      For example if main() function calls a function that outputs help
2195      and then function that does the main optmization, we should inline
2196      the second with priority even if both calls are cold by themselves.
2197
2198      We probably want to implement new predicate replacing our use of
2199      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2200      to be hot.  */
2201   for (cold = 0; cold <= 1; cold ++)
2202     {
2203       FOR_EACH_DEFINED_FUNCTION (node)
2204         {
2205           struct cgraph_edge *edge, *next;
2206           bool update=false;
2207
2208           for (edge = node->callees; edge; edge = next)
2209             {
2210               next = edge->next_callee;
2211               if (edge->speculative && !speculation_useful_p (edge, false))
2212                 {
2213                   edge->resolve_speculation ();
2214                   spec_rem += edge->count;
2215                   update = true;
2216                   remove_functions = true;
2217                 }
2218             }
2219           if (update)
2220             {
2221               struct cgraph_node *where = node->global.inlined_to
2222                                           ? node->global.inlined_to : node;
2223               reset_edge_caches (where);
2224               inline_update_overall_summary (where);
2225             }
2226           if (want_inline_function_to_all_callers_p (node, cold))
2227             {
2228               int num_calls = 0;
2229               node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls,
2230                                                       true);
2231               while (node->call_for_symbol_thunks_and_aliases
2232                        (inline_to_all_callers, &num_calls, true))
2233                 ;
2234               remove_functions = true;
2235             }
2236         }
2237     }
2238
2239   /* Free ipa-prop structures if they are no longer needed.  */
2240   if (optimize)
2241     ipa_free_all_structures_after_iinln ();
2242
2243   if (dump_file)
2244     {
2245       fprintf (dump_file,
2246                "\nInlined %i calls, eliminated %i functions\n\n",
2247                ncalls_inlined, nfunctions_inlined);
2248       dump_inline_stats ();
2249     }
2250
2251   if (dump_file)
2252     dump_inline_summaries (dump_file);
2253   /* In WPA we use inline summaries for partitioning process.  */
2254   if (!flag_wpa)
2255     inline_free_summary ();
2256   return remove_functions ? TODO_remove_functions : 0;
2257 }
2258
2259 /* Inline always-inline function calls in NODE.  */
2260
2261 static bool
2262 inline_always_inline_functions (struct cgraph_node *node)
2263 {
2264   struct cgraph_edge *e;
2265   bool inlined = false;
2266
2267   for (e = node->callees; e; e = e->next_callee)
2268     {
2269       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2270       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2271         continue;
2272
2273       if (e->recursive_p ())
2274         {
2275           if (dump_file)
2276             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2277                      e->callee->name ());
2278           e->inline_failed = CIF_RECURSIVE_INLINING;
2279           continue;
2280         }
2281
2282       if (!can_early_inline_edge_p (e))
2283         {
2284           /* Set inlined to true if the callee is marked "always_inline" but
2285              is not inlinable.  This will allow flagging an error later in
2286              expand_call_inline in tree-inline.c.  */
2287           if (lookup_attribute ("always_inline",
2288                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2289             inlined = true;
2290           continue;
2291         }
2292
2293       if (dump_file)
2294         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2295                  xstrdup_for_dump (e->callee->name ()),
2296                  xstrdup_for_dump (e->caller->name ()));
2297       inline_call (e, true, NULL, NULL, false);
2298       inlined = true;
2299     }
2300   if (inlined)
2301     inline_update_overall_summary (node);
2302
2303   return inlined;
2304 }
2305
2306 /* Decide on the inlining.  We do so in the topological order to avoid
2307    expenses on updating data structures.  */
2308
2309 static bool
2310 early_inline_small_functions (struct cgraph_node *node)
2311 {
2312   struct cgraph_edge *e;
2313   bool inlined = false;
2314
2315   for (e = node->callees; e; e = e->next_callee)
2316     {
2317       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2318       if (!inline_summaries->get (callee)->inlinable
2319           || !e->inline_failed)
2320         continue;
2321
2322       /* Do not consider functions not declared inline.  */
2323       if (!DECL_DECLARED_INLINE_P (callee->decl)
2324           && !opt_for_fn (node->decl, flag_inline_small_functions)
2325           && !opt_for_fn (node->decl, flag_inline_functions))
2326         continue;
2327
2328       if (dump_file)
2329         fprintf (dump_file, "Considering inline candidate %s.\n",
2330                  callee->name ());
2331
2332       if (!can_early_inline_edge_p (e))
2333         continue;
2334
2335       if (e->recursive_p ())
2336         {
2337           if (dump_file)
2338             fprintf (dump_file, "  Not inlining: recursive call.\n");
2339           continue;
2340         }
2341
2342       if (!want_early_inline_function_p (e))
2343         continue;
2344
2345       if (dump_file)
2346         fprintf (dump_file, " Inlining %s into %s.\n",
2347                  xstrdup_for_dump (callee->name ()),
2348                  xstrdup_for_dump (e->caller->name ()));
2349       inline_call (e, true, NULL, NULL, true);
2350       inlined = true;
2351     }
2352
2353   return inlined;
2354 }
2355
2356 unsigned int
2357 early_inliner (function *fun)
2358 {
2359   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2360   struct cgraph_edge *edge;
2361   unsigned int todo = 0;
2362   int iterations = 0;
2363   bool inlined = false;
2364
2365   if (seen_error ())
2366     return 0;
2367
2368   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2369      happens when some pass decides to construct new function and
2370      cgraph_add_new_function calls lowering passes and early optimization on
2371      it.  This may confuse ourself when early inliner decide to inline call to
2372      function clone, because function clones don't have parameter list in
2373      ipa-prop matching their signature.  */
2374   if (ipa_node_params_sum)
2375     return 0;
2376
2377 #ifdef ENABLE_CHECKING
2378   node->verify ();
2379 #endif
2380   node->remove_all_references ();
2381
2382   /* Even when not optimizing or not inlining inline always-inline
2383      functions.  */
2384   inlined = inline_always_inline_functions (node);
2385
2386   if (!optimize
2387       || flag_no_inline
2388       || !flag_early_inlining
2389       /* Never inline regular functions into always-inline functions
2390          during incremental inlining.  This sucks as functions calling
2391          always inline functions will get less optimized, but at the
2392          same time inlining of functions calling always inline
2393          function into an always inline function might introduce
2394          cycles of edges to be always inlined in the callgraph.
2395
2396          We might want to be smarter and just avoid this type of inlining.  */
2397       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2398     ;
2399   else if (lookup_attribute ("flatten",
2400                              DECL_ATTRIBUTES (node->decl)) != NULL)
2401     {
2402       /* When the function is marked to be flattened, recursively inline
2403          all calls in it.  */
2404       if (dump_file)
2405         fprintf (dump_file,
2406                  "Flattening %s\n", node->name ());
2407       flatten_function (node, true);
2408       inlined = true;
2409     }
2410   else
2411     {
2412       /* We iterate incremental inlining to get trivial cases of indirect
2413          inlining.  */
2414       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2415              && early_inline_small_functions (node))
2416         {
2417           timevar_push (TV_INTEGRATION);
2418           todo |= optimize_inline_calls (current_function_decl);
2419
2420           /* Technically we ought to recompute inline parameters so the new
2421              iteration of early inliner works as expected.  We however have
2422              values approximately right and thus we only need to update edge
2423              info that might be cleared out for newly discovered edges.  */
2424           for (edge = node->callees; edge; edge = edge->next_callee)
2425             {
2426               /* We have no summary for new bound store calls yet.  */
2427               if (inline_edge_summary_vec.length () > (unsigned)edge->uid)
2428                 {
2429                   struct inline_edge_summary *es = inline_edge_summary (edge);
2430                   es->call_stmt_size
2431                     = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2432                   es->call_stmt_time
2433                     = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2434                 }
2435               if (edge->callee->decl
2436                   && !gimple_check_call_matching_types (
2437                       edge->call_stmt, edge->callee->decl, false))
2438                 edge->call_stmt_cannot_inline_p = true;
2439             }
2440           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2441             inline_update_overall_summary (node);
2442           timevar_pop (TV_INTEGRATION);
2443           iterations++;
2444           inlined = false;
2445         }
2446       if (dump_file)
2447         fprintf (dump_file, "Iterations: %i\n", iterations);
2448     }
2449
2450   if (inlined)
2451     {
2452       timevar_push (TV_INTEGRATION);
2453       todo |= optimize_inline_calls (current_function_decl);
2454       timevar_pop (TV_INTEGRATION);
2455     }
2456
2457   fun->always_inline_functions_inlined = true;
2458
2459   return todo;
2460 }
2461
/* Do inlining of small functions.  Doing so early helps profiling and
   other passes to be somewhat more effective and avoids some code
   duplication in the later real inlining pass for testcases with very
   many function calls.  */
2465
2466 namespace {
2467
2468 const pass_data pass_data_early_inline =
2469 {
2470   GIMPLE_PASS, /* type */
2471   "einline", /* name */
2472   OPTGROUP_INLINE, /* optinfo_flags */
2473   TV_EARLY_INLINING, /* tv_id */
2474   PROP_ssa, /* properties_required */
2475   0, /* properties_provided */
2476   0, /* properties_destroyed */
2477   0, /* todo_flags_start */
2478   0, /* todo_flags_finish */
2479 };
2480
2481 class pass_early_inline : public gimple_opt_pass
2482 {
2483 public:
2484   pass_early_inline (gcc::context *ctxt)
2485     : gimple_opt_pass (pass_data_early_inline, ctxt)
2486   {}
2487
2488   /* opt_pass methods: */
2489   virtual unsigned int execute (function *);
2490
2491 }; // class pass_early_inline
2492
2493 unsigned int
2494 pass_early_inline::execute (function *fun)
2495 {
2496   return early_inliner (fun);
2497 }
2498
2499 } // anon namespace
2500
2501 gimple_opt_pass *
2502 make_pass_early_inline (gcc::context *ctxt)
2503 {
2504   return new pass_early_inline (ctxt);
2505 }
2506
2507 namespace {
2508
2509 const pass_data pass_data_ipa_inline =
2510 {
2511   IPA_PASS, /* type */
2512   "inline", /* name */
2513   OPTGROUP_INLINE, /* optinfo_flags */
2514   TV_IPA_INLINING, /* tv_id */
2515   0, /* properties_required */
2516   0, /* properties_provided */
2517   0, /* properties_destroyed */
2518   0, /* todo_flags_start */
2519   ( TODO_dump_symtab ), /* todo_flags_finish */
2520 };
2521
2522 class pass_ipa_inline : public ipa_opt_pass_d
2523 {
2524 public:
2525   pass_ipa_inline (gcc::context *ctxt)
2526     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2527                       inline_generate_summary, /* generate_summary */
2528                       inline_write_summary, /* write_summary */
2529                       inline_read_summary, /* read_summary */
2530                       NULL, /* write_optimization_summary */
2531                       NULL, /* read_optimization_summary */
2532                       NULL, /* stmt_fixup */
2533                       0, /* function_transform_todo_flags_start */
2534                       inline_transform, /* function_transform */
2535                       NULL) /* variable_transform */
2536   {}
2537
2538   /* opt_pass methods: */
2539   virtual unsigned int execute (function *) { return ipa_inline (); }
2540
2541 }; // class pass_ipa_inline
2542
2543 } // anon namespace
2544
2545 ipa_opt_pass_d *
2546 make_pass_ipa_inline (gcc::context *ctxt)
2547 {
2548   return new pass_ipa_inline (ctxt);
2549 }