gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
      can_inline_edge_p checks that a particular inlining is allowed
      by the limits specified by the user (allowed function growth, stack
      growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
         The strength of the pass is that it is run in topological order
         (reverse postorder) on the callgraph.  Functions are converted into SSA
         form just before this pass and optimized subsequently.  As a result, the
         callees of the function seen by the early inliner were already optimized
         and the results of early inlining add a lot of optimization opportunities
         for the local optimization.
  52
         The pass handles the obvious inlining decisions within the compilation
         unit - inlining auto inline functions, inlining for size and
         flattening.
  56
         The main strength of the pass is the ability to eliminate abstraction
         penalty in C++ code (via combination of inlining and early
         optimization) and thus improve quality of analysis done by real IPA
         optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
         1) Inlining of small functions.  This is implemented by a greedy
         algorithm ordering all inlinable cgraph edges by their badness and
         inlining them in this order as long as inline limits allow doing so.
  76
         This heuristic is not very good at inlining recursive calls.  Recursive
         calls can be inlined with results similar to loop unrolling.  To do so,
         a special purpose recursive inliner is executed on the function when
         a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "trans-mem.h"
  98 #include "calls.h"
  99 #include "tree-inline.h"
 100 #include "langhooks.h"
 101 #include "flags.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "params.h"
 105 #include "fibheap.h"
 106 #include "intl.h"
 107 #include "tree-pass.h"
 108 #include "coverage.h"
 109 #include "rtl.h"
 110 #include "bitmap.h"
 111 #include "profile.h"
 112 #include "predict.h"
 113 #include "vec.h"
 114 #include "hashtab.h"
 115 #include "hash-set.h"
 116 #include "machmode.h"
 117 #include "hard-reg-set.h"
 118 #include "input.h"
 119 #include "function.h"
 120 #include "basic-block.h"
 121 #include "tree-ssa-alias.h"
 122 #include "internal-fn.h"
 123 #include "gimple-expr.h"
 124 #include "is-a.h"
 125 #include "gimple.h"
 126 #include "gimple-ssa.h"
 127 #include "hash-map.h"
 128 #include "plugin-api.h"
 129 #include "ipa-ref.h"
 130 #include "cgraph.h"
 131 #include "alloc-pool.h"
 132 #include "ipa-prop.h"
 133 #include "except.h"
 134 #include "target.h"
 135 #include "ipa-inline.h"
 136 #include "ipa-utils.h"
 137 #include "sreal.h"
 138 #include "auto-profile.h"
 139 #include "cilk.h"
 140 #include "builtins.h"
 141
 142 /* Statistics we collect about inlining algorithm.  */
 143 static int overall_size;
 144 static gcov_type max_count;
 145 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 146 static gcov_type spec_rem;
 147
 148 /* Return false when inlining edge E would lead to violating
 149    limits on function unit growth or stack usage growth.
 150
 151    The relative function body growth limit is present generally
 152    to avoid problems with non-linear behavior of the compiler.
 153    To allow inlining huge functions into tiny wrapper, the limit
 154    is always based on the bigger of the two functions considered.
 155
 156    For stack growth limits we always base the growth in stack usage
 157    of the callers.  We want to prevent applications from segfaulting
 158    on stack overflow when functions with huge stack frames gets
 159    inlined. */
 160
 161 static bool
 162 caller_growth_limits (struct cgraph_edge *e)
 163 {
 164   struct cgraph_node *to = e->caller;
 165   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 166   int newsize;
 167   int limit = 0;
 168   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 169   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 170
 171   /* Look for function e->caller is inlined to.  While doing
 172      so work out the largest function body on the way.  As
 173      described above, we want to base our function growth
 174      limits based on that.  Not on the self size of the
 175      outer function, not on the self size of inline code
 176      we immediately inline to.  This is the most relaxed
 177      interpretation of the rule "do not grow large functions
 178      too much in order to prevent compiler from exploding".  */
 179   while (true)
 180     {
 181       info = inline_summary (to);
 182       if (limit < info->self_size)
 183         limit = info->self_size;
 184       if (stack_size_limit < info->estimated_self_stack_size)
 185         stack_size_limit = info->estimated_self_stack_size;
 186       if (to->global.inlined_to)
 187         to = to->callers->caller;
 188       else
 189         break;
 190     }
 191
 192   what_info = inline_summary (what);
 193
 194   if (limit < what_info->self_size)
 195     limit = what_info->self_size;
 196
 197   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 198
 199   /* Check the size after inlining against the function limits.  But allow
 200      the function to shrink if it went over the limits by forced inlining.  */
 201   newsize = estimate_size_after_inlining (to, e);
 202   if (newsize >= info->size
 203       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 204       && newsize > limit)
 205     {
 206       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 207       return false;
 208     }
 209
 210   if (!what_info->estimated_stack_size)
 211     return true;
 212
 213   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 214      due to large i/o datastructures used by the Fortran front-end.
 215      We ought to ignore this limit when we know that the edge is executed
 216      on every invocation of the caller (i.e. its call statement dominates
 217      exit block).  We do not track this information, yet.  */
 218   stack_size_limit += ((gcov_type)stack_size_limit
 219                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 220
 221   inlined_stack = (outer_info->stack_frame_offset
 222                    + outer_info->estimated_self_stack_size
 223                    + what_info->estimated_stack_size);
 224   /* Check new stack consumption with stack consumption at the place
 225      stack is used.  */
 226   if (inlined_stack > stack_size_limit
 227       /* If function already has large stack usage from sibling
 228          inline call, we can inline, too.
 229          This bit overoptimistically assume that we are good at stack
 230          packing.  */
 231       && inlined_stack > info->estimated_stack_size
 232       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 233     {
 234       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 235       return false;
 236     }
 237   return true;
 238 }
 239
 240 /* Dump info about why inlining has failed.  */
 241
 242 static void
 243 report_inline_failed_reason (struct cgraph_edge *e)
 244 {
 245   if (dump_file)
 246     {
 247       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 248                xstrdup (e->caller->name ()), e->caller->order,
 249                xstrdup (e->callee->name ()), e->callee->order,
 250                cgraph_inline_failed_string (e->inline_failed));
 251     }
 252 }
 253
 254  /* Decide whether sanitizer-related attributes allow inlining. */
 255
 256 static bool
 257 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 258 {
 259   /* Don't care if sanitizer is disabled */
 260   if (!(flag_sanitize & SANITIZE_ADDRESS))
 261     return true;
 262
 263   if (!caller || !callee)
 264     return true;
 265
 266   return !!lookup_attribute ("no_sanitize_address",
 267       DECL_ATTRIBUTES (caller)) ==
 268       !!lookup_attribute ("no_sanitize_address",
 269       DECL_ATTRIBUTES (callee));
 270 }
 271
 272  /* Decide if we can inline the edge and possibly update
 273    inline_failed reason.
 274    We check whether inlining is possible at all and whether
 275    caller growth limits allow doing so.
 276
 277    if REPORT is true, output reason to the dump file.
 278
 279    if DISREGARD_LIMITS is true, ignore size limits.*/
 280
 281 static bool
 282 can_inline_edge_p (struct cgraph_edge *e, bool report,
 283                    bool disregard_limits = false)
 284 {
 285   bool inlinable = true;
 286   enum availability avail;
 287   cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
 288   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 289   tree callee_tree
 290     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 291   struct function *caller_fun = e->caller->get_fun ();
 292   struct function *callee_fun = callee ? callee->get_fun () : NULL;
 293
 294   gcc_assert (e->inline_failed);
 295
 296   if (!callee || !callee->definition)
 297     {
 298       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 299       inlinable = false;
 300     }
 301   else if (callee->calls_comdat_local)
 302     {
 303       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 304       inlinable = false;
 305     }
 306   else if (!inline_summary (callee)->inlinable
 307            || (caller_fun && fn_contains_cilk_spawn_p (caller_fun)))
 308     {
 309       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 310       inlinable = false;
 311     }
 312   else if (avail <= AVAIL_INTERPOSABLE)
 313     {
 314       e->inline_failed = CIF_OVERWRITABLE;
 315       inlinable = false;
 316     }
 317   else if (e->call_stmt_cannot_inline_p)
 318     {
 319       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 320         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 321       inlinable = false;
 322     }
 323   /* Don't inline if the functions have different EH personalities.  */
 324   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 325            && DECL_FUNCTION_PERSONALITY (callee->decl)
 326            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 327                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 328     {
 329       e->inline_failed = CIF_EH_PERSONALITY;
 330       inlinable = false;
 331     }
 332   /* TM pure functions should not be inlined into non-TM_pure
 333      functions.  */
 334   else if (is_tm_pure (callee->decl)
 335            && !is_tm_pure (e->caller->decl))
 336     {
 337       e->inline_failed = CIF_UNSPECIFIED;
 338       inlinable = false;
 339     }
 340   /* Don't inline if the callee can throw non-call exceptions but the
 341      caller cannot.
 342      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 343      Move the flag into cgraph node or mirror it in the inline summary.  */
 344   else if (callee_fun && callee_fun->can_throw_non_call_exceptions
 345            && !(caller_fun && caller_fun->can_throw_non_call_exceptions))
 346     {
 347       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 348       inlinable = false;
 349     }
 350   /* Check compatibility of target optimization options.  */
 351   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 352                                                 callee->decl))
 353     {
 354       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 355       inlinable = false;
 356     }
 357   /* Don't inline a function with mismatched sanitization attributes. */
 358   else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
 359     {
 360       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 361       inlinable = false;
 362     }
 363   /* Check if caller growth allows the inlining.  */
 364   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 365            && !disregard_limits
 366            && !lookup_attribute ("flatten",
 367                                  DECL_ATTRIBUTES
 368                                    (e->caller->global.inlined_to
 369                                     ? e->caller->global.inlined_to->decl
 370                                     : e->caller->decl))
 371            && !caller_growth_limits (e))
 372     inlinable = false;
 373   /* Don't inline a function with a higher optimization level than the
 374      caller.  FIXME: this is really just tip of iceberg of handling
 375      optimization attribute.  */
 376   else if (caller_tree != callee_tree)
 377     {
 378       struct cl_optimization *caller_opt
 379         = TREE_OPTIMIZATION ((caller_tree)
 380                              ? caller_tree
 381                              : optimization_default_node);
 382
 383       struct cl_optimization *callee_opt
 384         = TREE_OPTIMIZATION ((callee_tree)
 385                              ? callee_tree
 386                              : optimization_default_node);
 387
 388       if (((caller_opt->x_optimize > callee_opt->x_optimize)
 389            || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 390           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 391           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 392         {
 393           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 394           inlinable = false;
 395         }
 396     }
 397
 398   if (!inlinable && report)
 399     report_inline_failed_reason (e);
 400   return inlinable;
 401 }
 402
 403
 404 /* Return true if the edge E is inlinable during early inlining.  */
 405
 406 static bool
 407 can_early_inline_edge_p (struct cgraph_edge *e)
 408 {
 409   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 410   /* Early inliner might get called at WPA stage when IPA pass adds new
 411      function.  In this case we can not really do any of early inlining
 412      because function bodies are missing.  */
 413   if (!gimple_has_body_p (callee->decl))
 414     {
 415       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 416       return false;
 417     }
 418   /* In early inliner some of callees may not be in SSA form yet
 419      (i.e. the callgraph is cyclic and we did not process
 420      the callee by early inliner, yet).  We don't have CIF code for this
 421      case; later we will re-do the decision in the real inliner.  */
 422   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 423       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 424     {
 425       if (dump_file)
 426         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 427       return false;
 428     }
 429   if (!can_inline_edge_p (e, true))
 430     return false;
 431   return true;
 432 }
 433
 434
 435 /* Return number of calls in N.  Ignore cheap builtins.  */
 436
 437 static int
 438 num_calls (struct cgraph_node *n)
 439 {
 440   struct cgraph_edge *e;
 441   int num = 0;
 442
 443   for (e = n->callees; e; e = e->next_callee)
 444     if (!is_inexpensive_builtin (e->callee->decl))
 445       num++;
 446   return num;
 447 }
 448
 449
 450 /* Return true if we are interested in inlining small function.  */
 451
 452 static bool
 453 want_early_inline_function_p (struct cgraph_edge *e)
 454 {
 455   bool want_inline = true;
 456   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 457
 458   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 459     ;
 460   /* For AutoFDO, we need to make sure that before profile annotation, all
 461      hot paths' IR look exactly the same as profiled binary. As a result,
 462      in einliner, we will disregard size limit and inline those callsites
 463      that are:
 464        * inlined in the profiled binary, and
 465        * the cloned callee has enough samples to be considered "hot".  */
 466   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 467     ;
 468   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 469            && !flag_inline_small_functions)
 470     {
 471       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 472       report_inline_failed_reason (e);
 473       want_inline = false;
 474     }
 475   else
 476     {
 477       int growth = estimate_edge_growth (e);
 478       int n;
 479
 480       if (growth <= 0)
 481         ;
 482       else if (!e->maybe_hot_p ()
 483                && growth > 0)
 484         {
 485           if (dump_file)
 486             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 487                      "call is cold and code would grow by %i\n",
 488                      xstrdup (e->caller->name ()),
 489                      e->caller->order,
 490                      xstrdup (callee->name ()), callee->order,
 491                      growth);
 492           want_inline = false;
 493         }
 494       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 495         {
 496           if (dump_file)
 497             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 498                      "growth %i exceeds --param early-inlining-insns\n",
 499                      xstrdup (e->caller->name ()),
 500                      e->caller->order,
 501                      xstrdup (callee->name ()), callee->order,
 502                      growth);
 503           want_inline = false;
 504         }
 505       else if ((n = num_calls (callee)) != 0
 506                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 507         {
 508           if (dump_file)
 509             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 510                      "growth %i exceeds --param early-inlining-insns "
 511                      "divided by number of calls\n",
 512                      xstrdup (e->caller->name ()),
 513                      e->caller->order,
 514                      xstrdup (callee->name ()), callee->order,
 515                      growth);
 516           want_inline = false;
 517         }
 518     }
 519   return want_inline;
 520 }
 521
 522 /* Compute time of the edge->caller + edge->callee execution when inlining
 523    does not happen.  */
 524
 525 inline gcov_type
 526 compute_uninlined_call_time (struct inline_summary *callee_info,
 527                              struct cgraph_edge *edge)
 528 {
 529   gcov_type uninlined_call_time =
 530     RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
 531           CGRAPH_FREQ_BASE);
 532   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 533                                           ? edge->caller->global.inlined_to
 534                                           : edge->caller)->time;
 535   return uninlined_call_time + caller_time;
 536 }
 537
 538 /* Same as compute_uinlined_call_time but compute time when inlining
 539    does happen.  */
 540
 541 inline gcov_type
 542 compute_inlined_call_time (struct cgraph_edge *edge,
 543                            int edge_time)
 544 {
 545   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 546                                           ? edge->caller->global.inlined_to
 547                                           : edge->caller)->time;
 548   gcov_type time = (caller_time
 549                     + RDIV (((gcov_type) edge_time
 550                              - inline_edge_summary (edge)->call_stmt_time)
 551                     * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 552   /* Possible one roundoff error, but watch for overflows.  */
 553   gcc_checking_assert (time >= INT_MIN / 2);
 554   if (time < 0)
 555     time = 0;
 556   return time;
 557 }
 558
 559 /* Return true if the speedup for inlining E is bigger than
 560    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 561
 562 static bool
 563 big_speedup_p (struct cgraph_edge *e)
 564 {
 565   gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
 566                                                 e);
 567   gcov_type inlined_time = compute_inlined_call_time (e,
 568                                                       estimate_edge_time (e));
 569   if (time - inlined_time
 570       > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
 571     return true;
 572   return false;
 573 }
 574
 575 /* Return true if we are interested in inlining small function.
 576    When REPORT is true, report reason to dump file.  */
 577
 578 static bool
 579 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 580 {
 581   bool want_inline = true;
 582   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 583
 584   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 585     ;
 586   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 587            && !flag_inline_small_functions)
 588     {
 589       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 590       want_inline = false;
 591     }
 592   /* Do fast and conservative check if the function can be good
 593      inline candidate.  At the moment we allow inline hints to
 594      promote non-inline functions to inline and we increase
 595      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 596   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 597            && (!e->count || !e->maybe_hot_p ()))
 598            && inline_summary (callee)->min_size
 599                 - inline_edge_summary (e)->call_stmt_size
 600               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 601     {
 602       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 603       want_inline = false;
 604     }
 605   else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
 606            && inline_summary (callee)->min_size
 607                 - inline_edge_summary (e)->call_stmt_size
 608               > 16 * MAX_INLINE_INSNS_SINGLE)
 609     {
 610       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 611                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 612                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 613       want_inline = false;
 614     }
 615   else
 616     {
 617       int growth = estimate_edge_growth (e);
 618       inline_hints hints = estimate_edge_hints (e);
 619       bool big_speedup = big_speedup_p (e);
 620
 621       if (growth <= 0)
 622         ;
 623       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 624          hints suggests that inlining given function is very profitable.  */
 625       else if (DECL_DECLARED_INLINE_P (callee->decl)
 626                && growth >= MAX_INLINE_INSNS_SINGLE
 627                && ((!big_speedup
 628                     && !(hints & (INLINE_HINT_indirect_call
 629                                   | INLINE_HINT_known_hot
 630                                   | INLINE_HINT_loop_iterations
 631                                   | INLINE_HINT_array_index
 632                                   | INLINE_HINT_loop_stride)))
 633                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 634         {
 635           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 636           want_inline = false;
 637         }
 638       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 639                && !flag_inline_functions)
 640         {
 641           /* growth_likely_positive is expensive, always test it last.  */
 642           if (growth >= MAX_INLINE_INSNS_SINGLE
 643               || growth_likely_positive (callee, growth))
 644             {
 645               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 646               want_inline = false;
 647             }
 648         }
 649       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 650          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 651          inlining given function is very profitable.  */
 652       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 653                && !big_speedup
 654                && !(hints & INLINE_HINT_known_hot)
 655                && growth >= ((hints & (INLINE_HINT_indirect_call
 656                                        | INLINE_HINT_loop_iterations
 657                                        | INLINE_HINT_array_index
 658                                        | INLINE_HINT_loop_stride))
 659                              ? MAX (MAX_INLINE_INSNS_AUTO,
 660                                     MAX_INLINE_INSNS_SINGLE)
 661                              : MAX_INLINE_INSNS_AUTO))
 662         {
 663           /* growth_likely_positive is expensive, always test it last.  */
 664           if (growth >= MAX_INLINE_INSNS_SINGLE
 665               || growth_likely_positive (callee, growth))
 666             {
 667               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 668               want_inline = false;
 669             }
 670         }
 671       /* If call is cold, do not inline when function body would grow. */
 672       else if (!e->maybe_hot_p ()
 673                && (growth >= MAX_INLINE_INSNS_SINGLE
 674                    || growth_likely_positive (callee, growth)))
 675         {
 676           e->inline_failed = CIF_UNLIKELY_CALL;
 677           want_inline = false;
 678         }
 679     }
 680   if (!want_inline && report)
 681     report_inline_failed_reason (e);
 682   return want_inline;
 683 }
 684
 685 /* EDGE is self recursive edge.
 686    We hand two cases - when function A is inlining into itself
 687    or when function A is being inlined into another inliner copy of function
 688    A within function B.
 689
 690    In first case OUTER_NODE points to the toplevel copy of A, while
 691    in the second case OUTER_NODE points to the outermost copy of A in B.
 692
 693    In both cases we want to be extra selective since
 694    inlining the call will just introduce new recursive calls to appear.  */
 695
 696 static bool
 697 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 698                                    struct cgraph_node *outer_node,
 699                                    bool peeling,
 700                                    int depth)
 701 {
 702   char const *reason = NULL;
 703   bool want_inline = true;
 704   int caller_freq = CGRAPH_FREQ_BASE;
 705   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 706
 707   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 708     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 709
 710   if (!edge->maybe_hot_p ())
 711     {
 712       reason = "recursive call is cold";
 713       want_inline = false;
 714     }
 715   else if (max_count && !outer_node->count)
 716     {
 717       reason = "not executed in profile";
 718       want_inline = false;
 719     }
 720   else if (depth > max_depth)
 721     {
 722       reason = "--param max-inline-recursive-depth exceeded.";
 723       want_inline = false;
 724     }
 725
 726   if (outer_node->global.inlined_to)
 727     caller_freq = outer_node->callers->frequency;
 728
 729   if (!caller_freq)
 730     {
 731       reason = "function is inlined and unlikely";
 732       want_inline = false;
 733     }
 734
 735   if (!want_inline)
 736     ;
 737   /* Inlining of self recursive function into copy of itself within other function
 738      is transformation similar to loop peeling.
 739
 740      Peeling is profitable if we can inline enough copies to make probability
 741      of actual call to the self recursive function very small.  Be sure that
 742      the probability of recursion is small.
 743
 744      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 745      This way the expected number of recision is at most max_depth.  */
 746   else if (peeling)
 747     {
 748       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 749                                          / max_depth);
 750       int i;
 751       for (i = 1; i < depth; i++)
 752         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 753       if (max_count
 754           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 755               >= max_prob))
 756         {
 757           reason = "profile of recursive call is too large";
 758           want_inline = false;
 759         }
 760       if (!max_count
 761           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 762               >= max_prob))
 763         {
 764           reason = "frequency of recursive call is too large";
 765           want_inline = false;
 766         }
 767     }
 768   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 769      depth is large.  We reduce function call overhead and increase chances that
 770      things fit in hardware return predictor.
 771
 772      Recursive inlining might however increase cost of stack frame setup
 773      actually slowing down functions whose recursion tree is wide rather than
 774      deep.
 775
 776      Deciding reliably on when to do recursive inlining without profile feedback
 777      is tricky.  For now we disable recursive inlining when probability of self
 778      recursion is low.
 779
 780      Recursive inlining of self recursive call within loop also results in large loop
 781      depths that generally optimize badly.  We may want to throttle down inlining
 782      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 783      methods.  */
 784   else
 785     {
 786       if (max_count
 787           && (edge->count * 100 / outer_node->count
 788               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 789         {
 790           reason = "profile of recursive call is too small";
 791           want_inline = false;
 792         }
 793       else if (!max_count
 794                && (edge->frequency * 100 / caller_freq
 795                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 796         {
 797           reason = "frequency of recursive call is too small";
 798           want_inline = false;
 799         }
 800     }
 801   if (!want_inline && dump_file)
 802     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 803   return want_inline;
 804 }
 805
 806 /* Return true when NODE has uninlinable caller;
 807    set HAS_HOT_CALL if it has hot call.
 808    Worker for cgraph_for_node_and_aliases.  */
 809
 810 static bool
 811 check_callers (struct cgraph_node *node, void *has_hot_call)
 812 {
 813   struct cgraph_edge *e;
 814    for (e = node->callers; e; e = e->next_caller)
 815      {
 816        if (!can_inline_edge_p (e, true))
 817          return true;
 818        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 819          *(bool *)has_hot_call = true;
 820      }
 821   return false;
 822 }
 823
 824 /* If NODE has a caller, return true.  */
 825
 826 static bool
 827 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 828 {
 829   if (node->callers)
 830     return true;
 831   return false;
 832 }
 833
 834 /* Decide if inlining NODE would reduce unit size by eliminating
 835    the offline copy of function.
 836    When COLD is true the cold calls are considered, too.  */
 837
 838 static bool
 839 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 840 {
 841   bool has_hot_call = false;
 842
 843   if (node->ultimate_alias_target () != node)
 844     return false;
 845   /* Already inlined?  */
 846   if (node->global.inlined_to)
 847     return false;
 848   /* Does it have callers?  */
 849   if (!node->call_for_symbol_thunks_and_aliases (has_caller_p, NULL, true))
 850     return false;
 851   /* Inlining into all callers would increase size?  */
 852   if (estimate_growth (node) > 0)
 853     return false;
 854   /* All inlines must be possible.  */
 855   if (node->call_for_symbol_thunks_and_aliases (check_callers, &has_hot_call,
 856                                                 true))
 857     return false;
 858   if (!cold && !has_hot_call)
 859     return false;
 860   return true;
 861 }
 862
 863 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 864
 865 /* Return relative time improvement for inlining EDGE in range
 866    1...RELATIVE_TIME_BENEFIT_RANGE  */
 867
 868 static inline int
 869 relative_time_benefit (struct inline_summary *callee_info,
 870                        struct cgraph_edge *edge,
 871                        int edge_time)
 872 {
 873   gcov_type relbenefit;
 874   gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 875   gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
 876
 877   /* Inlining into extern inline function is not a win.  */
 878   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 879                      ? edge->caller->global.inlined_to->decl
 880                      : edge->caller->decl))
 881     return 1;
 882
 883   /* Watch overflows.  */
 884   gcc_checking_assert (uninlined_call_time >= 0);
 885   gcc_checking_assert (inlined_call_time >= 0);
 886   gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 887
 888   /* Compute relative time benefit, i.e. how much the call becomes faster.
 889      ??? perhaps computing how much the caller+calle together become faster
 890      would lead to more realistic results.  */
 891   if (!uninlined_call_time)
 892     uninlined_call_time = 1;
 893   relbenefit =
 894     RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
 895           uninlined_call_time);
 896   relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
 897   gcc_checking_assert (relbenefit >= 0);
 898   relbenefit = MAX (relbenefit, 1);
 899   return relbenefit;
 900 }
 901
 902
 903 /* A cost model driving the inlining heuristics in a way so the edges with
 904    smallest badness are inlined first.  After each inlining is performed
 905    the costs of all caller edges of nodes affected are recomputed so the
 906    metrics may accurately depend on values such as number of inlinable callers
 907    of the function or function body size.  */
 908
 909 static int
 910 edge_badness (struct cgraph_edge *edge, bool dump)
 911 {
 912   gcov_type badness;
 913   int growth, edge_time;
 914   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
 915   struct inline_summary *callee_info = inline_summary (callee);
 916   inline_hints hints;
 917
 918   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 919     return INT_MIN;
 920
 921   growth = estimate_edge_growth (edge);
 922   edge_time = estimate_edge_time (edge);
 923   hints = estimate_edge_hints (edge);
 924   gcc_checking_assert (edge_time >= 0);
 925   gcc_checking_assert (edge_time <= callee_info->time);
 926   gcc_checking_assert (growth <= callee_info->size);
 927
 928   if (dump)
 929     {
 930       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 931                xstrdup (edge->caller->name ()),
 932                edge->caller->order,
 933                xstrdup (callee->name ()),
 934                edge->callee->order);
 935       fprintf (dump_file, "      size growth %i, time %i ",
 936                growth,
 937                edge_time);
 938       dump_inline_hints (dump_file, hints);
 939       if (big_speedup_p (edge))
 940         fprintf (dump_file, " big_speedup");
 941       fprintf (dump_file, "\n");
 942     }
 943
 944   /* Always prefer inlining saving code size.  */
 945   if (growth <= 0)
 946     {
 947       badness = INT_MIN / 2 + growth;
 948       if (dump)
 949         fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
 950                  growth);
 951     }
 952
 953   /* When profiling is available, compute badness as:
 954
 955                 relative_edge_count * relative_time_benefit
 956      goodness = -------------------------------------------
 957                 growth_f_caller
 958      badness = -goodness
 959
 960     The fraction is upside down, because on edge counts and time beneits
 961     the bounds are known. Edge growth is essentially unlimited.  */
 962
 963   else if (max_count)
 964     {
 965       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
 966       /* Capping edge->count to max_count. edge->count can be larger than
 967          max_count if an inline adds new edges which increase max_count
 968          after max_count is computed.  */
 969       gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
 970
 971       sreal relbenefit_real (relbenefit, 0);
 972       sreal growth_real (growth, 0);
 973
 974       /* relative_edge_count.  */
 975       sreal tmp (edge_count, 0);
 976       tmp /= max_count_real;
 977
 978       /* relative_time_benefit.  */
 979       tmp *= relbenefit_real;
 980       tmp /= max_relbenefit_real;
 981
 982       /* growth_f_caller.  */
 983       tmp *= half_int_min_real;
 984       tmp /=  growth_real;
 985
 986       badness = -1 * tmp.to_int ();
 987
 988       if (dump)
 989         {
 990           fprintf (dump_file,
 991                    "      %i (relative %f): profile info. Relative count %f%s"
 992                    " * Relative benefit %f\n",
 993                    (int) badness, (double) badness / INT_MIN,
 994                    (double) edge_count / max_count,
 995                    edge->count > max_count ? " (capped to max_count)" : "",
 996                    relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 997         }
 998     }
 999
1000   /* When function local profile is available. Compute badness as:
1001
1002                  relative_time_benefit
1003      goodness =  ---------------------------------
1004                  growth_of_caller * overall_growth
1005
1006      badness = - goodness
1007
1008      compensated by the inline hints.
1009   */
1010   else if (flag_guess_branch_prob)
1011     {
1012       badness = (relative_time_benefit (callee_info, edge, edge_time)
1013                  * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
1014       badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
1015       gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
1016       if ((hints & (INLINE_HINT_indirect_call
1017                     | INLINE_HINT_loop_iterations
1018                     | INLINE_HINT_array_index
1019                     | INLINE_HINT_loop_stride))
1020           || callee_info->growth <= 0)
1021         badness *= 8;
1022       if (hints & (INLINE_HINT_same_scc))
1023         badness /= 16;
1024       else if (hints & (INLINE_HINT_in_scc))
1025         badness /= 8;
1026       else if (hints & (INLINE_HINT_cross_module))
1027         badness /= 2;
1028       gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
1029       if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
1030         badness *= 16;
1031       if (dump)
1032         {
1033           fprintf (dump_file,
1034                    "      %i: guessed profile. frequency %f,"
1035                    " benefit %f%%, time w/o inlining %i, time w inlining %i"
1036                    " overall growth %i (current) %i (original)\n",
1037                    (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
1038                    relative_time_benefit (callee_info, edge, edge_time) * 100.0
1039                    / RELATIVE_TIME_BENEFIT_RANGE,
1040                    (int)compute_uninlined_call_time (callee_info, edge),
1041                    (int)compute_inlined_call_time (edge, edge_time),
1042                    estimate_growth (callee),
1043                    callee_info->growth);
1044         }
1045     }
1046   /* When function local profile is not available or it does not give
1047      useful information (ie frequency is zero), base the cost on
1048      loop nest and overall size growth, so we optimize for overall number
1049      of functions fully inlined in program.  */
1050   else
1051     {
1052       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1053       badness = growth * 256;
1054
1055       /* Decrease badness if call is nested.  */
1056       if (badness > 0)
1057         badness >>= nest;
1058       else
1059         {
1060           badness <<= nest;
1061         }
1062       if (dump)
1063         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
1064                  nest);
1065     }
1066
1067   /* Ensure that we did not overflow in all the fixed point math above.  */
1068   gcc_assert (badness >= INT_MIN);
1069   gcc_assert (badness <= INT_MAX - 1);
1070   /* Make recursive inlining happen always after other inlining is done.  */
1071   if (edge->recursive_p ())
1072     return badness + 1;
1073   else
1074     return badness;
1075 }
1076
1077 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1078 static inline void
1079 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1080 {
1081   int badness = edge_badness (edge, false);
1082   if (edge->aux)
1083     {
1084       fibnode_t n = (fibnode_t) edge->aux;
1085       gcc_checking_assert (n->data == edge);
1086
1087       /* fibheap_replace_key only decrease the keys.
1088          When we increase the key we do not update heap
1089          and instead re-insert the element once it becomes
1090          a minimum of heap.  */
1091       if (badness < n->key)
1092         {
1093           if (dump_file && (dump_flags & TDF_DETAILS))
1094             {
1095               fprintf (dump_file,
1096                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
1097                        xstrdup (edge->caller->name ()),
1098                        edge->caller->order,
1099                        xstrdup (edge->callee->name ()),
1100                        edge->callee->order,
1101                        (int)n->key,
1102                        badness);
1103             }
1104           fibheap_replace_key (heap, n, badness);
1105           gcc_checking_assert (n->key == badness);
1106         }
1107     }
1108   else
1109     {
1110        if (dump_file && (dump_flags & TDF_DETAILS))
1111          {
1112            fprintf (dump_file,
1113                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
1114                     xstrdup (edge->caller->name ()),
1115                     edge->caller->order,
1116                     xstrdup (edge->callee->name ()),
1117                     edge->callee->order,
1118                     badness);
1119          }
1120       edge->aux = fibheap_insert (heap, badness, edge);
1121     }
1122 }
1123
1124
1125 /* NODE was inlined.
1126    All caller edges needs to be resetted because
1127    size estimates change. Similarly callees needs reset
1128    because better context may be known.  */
1129
1130 static void
1131 reset_edge_caches (struct cgraph_node *node)
1132 {
1133   struct cgraph_edge *edge;
1134   struct cgraph_edge *e = node->callees;
1135   struct cgraph_node *where = node;
1136   struct ipa_ref *ref;
1137
1138   if (where->global.inlined_to)
1139     where = where->global.inlined_to;
1140
1141   /* WHERE body size has changed, the cached growth is invalid.  */
1142   reset_node_growth_cache (where);
1143
1144   for (edge = where->callers; edge; edge = edge->next_caller)
1145     if (edge->inline_failed)
1146       reset_edge_growth_cache (edge);
1147
1148   FOR_EACH_ALIAS (where, ref)
1149     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1150
1151   if (!e)
1152     return;
1153
1154   while (true)
1155     if (!e->inline_failed && e->callee->callees)
1156       e = e->callee->callees;
1157     else
1158       {
1159         if (e->inline_failed)
1160           reset_edge_growth_cache (e);
1161         if (e->next_callee)
1162           e = e->next_callee;
1163         else
1164           {
1165             do
1166               {
1167                 if (e->caller == node)
1168                   return;
1169                 e = e->caller->callers;
1170               }
1171             while (!e->next_callee);
1172             e = e->next_callee;
1173           }
1174       }
1175 }
1176
1177 /* Recompute HEAP nodes for each of caller of NODE.
1178    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1179    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1180    it is inlinable. Otherwise check all edges.  */
1181
1182 static void
1183 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1184                     bitmap updated_nodes,
1185                     struct cgraph_edge *check_inlinablity_for)
1186 {
1187   struct cgraph_edge *edge;
1188   struct ipa_ref *ref;
1189
1190   if ((!node->alias && !inline_summary (node)->inlinable)
1191       || node->global.inlined_to)
1192     return;
1193   if (!bitmap_set_bit (updated_nodes, node->uid))
1194     return;
1195
1196   FOR_EACH_ALIAS (node, ref)
1197     {
1198       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1199       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1200     }
1201
1202   for (edge = node->callers; edge; edge = edge->next_caller)
1203     if (edge->inline_failed)
1204       {
1205         if (!check_inlinablity_for
1206             || check_inlinablity_for == edge)
1207           {
1208             if (can_inline_edge_p (edge, false)
1209                 && want_inline_small_function_p (edge, false))
1210               update_edge_key (heap, edge);
1211             else if (edge->aux)
1212               {
1213                 report_inline_failed_reason (edge);
1214                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1215                 edge->aux = NULL;
1216               }
1217           }
1218         else if (edge->aux)
1219           update_edge_key (heap, edge);
1220       }
1221 }
1222
1223 /* Recompute HEAP nodes for each uninlined call in NODE.
1224    This is used when we know that edge badnesses are going only to increase
1225    (we introduced new call site) and thus all we need is to insert newly
1226    created edges into heap.  */
1227
1228 static void
1229 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1230                     bitmap updated_nodes)
1231 {
1232   struct cgraph_edge *e = node->callees;
1233
1234   if (!e)
1235     return;
1236   while (true)
1237     if (!e->inline_failed && e->callee->callees)
1238       e = e->callee->callees;
1239     else
1240       {
1241         enum availability avail;
1242         struct cgraph_node *callee;
1243         /* We do not reset callee growth cache here.  Since we added a new call,
1244            growth chould have just increased and consequentely badness metric
1245            don't need updating.  */
1246         if (e->inline_failed
1247             && (callee = e->callee->ultimate_alias_target (&avail))
1248             && inline_summary (callee)->inlinable
1249             && avail >= AVAIL_AVAILABLE
1250             && !bitmap_bit_p (updated_nodes, callee->uid))
1251           {
1252             if (can_inline_edge_p (e, false)
1253                 && want_inline_small_function_p (e, false))
1254               update_edge_key (heap, e);
1255             else if (e->aux)
1256               {
1257                 report_inline_failed_reason (e);
1258                 fibheap_delete_node (heap, (fibnode_t) e->aux);
1259                 e->aux = NULL;
1260               }
1261           }
1262         if (e->next_callee)
1263           e = e->next_callee;
1264         else
1265           {
1266             do
1267               {
1268                 if (e->caller == node)
1269                   return;
1270                 e = e->caller->callers;
1271               }
1272             while (!e->next_callee);
1273             e = e->next_callee;
1274           }
1275       }
1276 }
1277
1278 /* Enqueue all recursive calls from NODE into priority queue depending on
1279    how likely we want to recursively inline the call.  */
1280
1281 static void
1282 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1283                         fibheap_t heap)
1284 {
1285   struct cgraph_edge *e;
1286   enum availability avail;
1287
1288   for (e = where->callees; e; e = e->next_callee)
1289     if (e->callee == node
1290         || (e->callee->ultimate_alias_target (&avail) == node
1291             && avail > AVAIL_INTERPOSABLE))
1292       {
1293         /* When profile feedback is available, prioritize by expected number
1294            of calls.  */
1295         fibheap_insert (heap,
1296                         !max_count ? -e->frequency
1297                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1298                         e);
1299       }
1300   for (e = where->callees; e; e = e->next_callee)
1301     if (!e->inline_failed)
1302       lookup_recursive_calls (node, e->callee, heap);
1303 }
1304
/* Decide on recursive inlining: in the case function has recursive calls,
   inline until body size reaches given argument.  If any new indirect edges
   are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
   is NULL.

   EDGE is the call that triggered the decision; the function worked on is
   its caller, or the function that caller was inlined into.
   Return true when at least one recursive call was inlined.  */

static bool
recursive_inlining (struct cgraph_edge *edge,
                    vec<cgraph_edge *> *new_edges)
{
  int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
  fibheap_t heap;
  struct cgraph_node *node;
  struct cgraph_edge *e;
  struct cgraph_node *master_clone = NULL, *next;
  int depth = 0;
  int n = 0;

  node = edge->caller;
  if (node->global.inlined_to)
    node = node->global.inlined_to;

  /* Functions declared inline use a separate size limit.  */
  if (DECL_DECLARED_INLINE_P (node->decl))
    limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);

  /* Make sure that function is small enough to be considered for inlining.  */
  if (estimate_size_after_inlining (node, edge)  >= limit)
    return false;
  /* Collect the recursive calls; give up when there are none.  */
  heap = fibheap_new ();
  lookup_recursive_calls (node, node, heap);
  if (fibheap_empty (heap))
    {
      fibheap_delete (heap);
      return false;
    }

  if (dump_file)
    fprintf (dump_file,
             "  Performing recursive inlining on %s\n",
             node->name ());

  /* Do the inlining and update list of recursive call during process.  */
  while (!fibheap_empty (heap))
    {
      struct cgraph_edge *curr
        = (struct cgraph_edge *) fibheap_extract_min (heap);
      /* DEST remembers the original callee so a redirection done below can
         be undone when the call ends up not being inlined.  */
      struct cgraph_node *cnode, *dest = curr->callee;

      if (!can_inline_edge_p (curr, true))
        continue;

      /* MASTER_CLONE is produced in the case we already started modifying
         the function. Be sure to redirect edge to the original body before
         estimating growths otherwise we will be seeing growths after inlining
         the already modified body.  */
      if (master_clone)
        {
          curr->redirect_callee (master_clone);
          reset_edge_growth_cache (curr);
        }

      /* Stop altogether once the size limit would be exceeded.  */
      if (estimate_size_after_inlining (node, curr) > limit)
        {
          curr->redirect_callee (dest);
          reset_edge_growth_cache (curr);
          break;
        }

      /* Compute the recursion depth of CURR by walking up the chain of
         inlined callers.
         NOTE(review): the condition below does not depend on CNODE, so every
         step of the walk is counted alike -- confirm this is intended.  */
      depth = 1;
      for (cnode = curr->caller;
           cnode->global.inlined_to; cnode = cnode->callers->caller)
        if (node->decl
            == curr->callee->ultimate_alias_target ()->decl)
          depth++;

      /* Skip this call, but keep looking at the others, when the heuristics
         reject it.  */
      if (!want_inline_self_recursive_call_p (curr, node, false, depth))
        {
          curr->redirect_callee (dest);
          reset_edge_growth_cache (curr);
          continue;
        }

      if (dump_file)
        {
          fprintf (dump_file,
                   "   Inlining call of depth %i", depth);
          if (node->count)
            {
              fprintf (dump_file, " called approx. %.2f times per call",
                       (double)curr->count / node->count);
            }
          fprintf (dump_file, "\n");
        }
      /* The clone is created lazily, right before the first inlining.  */
      if (!master_clone)
        {
          /* We need original clone to copy around.  */
          master_clone = node->create_clone (node->decl, node->count,
            CGRAPH_FREQ_BASE, false, vNULL,
            true, NULL, NULL);
          for (e = master_clone->callees; e; e = e->next_callee)
            if (!e->inline_failed)
              clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
          curr->redirect_callee (master_clone);
          reset_edge_growth_cache (curr);
        }

      /* Inline and enqueue the recursive calls the new copy brought in.  */
      inline_call (curr, false, new_edges, &overall_size, true);
      lookup_recursive_calls (node, curr->callee, heap);
      n++;
    }

  /* A non-empty heap here means the loop was left through the size limit
     check.  */
  if (!fibheap_empty (heap) && dump_file)
    fprintf (dump_file, "    Recursive inlining growth limit met.\n");
  fibheap_delete (heap);

  /* No clone means nothing was inlined.  */
  if (!master_clone)
    return false;

  if (dump_file)
    fprintf (dump_file,
             "\n   Inlined %i times, "
             "body grown from size %i to %i, time %i to %i\n", n,
             inline_summary (master_clone)->size, inline_summary (node)->size,
             inline_summary (master_clone)->time, inline_summary (node)->time);

  /* Remove master clone we used for inlining.  We rely that clones inlined
     into master clone gets queued just before master clone so we don't
     need recursion.  */
  for (node = symtab->first_function (); node != master_clone;
       node = next)
    {
      /* Fetch the successor first; NODE may be removed below.  */
      next = symtab->next_function (node);
      if (node->global.inlined_to == master_clone)
        node->remove ();
    }
  master_clone->remove ();
  return true;
}
1442
1443
1444 /* Given whole compilation unit estimate of INSNS, compute how large we can
1445    allow the unit to grow.  */
1446
1447 static int
1448 compute_max_insns (int insns)
1449 {
1450   int max_insns = insns;
1451   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1452     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1453
1454   return ((int64_t) max_insns
1455           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1456 }
1457
1458
1459 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1460
1461 static void
1462 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge *> new_edges)
1463 {
1464   while (new_edges.length () > 0)
1465     {
1466       struct cgraph_edge *edge = new_edges.pop ();
1467
1468       gcc_assert (!edge->aux);
1469       if (edge->inline_failed
1470           && can_inline_edge_p (edge, true)
1471           && want_inline_small_function_p (edge, true))
1472         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1473     }
1474 }
1475
1476 /* Remove EDGE from the fibheap.  */
1477
1478 static void
1479 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1480 {
1481   if (e->callee)
1482     reset_node_growth_cache (e->callee);
1483   if (e->aux)
1484     {
1485       fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1486       e->aux = NULL;
1487     }
1488 }
1489
1490 /* Return true if speculation of edge E seems useful.
1491    If ANTICIPATE_INLINING is true, be conservative and hope that E
1492    may get inlined.  */
1493
1494 bool
1495 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1496 {
1497   enum availability avail;
1498   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail);
1499   struct cgraph_edge *direct, *indirect;
1500   struct ipa_ref *ref;
1501
1502   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1503
1504   if (!e->maybe_hot_p ())
1505     return false;
1506
1507   /* See if IP optimizations found something potentially useful about the
1508      function.  For now we look only for CONST/PURE flags.  Almost everything
1509      else we propagate is useless.  */
1510   if (avail >= AVAIL_AVAILABLE)
1511     {
1512       int ecf_flags = flags_from_decl_or_type (target->decl);
1513       if (ecf_flags & ECF_CONST)
1514         {
1515           e->speculative_call_info (direct, indirect, ref);
1516           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1517             return true;
1518         }
1519       else if (ecf_flags & ECF_PURE)
1520         {
1521           e->speculative_call_info (direct, indirect, ref);
1522           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1523             return true;
1524         }
1525     }
1526   /* If we did not managed to inline the function nor redirect
1527      to an ipa-cp clone (that are seen by having local flag set),
1528      it is probably pointless to inline it unless hardware is missing
1529      indirect call predictor.  */
1530   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1531     return false;
1532   /* For overwritable targets there is not much to do.  */
1533   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1534     return false;
1535   /* OK, speculation seems interesting.  */
1536   return true;
1537 }
1538
1539 /* We know that EDGE is not going to be inlined.
1540    See if we can remove speculation.  */
1541
1542 static void
1543 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1544 {
1545   if (edge->speculative && !speculation_useful_p (edge, false))
1546     {
1547       struct cgraph_node *node = edge->caller;
1548       struct cgraph_node *where = node->global.inlined_to
1549                                   ? node->global.inlined_to : node;
1550       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1551
1552       spec_rem += edge->count;
1553       edge->resolve_speculation ();
1554       reset_edge_caches (where);
1555       inline_update_overall_summary (where);
1556       update_caller_keys (edge_heap, where,
1557                           updated_nodes, NULL);
1558       update_callee_keys (edge_heap, where,
1559                           updated_nodes);
1560       BITMAP_FREE (updated_nodes);
1561     }
1562 }
1563
1564 /* We use greedy algorithm for inlining of small functions:
1565    All inline candidates are put into prioritized heap ordered in
1566    increasing badness.
1567
1568    The inlining of small functions is bounded by unit growth parameters.  */
1569
1570 static void
1571 inline_small_functions (void)
1572 {
1573   struct cgraph_node *node;
1574   struct cgraph_edge *edge;
1575   fibheap_t edge_heap = fibheap_new ();
1576   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1577   int min_size, max_size;
1578   auto_vec<cgraph_edge *> new_indirect_edges;
1579   int initial_size = 0;
1580   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1581   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1582   if (flag_indirect_inlining)
1583     new_indirect_edges.create (8);
1584
1585   edge_removal_hook_holder
1586     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1587
1588   /* Compute overall unit size and other global parameters used by badness
1589      metrics.  */
1590
1591   max_count = 0;
1592   ipa_reduced_postorder (order, true, true, NULL);
1593   free (order);
1594
1595   FOR_EACH_DEFINED_FUNCTION (node)
1596     if (!node->global.inlined_to)
1597       {
1598         if (node->has_gimple_body_p ()
1599             || node->thunk.thunk_p)
1600           {
1601             struct inline_summary *info = inline_summary (node);
1602             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1603
1604             /* Do not account external functions, they will be optimized out
1605                if not inlined.  Also only count the non-cold portion of program.  */
1606             if (!DECL_EXTERNAL (node->decl)
1607                 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
1608               initial_size += info->size;
1609             info->growth = estimate_growth (node);
1610             if (dfs && dfs->next_cycle)
1611               {
1612                 struct cgraph_node *n2;
1613                 int id = dfs->scc_no + 1;
1614                 for (n2 = node; n2;
1615                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1616                   {
1617                     struct inline_summary *info2 = inline_summary (n2);
1618                     if (info2->scc_no)
1619                       break;
1620                     info2->scc_no = id;
1621                   }
1622               }
1623           }
1624
1625         for (edge = node->callers; edge; edge = edge->next_caller)
1626           if (max_count < edge->count)
1627             max_count = edge->count;
1628       }
1629   max_count_real = sreal (max_count, 0);
1630   max_relbenefit_real = sreal (RELATIVE_TIME_BENEFIT_RANGE, 0);
1631   half_int_min_real = sreal (INT_MAX / 2, 0);
1632   ipa_free_postorder_info ();
1633   initialize_growth_caches ();
1634
1635   if (dump_file)
1636     fprintf (dump_file,
1637              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1638              initial_size);
1639
1640   overall_size = initial_size;
1641   max_size = compute_max_insns (overall_size);
1642   min_size = overall_size;
1643
1644   /* Populate the heap with all edges we might inline.  */
1645
1646   FOR_EACH_DEFINED_FUNCTION (node)
1647     {
1648       bool update = false;
1649       struct cgraph_edge *next;
1650
1651       if (dump_file)
1652         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1653                  node->name (), node->order);
1654
1655       for (edge = node->callees; edge; edge = next)
1656         {
1657           next = edge->next_callee;
1658           if (edge->inline_failed
1659               && !edge->aux
1660               && can_inline_edge_p (edge, true)
1661               && want_inline_small_function_p (edge, true)
1662               && edge->inline_failed)
1663             {
1664               gcc_assert (!edge->aux);
1665               update_edge_key (edge_heap, edge);
1666             }
1667           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1668             {
1669               edge->resolve_speculation ();
1670               update = true;
1671             }
1672         }
1673       if (update)
1674         {
1675           struct cgraph_node *where = node->global.inlined_to
1676                                       ? node->global.inlined_to : node;
1677           inline_update_overall_summary (where);
1678           reset_node_growth_cache (where);
1679           reset_edge_caches (where);
1680           update_caller_keys (edge_heap, where,
1681                               updated_nodes, NULL);
1682           bitmap_clear (updated_nodes);
1683         }
1684     }
1685
1686   gcc_assert (in_lto_p
1687               || !max_count
1688               || (profile_info && flag_branch_probabilities));
1689
1690   while (!fibheap_empty (edge_heap))
1691     {
1692       int old_size = overall_size;
1693       struct cgraph_node *where, *callee;
1694       int badness = fibheap_min_key (edge_heap);
1695       int current_badness;
1696       int cached_badness;
1697       int growth;
1698
1699       edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1700       gcc_assert (edge->aux);
1701       edge->aux = NULL;
1702       if (!edge->inline_failed || !edge->callee->analyzed)
1703         continue;
1704
1705       /* Be sure that caches are maintained consistent.
1706          We can not make this ENABLE_CHECKING only because it cause different
1707          updates of the fibheap queue.  */
1708       cached_badness = edge_badness (edge, false);
1709       reset_edge_growth_cache (edge);
1710       reset_node_growth_cache (edge->callee);
1711
1712       /* When updating the edge costs, we only decrease badness in the keys.
1713          Increases of badness are handled lazilly; when we see key with out
1714          of date value on it, we re-insert it now.  */
1715       current_badness = edge_badness (edge, false);
1716       gcc_assert (cached_badness == current_badness);
1717       gcc_assert (current_badness >= badness);
1718       if (current_badness != badness)
1719         {
1720           edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1721           continue;
1722         }
1723
1724       if (!can_inline_edge_p (edge, true))
1725         {
1726           resolve_noninline_speculation (edge_heap, edge);
1727           continue;
1728         }
1729
1730       callee = edge->callee->ultimate_alias_target ();
1731       growth = estimate_edge_growth (edge);
1732       if (dump_file)
1733         {
1734           fprintf (dump_file,
1735                    "\nConsidering %s/%i with %i size\n",
1736                    callee->name (), callee->order,
1737                    inline_summary (callee)->size);
1738           fprintf (dump_file,
1739                    " to be inlined into %s/%i in %s:%i\n"
1740                    " Estimated badness is %i, frequency %.2f.\n",
1741                    edge->caller->name (), edge->caller->order,
1742                    flag_wpa ? "unknown"
1743                    : gimple_filename ((const_gimple) edge->call_stmt),
1744                    flag_wpa ? -1
1745                    : gimple_lineno ((const_gimple) edge->call_stmt),
1746                    badness,
1747                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1748           if (edge->count)
1749             fprintf (dump_file," Called %"PRId64"x\n",
1750                      edge->count);
1751           if (dump_flags & TDF_DETAILS)
1752             edge_badness (edge, true);
1753         }
1754
1755       if (overall_size + growth > max_size
1756           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1757         {
1758           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1759           report_inline_failed_reason (edge);
1760           resolve_noninline_speculation (edge_heap, edge);
1761           continue;
1762         }
1763
1764       if (!want_inline_small_function_p (edge, true))
1765         {
1766           resolve_noninline_speculation (edge_heap, edge);
1767           continue;
1768         }
1769
1770       /* Heuristics for inlining small functions work poorly for
1771          recursive calls where we do effects similar to loop unrolling.
1772          When inlining such edge seems profitable, leave decision on
1773          specific inliner.  */
1774       if (edge->recursive_p ())
1775         {
1776           where = edge->caller;
1777           if (where->global.inlined_to)
1778             where = where->global.inlined_to;
1779           if (!recursive_inlining (edge,
1780                                    flag_indirect_inlining
1781                                    ? &new_indirect_edges : NULL))
1782             {
1783               edge->inline_failed = CIF_RECURSIVE_INLINING;
1784               resolve_noninline_speculation (edge_heap, edge);
1785               continue;
1786             }
1787           reset_edge_caches (where);
1788           /* Recursive inliner inlines all recursive calls of the function
1789              at once. Consequently we need to update all callee keys.  */
1790           if (flag_indirect_inlining)
1791             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1792           update_callee_keys (edge_heap, where, updated_nodes);
1793           bitmap_clear (updated_nodes);
1794         }
1795       else
1796         {
1797           struct cgraph_node *outer_node = NULL;
1798           int depth = 0;
1799
1800           /* Consider the case where self recursive function A is inlined
1801              into B.  This is desired optimization in some cases, since it
1802              leads to effect similar of loop peeling and we might completely
1803              optimize out the recursive call.  However we must be extra
1804              selective.  */
1805
1806           where = edge->caller;
1807           while (where->global.inlined_to)
1808             {
1809               if (where->decl == callee->decl)
1810                 outer_node = where, depth++;
1811               where = where->callers->caller;
1812             }
1813           if (outer_node
1814               && !want_inline_self_recursive_call_p (edge, outer_node,
1815                                                      true, depth))
1816             {
1817               edge->inline_failed
1818                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1819                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1820               resolve_noninline_speculation (edge_heap, edge);
1821               continue;
1822             }
1823           else if (depth && dump_file)
1824             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1825
1826           gcc_checking_assert (!callee->global.inlined_to);
1827           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1828           if (flag_indirect_inlining)
1829             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1830
1831           reset_edge_caches (edge->callee);
1832           reset_node_growth_cache (callee);
1833
1834           update_callee_keys (edge_heap, where, updated_nodes);
1835         }
1836       where = edge->caller;
1837       if (where->global.inlined_to)
1838         where = where->global.inlined_to;
1839
1840       /* Our profitability metric can depend on local properties
1841          such as number of inlinable calls and size of the function body.
1842          After inlining these properties might change for the function we
1843          inlined into (since it's body size changed) and for the functions
1844          called by function we inlined (since number of it inlinable callers
1845          might change).  */
1846       update_caller_keys (edge_heap, where, updated_nodes, NULL);
1847       bitmap_clear (updated_nodes);
1848
1849       if (dump_file)
1850         {
1851           fprintf (dump_file,
1852                    " Inlined into %s which now has time %i and size %i,"
1853                    "net change of %+i.\n",
1854                    edge->caller->name (),
1855                    inline_summary (edge->caller)->time,
1856                    inline_summary (edge->caller)->size,
1857                    overall_size - old_size);
1858         }
1859       if (min_size > overall_size)
1860         {
1861           min_size = overall_size;
1862           max_size = compute_max_insns (min_size);
1863
1864           if (dump_file)
1865             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1866         }
1867     }
1868
1869   free_growth_caches ();
1870   fibheap_delete (edge_heap);
1871   if (dump_file)
1872     fprintf (dump_file,
1873              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1874              initial_size, overall_size,
1875              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1876   BITMAP_FREE (updated_nodes);
1877   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
1878 }
1879
1880 /* Flatten NODE.  Performed both during early inlining and
1881    at IPA inlining time.  */
1882
1883 static void
1884 flatten_function (struct cgraph_node *node, bool early)
1885 {
1886   struct cgraph_edge *e;
1887
1888   /* We shouldn't be called recursively when we are being processed.  */
1889   gcc_assert (node->aux == NULL);
1890
1891   node->aux = (void *) node;
1892
1893   for (e = node->callees; e; e = e->next_callee)
1894     {
1895       struct cgraph_node *orig_callee;
1896       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
1897
1898       /* We've hit cycle?  It is time to give up.  */
1899       if (callee->aux)
1900         {
1901           if (dump_file)
1902             fprintf (dump_file,
1903                      "Not inlining %s into %s to avoid cycle.\n",
1904                      xstrdup (callee->name ()),
1905                      xstrdup (e->caller->name ()));
1906           e->inline_failed = CIF_RECURSIVE_INLINING;
1907           continue;
1908         }
1909
1910       /* When the edge is already inlined, we just need to recurse into
1911          it in order to fully flatten the leaves.  */
1912       if (!e->inline_failed)
1913         {
1914           flatten_function (callee, early);
1915           continue;
1916         }
1917
1918       /* Flatten attribute needs to be processed during late inlining. For
1919          extra code quality we however do flattening during early optimization,
1920          too.  */
1921       if (!early
1922           ? !can_inline_edge_p (e, true)
1923           : !can_early_inline_edge_p (e))
1924         continue;
1925
1926       if (e->recursive_p ())
1927         {
1928           if (dump_file)
1929             fprintf (dump_file, "Not inlining: recursive call.\n");
1930           continue;
1931         }
1932
1933       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1934           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1935         {
1936           if (dump_file)
1937             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1938           continue;
1939         }
1940
1941       /* Inline the edge and flatten the inline clone.  Avoid
1942          recursing through the original node if the node was cloned.  */
1943       if (dump_file)
1944         fprintf (dump_file, " Inlining %s into %s.\n",
1945                  xstrdup (callee->name ()),
1946                  xstrdup (e->caller->name ()));
1947       orig_callee = callee;
1948       inline_call (e, true, NULL, NULL, false);
1949       if (e->callee != orig_callee)
1950         orig_callee->aux = (void *) node;
1951       flatten_function (e->callee, early);
1952       if (e->callee != orig_callee)
1953         orig_callee->aux = NULL;
1954     }
1955
1956   node->aux = NULL;
1957   if (!node->global.inlined_to)
1958     inline_update_overall_summary (node);
1959 }
1960
1961 /* Count number of callers of NODE and store it into DATA (that
1962    points to int.  Worker for cgraph_for_node_and_aliases.  */
1963
1964 static bool
1965 sum_callers (struct cgraph_node *node, void *data)
1966 {
1967   struct cgraph_edge *e;
1968   int *num_calls = (int *)data;
1969
1970   for (e = node->callers; e; e = e->next_caller)
1971     (*num_calls)++;
1972   return false;
1973 }
1974
1975 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1976    DATA points to number of calls originally found so we avoid infinite
1977    recursion.  */
1978
1979 static bool
1980 inline_to_all_callers (struct cgraph_node *node, void *data)
1981 {
1982   int *num_calls = (int *)data;
1983   bool callee_removed = false;
1984
1985   while (node->callers && !node->global.inlined_to)
1986     {
1987       struct cgraph_node *caller = node->callers->caller;
1988
1989       if (dump_file)
1990         {
1991           fprintf (dump_file,
1992                    "\nInlining %s size %i.\n",
1993                    node->name (),
1994                    inline_summary (node)->size);
1995           fprintf (dump_file,
1996                    " Called once from %s %i insns.\n",
1997                    node->callers->caller->name (),
1998                    inline_summary (node->callers->caller)->size);
1999         }
2000
2001       inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
2002       if (dump_file)
2003         fprintf (dump_file,
2004                  " Inlined into %s which now has %i size\n",
2005                  caller->name (),
2006                  inline_summary (caller)->size);
2007       if (!(*num_calls)--)
2008         {
2009           if (dump_file)
2010             fprintf (dump_file, "New calls found; giving up.\n");
2011           return callee_removed;
2012         }
2013       if (callee_removed)
2014         return true;
2015     }
2016   return false;
2017 }
2018
2019 /* Output overall time estimate.  */
2020 static void
2021 dump_overall_stats (void)
2022 {
2023   int64_t sum_weighted = 0, sum = 0;
2024   struct cgraph_node *node;
2025
2026   FOR_EACH_DEFINED_FUNCTION (node)
2027     if (!node->global.inlined_to
2028         && !node->alias)
2029       {
2030         int time = inline_summary (node)->time;
2031         sum += time;
2032         sum_weighted += time * node->count;
2033       }
2034   fprintf (dump_file, "Overall time estimate: "
2035            "%"PRId64" weighted by profile: "
2036            "%"PRId64"\n", sum, sum_weighted);
2037 }
2038
2039 /* Output some useful stats about inlining.  */
2040
2041 static void
2042 dump_inline_stats (void)
2043 {
2044   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2045   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2046   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2047   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2048   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2049   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2050   int64_t reason[CIF_N_REASONS][3];
2051   int i;
2052   struct cgraph_node *node;
2053
2054   memset (reason, 0, sizeof (reason));
2055   FOR_EACH_DEFINED_FUNCTION (node)
2056   {
2057     struct cgraph_edge *e;
2058     for (e = node->callees; e; e = e->next_callee)
2059       {
2060         if (e->inline_failed)
2061           {
2062             reason[(int) e->inline_failed][0] += e->count;
2063             reason[(int) e->inline_failed][1] += e->frequency;
2064             reason[(int) e->inline_failed][2] ++;
2065             if (DECL_VIRTUAL_P (e->callee->decl))
2066               {
2067                 if (e->indirect_inlining_edge)
2068                   noninlined_virt_indir_cnt += e->count;
2069                 else
2070                   noninlined_virt_cnt += e->count;
2071               }
2072             else
2073               {
2074                 if (e->indirect_inlining_edge)
2075                   noninlined_indir_cnt += e->count;
2076                 else
2077                   noninlined_cnt += e->count;
2078               }
2079           }
2080         else
2081           {
2082             if (e->speculative)
2083               {
2084                 if (DECL_VIRTUAL_P (e->callee->decl))
2085                   inlined_speculative_ply += e->count;
2086                 else
2087                   inlined_speculative += e->count;
2088               }
2089             else if (DECL_VIRTUAL_P (e->callee->decl))
2090               {
2091                 if (e->indirect_inlining_edge)
2092                   inlined_virt_indir_cnt += e->count;
2093                 else
2094                   inlined_virt_cnt += e->count;
2095               }
2096             else
2097               {
2098                 if (e->indirect_inlining_edge)
2099                   inlined_indir_cnt += e->count;
2100                 else
2101                   inlined_cnt += e->count;
2102               }
2103           }
2104       }
2105     for (e = node->indirect_calls; e; e = e->next_callee)
2106       if (e->indirect_info->polymorphic)
2107         indirect_poly_cnt += e->count;
2108       else
2109         indirect_cnt += e->count;
2110   }
2111   if (max_count)
2112     {
2113       fprintf (dump_file,
2114                "Inlined %"PRId64 " + speculative "
2115                "%"PRId64 " + speculative polymorphic "
2116                "%"PRId64 " + previously indirect "
2117                "%"PRId64 " + virtual "
2118                "%"PRId64 " + virtual and previously indirect "
2119                "%"PRId64 "\n" "Not inlined "
2120                "%"PRId64 " + previously indirect "
2121                "%"PRId64 " + virtual "
2122                "%"PRId64 " + virtual and previously indirect "
2123                "%"PRId64 " + stil indirect "
2124                "%"PRId64 " + still indirect polymorphic "
2125                "%"PRId64 "\n", inlined_cnt,
2126                inlined_speculative, inlined_speculative_ply,
2127                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2128                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2129                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2130       fprintf (dump_file,
2131                "Removed speculations %"PRId64 "\n",
2132                spec_rem);
2133     }
2134   dump_overall_stats ();
2135   fprintf (dump_file, "\nWhy inlining failed?\n");
2136   for (i = 0; i < CIF_N_REASONS; i++)
2137     if (reason[i][2])
2138       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
2139                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2140                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2141 }
2142
2143 /* Decide on the inlining.  We do so in the topological order to avoid
2144    expenses on updating data structures.  */
2145
2146 static unsigned int
2147 ipa_inline (void)
2148 {
2149   struct cgraph_node *node;
2150   int nnodes;
2151   struct cgraph_node **order;
2152   int i;
2153   int cold;
2154   bool remove_functions = false;
2155
2156   if (!optimize)
2157     return 0;
2158
2159   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2160
2161   if (in_lto_p && optimize)
2162     ipa_update_after_lto_read ();
2163
2164   if (dump_file)
2165     dump_inline_summaries (dump_file);
2166
2167   nnodes = ipa_reverse_postorder (order);
2168
2169   FOR_EACH_FUNCTION (node)
2170     node->aux = 0;
2171
2172   if (dump_file)
2173     fprintf (dump_file, "\nFlattening functions:\n");
2174
2175   /* In the first pass handle functions to be flattened.  Do this with
2176      a priority so none of our later choices will make this impossible.  */
2177   for (i = nnodes - 1; i >= 0; i--)
2178     {
2179       node = order[i];
2180
2181       /* Handle nodes to be flattened.
2182          Ideally when processing callees we stop inlining at the
2183          entry of cycles, possibly cloning that entry point and
2184          try to flatten itself turning it into a self-recursive
2185          function.  */
2186       if (lookup_attribute ("flatten",
2187                             DECL_ATTRIBUTES (node->decl)) != NULL)
2188         {
2189           if (dump_file)
2190             fprintf (dump_file,
2191                      "Flattening %s\n", node->name ());
2192           flatten_function (node, false);
2193         }
2194     }
2195   if (dump_file)
2196     dump_overall_stats ();
2197
2198   inline_small_functions ();
2199
2200   /* Do first after-inlining removal.  We want to remove all "stale" extern inline
2201      functions and virtual functions so we really know what is called once.  */
2202   symtab->remove_unreachable_nodes (false, dump_file);
2203   free (order);
2204
2205   /* Inline functions with a property that after inlining into all callers the
2206      code size will shrink because the out-of-line copy is eliminated.
2207      We do this regardless on the callee size as long as function growth limits
2208      are met.  */
2209   if (dump_file)
2210     fprintf (dump_file,
2211              "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2212
2213   /* Inlining one function called once has good chance of preventing
2214      inlining other function into the same callee.  Ideally we should
2215      work in priority order, but probably inlining hot functions first
2216      is good cut without the extra pain of maintaining the queue.
2217
2218      ??? this is not really fitting the bill perfectly: inlining function
2219      into callee often leads to better optimization of callee due to
2220      increased context for optimization.
2221      For example if main() function calls a function that outputs help
2222      and then function that does the main optmization, we should inline
2223      the second with priority even if both calls are cold by themselves.
2224
2225      We probably want to implement new predicate replacing our use of
2226      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2227      to be hot.  */
2228   for (cold = 0; cold <= 1; cold ++)
2229     {
2230       FOR_EACH_DEFINED_FUNCTION (node)
2231         {
2232           struct cgraph_edge *edge, *next;
2233           bool update=false;
2234
2235           for (edge = node->callees; edge; edge = next)
2236             {
2237               next = edge->next_callee;
2238               if (edge->speculative && !speculation_useful_p (edge, false))
2239                 {
2240                   edge->resolve_speculation ();
2241                   spec_rem += edge->count;
2242                   update = true;
2243                   remove_functions = true;
2244                 }
2245             }
2246           if (update)
2247             {
2248               struct cgraph_node *where = node->global.inlined_to
2249                                           ? node->global.inlined_to : node;
2250               reset_node_growth_cache (where);
2251               reset_edge_caches (where);
2252               inline_update_overall_summary (where);
2253             }
2254           if (flag_inline_functions_called_once
2255               && want_inline_function_to_all_callers_p (node, cold))
2256             {
2257               int num_calls = 0;
2258               node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls,
2259                                                       true);
2260               while (node->call_for_symbol_thunks_and_aliases (inline_to_all_callers,
2261                                                              &num_calls, true))
2262                 ;
2263               remove_functions = true;
2264             }
2265         }
2266     }
2267
2268   /* Free ipa-prop structures if they are no longer needed.  */
2269   if (optimize)
2270     ipa_free_all_structures_after_iinln ();
2271
2272   if (dump_file)
2273     {
2274       fprintf (dump_file,
2275                "\nInlined %i calls, eliminated %i functions\n\n",
2276                ncalls_inlined, nfunctions_inlined);
2277       dump_inline_stats ();
2278     }
2279
2280   if (dump_file)
2281     dump_inline_summaries (dump_file);
2282   /* In WPA we use inline summaries for partitioning process.  */
2283   if (!flag_wpa)
2284     inline_free_summary ();
2285   return remove_functions ? TODO_remove_functions : 0;
2286 }
2287
2288 /* Inline always-inline function calls in NODE.  */
2289
2290 static bool
2291 inline_always_inline_functions (struct cgraph_node *node)
2292 {
2293   struct cgraph_edge *e;
2294   bool inlined = false;
2295
2296   for (e = node->callees; e; e = e->next_callee)
2297     {
2298       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2299       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2300         continue;
2301
2302       if (e->recursive_p ())
2303         {
2304           if (dump_file)
2305             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2306                      e->callee->name ());
2307           e->inline_failed = CIF_RECURSIVE_INLINING;
2308           continue;
2309         }
2310
2311       if (!can_early_inline_edge_p (e))
2312         {
2313           /* Set inlined to true if the callee is marked "always_inline" but
2314              is not inlinable.  This will allow flagging an error later in
2315              expand_call_inline in tree-inline.c.  */
2316           if (lookup_attribute ("always_inline",
2317                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2318             inlined = true;
2319           continue;
2320         }
2321
2322       if (dump_file)
2323         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2324                  xstrdup (e->callee->name ()),
2325                  xstrdup (e->caller->name ()));
2326       inline_call (e, true, NULL, NULL, false);
2327       inlined = true;
2328     }
2329   if (inlined)
2330     inline_update_overall_summary (node);
2331
2332   return inlined;
2333 }
2334
2335 /* Decide on the inlining.  We do so in the topological order to avoid
2336    expenses on updating data structures.  */
2337
2338 static bool
2339 early_inline_small_functions (struct cgraph_node *node)
2340 {
2341   struct cgraph_edge *e;
2342   bool inlined = false;
2343
2344   for (e = node->callees; e; e = e->next_callee)
2345     {
2346       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2347       if (!inline_summary (callee)->inlinable
2348           || !e->inline_failed)
2349         continue;
2350
2351       /* Do not consider functions not declared inline.  */
2352       if (!DECL_DECLARED_INLINE_P (callee->decl)
2353           && !flag_inline_small_functions
2354           && !flag_inline_functions)
2355         continue;
2356
2357       if (dump_file)
2358         fprintf (dump_file, "Considering inline candidate %s.\n",
2359                  callee->name ());
2360
2361       if (!can_early_inline_edge_p (e))
2362         continue;
2363
2364       if (e->recursive_p ())
2365         {
2366           if (dump_file)
2367             fprintf (dump_file, "  Not inlining: recursive call.\n");
2368           continue;
2369         }
2370
2371       if (!want_early_inline_function_p (e))
2372         continue;
2373
2374       if (dump_file)
2375         fprintf (dump_file, " Inlining %s into %s.\n",
2376                  xstrdup (callee->name ()),
2377                  xstrdup (e->caller->name ()));
2378       inline_call (e, true, NULL, NULL, true);
2379       inlined = true;
2380     }
2381
2382   return inlined;
2383 }
2384
2385 unsigned int
2386 early_inliner (function *fun)
2387 {
2388   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2389   struct cgraph_edge *edge;
2390   unsigned int todo = 0;
2391   int iterations = 0;
2392   bool inlined = false;
2393
2394   if (seen_error ())
2395     return 0;
2396
2397   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2398      happens when some pass decides to construct new function and
2399      cgraph_add_new_function calls lowering passes and early optimization on
2400      it.  This may confuse ourself when early inliner decide to inline call to
2401      function clone, because function clones don't have parameter list in
2402      ipa-prop matching their signature.  */
2403   if (ipa_node_params_vector.exists ())
2404     return 0;
2405
2406 #ifdef ENABLE_CHECKING
2407   node->verify ();
2408 #endif
2409   node->remove_all_references ();
2410
2411   /* Even when not optimizing or not inlining inline always-inline
2412      functions.  */
2413   inlined = inline_always_inline_functions (node);
2414
2415   if (!optimize
2416       || flag_no_inline
2417       || !flag_early_inlining
2418       /* Never inline regular functions into always-inline functions
2419          during incremental inlining.  This sucks as functions calling
2420          always inline functions will get less optimized, but at the
2421          same time inlining of functions calling always inline
2422          function into an always inline function might introduce
2423          cycles of edges to be always inlined in the callgraph.
2424
2425          We might want to be smarter and just avoid this type of inlining.  */
2426       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2427     ;
2428   else if (lookup_attribute ("flatten",
2429                              DECL_ATTRIBUTES (node->decl)) != NULL)
2430     {
2431       /* When the function is marked to be flattened, recursively inline
2432          all calls in it.  */
2433       if (dump_file)
2434         fprintf (dump_file,
2435                  "Flattening %s\n", node->name ());
2436       flatten_function (node, true);
2437       inlined = true;
2438     }
2439   else
2440     {
2441       /* We iterate incremental inlining to get trivial cases of indirect
2442          inlining.  */
2443       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2444              && early_inline_small_functions (node))
2445         {
2446           timevar_push (TV_INTEGRATION);
2447           todo |= optimize_inline_calls (current_function_decl);
2448
2449           /* Technically we ought to recompute inline parameters so the new
2450              iteration of early inliner works as expected.  We however have
2451              values approximately right and thus we only need to update edge
2452              info that might be cleared out for newly discovered edges.  */
2453           for (edge = node->callees; edge; edge = edge->next_callee)
2454             {
2455               /* We have no summary for new bound store calls yet.  */
2456               if (inline_edge_summary_vec.length () > (unsigned)edge->uid)
2457                 {
2458                   struct inline_edge_summary *es = inline_edge_summary (edge);
2459                   es->call_stmt_size
2460                     = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2461                   es->call_stmt_time
2462                     = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2463                 }
2464               if (edge->callee->decl
2465                   && !gimple_check_call_matching_types (
2466                       edge->call_stmt, edge->callee->decl, false))
2467                 edge->call_stmt_cannot_inline_p = true;
2468             }
2469           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2470             inline_update_overall_summary (node);
2471           timevar_pop (TV_INTEGRATION);
2472           iterations++;
2473           inlined = false;
2474         }
2475       if (dump_file)
2476         fprintf (dump_file, "Iterations: %i\n", iterations);
2477     }
2478
2479   if (inlined)
2480     {
2481       timevar_push (TV_INTEGRATION);
2482       todo |= optimize_inline_calls (current_function_decl);
2483       timevar_pop (TV_INTEGRATION);
2484     }
2485
2486   fun->always_inline_functions_inlined = true;
2487
2488   return todo;
2489 }
2490
2491 /* Do inlining of small functions.  Doing so early helps profiling and other
2492    passes to be somewhat more effective and avoids some code duplication in
2493    the later real inlining pass for testcases with very many function calls.  */
2494
2495 namespace {
2496
     /* Descriptor for the early inlining pass: a GIMPLE pass named "einline",
        reported in the OPTGROUP_INLINE dump group and timed under
        TV_EARLY_INLINING.  It requires PROP_ssa, provides and destroys no
        properties, and requests no TODO flags at start or finish.  */
2497 const pass_data pass_data_early_inline =
2498 {
2499   GIMPLE_PASS, /* type */
2500   "einline", /* name */
2501   OPTGROUP_INLINE, /* optinfo_flags */
2502   TV_EARLY_INLINING, /* tv_id */
2503   PROP_ssa, /* properties_required */
2504   0, /* properties_provided */
2505   0, /* properties_destroyed */
2506   0, /* todo_flags_start */
2507   0, /* todo_flags_finish */
2508 };
2509
     /* Pass object for early inlining.  It binds pass_data_early_inline to
        the gimple_opt_pass base and overrides only execute, which is defined
        out of line below.  */
2510 class pass_early_inline : public gimple_opt_pass
2511 {
2512 public:
       /* Construct the pass for CTXT using the descriptor above.  */
2513   pass_early_inline (gcc::context *ctxt)
2514     : gimple_opt_pass (pass_data_early_inline, ctxt)
2515   {}
2516
2517   /* opt_pass methods: */
2518   virtual unsigned int execute (function *);
2519
2520 }; // class pass_early_inline
2521
     /* Run the early inliner on FUN and return the value early_inliner
        returns unchanged.  */
2522 unsigned int
2523 pass_early_inline::execute (function *fun)
2524 {
2525   return early_inliner (fun);
2526 }
2527
2528 } // anon namespace
2529
2530 gimple_opt_pass *
2531 make_pass_early_inline (gcc::context *ctxt)
2532 {
       /* Factory for the early inlining pass: allocate a new
          pass_early_inline bound to CTXT and return it through its
          gimple_opt_pass base.  NOTE(review): presumably the pass manager
          takes ownership of the returned object -- confirm against the
          caller.  */
2533   return new pass_early_inline (ctxt);
2534 }
2535
2536 namespace {
2537
     /* Descriptor for the IPA inlining pass: an IPA pass named "inline",
        reported in the OPTGROUP_INLINE dump group and timed under
        TV_IPA_INLINING.  It requires, provides and destroys no properties
        and requests TODO_dump_symtab when it finishes.  */
2538 const pass_data pass_data_ipa_inline =
2539 {
2540   IPA_PASS, /* type */
2541   "inline", /* name */
2542   OPTGROUP_INLINE, /* optinfo_flags */
2543   TV_IPA_INLINING, /* tv_id */
2544   0, /* properties_required */
2545   0, /* properties_provided */
2546   0, /* properties_destroyed */
2547   0, /* todo_flags_start */
2548   ( TODO_dump_symtab ), /* todo_flags_finish */
2549 };
2550
     /* Pass object for IPA inlining.  The constructor registers the summary
        hooks (inline_generate_summary, inline_write_summary,
        inline_read_summary) and the per-function transform
        (inline_transform); the optimization-summary, stmt_fixup and
        variable_transform hooks are left NULL.  */
2551 class pass_ipa_inline : public ipa_opt_pass_d
2552 {
2553 public:
2554   pass_ipa_inline (gcc::context *ctxt)
2555     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2556                       inline_generate_summary, /* generate_summary */
2557                       inline_write_summary, /* write_summary */
2558                       inline_read_summary, /* read_summary */
2559                       NULL, /* write_optimization_summary */
2560                       NULL, /* read_optimization_summary */
2561                       NULL, /* stmt_fixup */
2562                       0, /* function_transform_todo_flags_start */
2563                       inline_transform, /* function_transform */
2564                       NULL) /* variable_transform */
2565   {}
2566
2567   /* opt_pass methods: */
       /* The function argument is unused; the pass simply runs ipa_inline
          and returns its result.  */
2568   virtual unsigned int execute (function *) { return ipa_inline (); }
2569
2570 }; // class pass_ipa_inline
2571
2572 } // anon namespace
2573
2574 ipa_opt_pass_d *
2575 make_pass_ipa_inline (gcc::context *ctxt)
2576 {
       /* Factory for the IPA inlining pass: allocate a new pass_ipa_inline
          bound to CTXT and return it through its ipa_opt_pass_d base.
          NOTE(review): presumably the pass manager takes ownership of the
          returned object -- confirm against the caller.  */
2577   return new pass_ipa_inline (ctxt);
2578 }