gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks that a particular inlining is allowed by
  28       the limits specified by the user (allowed function growth, stack
  29       frame growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduces code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already
  50          optimized and the results of early inlining add a lot of optimization
  51          opportunities for the local optimizations.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "trans-mem.h"
  98 #include "calls.h"
  99 #include "tree-inline.h"
 100 #include "langhooks.h"
 101 #include "flags.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "params.h"
 105 #include "fibheap.h"
 106 #include "intl.h"
 107 #include "tree-pass.h"
 108 #include "coverage.h"
 109 #include "rtl.h"
 110 #include "bitmap.h"
 111 #include "profile.h"
 112 #include "predict.h"
 113 #include "vec.h"
 114 #include "hashtab.h"
 115 #include "hash-set.h"
 116 #include "machmode.h"
 117 #include "hard-reg-set.h"
 118 #include "input.h"
 119 #include "function.h"
 120 #include "basic-block.h"
 121 #include "tree-ssa-alias.h"
 122 #include "internal-fn.h"
 123 #include "gimple-expr.h"
 124 #include "is-a.h"
 125 #include "gimple.h"
 126 #include "gimple-ssa.h"
 127 #include "hash-map.h"
 128 #include "plugin-api.h"
 129 #include "ipa-ref.h"
 130 #include "cgraph.h"
 131 #include "alloc-pool.h"
 132 #include "ipa-prop.h"
 133 #include "except.h"
 134 #include "target.h"
 135 #include "ipa-inline.h"
 136 #include "ipa-utils.h"
 137 #include "sreal.h"
 138 #include "auto-profile.h"
 139 #include "cilk.h"
 140 #include "builtins.h"
 141
 142 /* Statistics we collect about inlining algorithm.  */
 143 static int overall_size;
 144 static gcov_type max_count;
 145 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 146 static gcov_type spec_rem;
 147
 148 /* Return false when inlining edge E would lead to violating
 149    limits on function unit growth or stack usage growth.
 150
 151    The relative function body growth limit is present generally
 152    to avoid problems with non-linear behavior of the compiler.
 153    To allow inlining huge functions into tiny wrapper, the limit
 154    is always based on the bigger of the two functions considered.
 155
 156    For stack growth limits we always base the growth in stack usage
 157    of the callers.  We want to prevent applications from segfaulting
 158    on stack overflow when functions with huge stack frames gets
 159    inlined. */
 160
 161 static bool
 162 caller_growth_limits (struct cgraph_edge *e)
 163 {
 164   struct cgraph_node *to = e->caller;
 165   struct cgraph_node *what = e->callee->ultimate_alias_target ();
 166   int newsize;
 167   int limit = 0;
 168   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 169   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 170
 171   /* Look for function e->caller is inlined to.  While doing
 172      so work out the largest function body on the way.  As
 173      described above, we want to base our function growth
 174      limits based on that.  Not on the self size of the
 175      outer function, not on the self size of inline code
 176      we immediately inline to.  This is the most relaxed
 177      interpretation of the rule "do not grow large functions
 178      too much in order to prevent compiler from exploding".  */
 179   while (true)
 180     {
 181       info = inline_summary (to);
 182       if (limit < info->self_size)
 183         limit = info->self_size;
 184       if (stack_size_limit < info->estimated_self_stack_size)
 185         stack_size_limit = info->estimated_self_stack_size;
 186       if (to->global.inlined_to)
 187         to = to->callers->caller;
 188       else
 189         break;
 190     }
 191
 192   what_info = inline_summary (what);
 193
 194   if (limit < what_info->self_size)
 195     limit = what_info->self_size;
 196
 197   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 198
 199   /* Check the size after inlining against the function limits.  But allow
 200      the function to shrink if it went over the limits by forced inlining.  */
 201   newsize = estimate_size_after_inlining (to, e);
 202   if (newsize >= info->size
 203       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 204       && newsize > limit)
 205     {
 206       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 207       return false;
 208     }
 209
 210   if (!what_info->estimated_stack_size)
 211     return true;
 212
 213   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 214      due to large i/o datastructures used by the Fortran front-end.
 215      We ought to ignore this limit when we know that the edge is executed
 216      on every invocation of the caller (i.e. its call statement dominates
 217      exit block).  We do not track this information, yet.  */
 218   stack_size_limit += ((gcov_type)stack_size_limit
 219                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 220
 221   inlined_stack = (outer_info->stack_frame_offset
 222                    + outer_info->estimated_self_stack_size
 223                    + what_info->estimated_stack_size);
 224   /* Check new stack consumption with stack consumption at the place
 225      stack is used.  */
 226   if (inlined_stack > stack_size_limit
 227       /* If function already has large stack usage from sibling
 228          inline call, we can inline, too.
 229          This bit overoptimistically assume that we are good at stack
 230          packing.  */
 231       && inlined_stack > info->estimated_stack_size
 232       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 233     {
 234       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 235       return false;
 236     }
 237   return true;
 238 }
 239
 240 /* Dump info about why inlining has failed.  */
 241
 242 static void
 243 report_inline_failed_reason (struct cgraph_edge *e)
 244 {
 245   if (dump_file)
 246     {
 247       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 248                xstrdup (e->caller->name ()), e->caller->order,
 249                xstrdup (e->callee->name ()), e->callee->order,
 250                cgraph_inline_failed_string (e->inline_failed));
 251     }
 252 }
 253
 254  /* Decide whether sanitizer-related attributes allow inlining. */
 255
 256 static bool
 257 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 258 {
 259   /* Don't care if sanitizer is disabled */
 260   if (!(flag_sanitize & SANITIZE_ADDRESS))
 261     return true;
 262
 263   if (!caller || !callee)
 264     return true;
 265
 266   return !!lookup_attribute ("no_sanitize_address",
 267       DECL_ATTRIBUTES (caller)) ==
 268       !!lookup_attribute ("no_sanitize_address",
 269       DECL_ATTRIBUTES (callee));
 270 }
 271
 272  /* Decide if we can inline the edge and possibly update
 273    inline_failed reason.
 274    We check whether inlining is possible at all and whether
 275    caller growth limits allow doing so.
 276
 277    if REPORT is true, output reason to the dump file.
 278
 279    if DISREGARD_LIMITS is true, ignore size limits.*/
 280
 281 static bool
 282 can_inline_edge_p (struct cgraph_edge *e, bool report,
 283                    bool disregard_limits = false)
 284 {
 285   bool inlinable = true;
 286   enum availability avail;
 287   cgraph_node *callee = e->callee->ultimate_alias_target (&avail);
 288   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 289   tree callee_tree
 290     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 291   struct function *caller_fun = e->caller->get_fun ();
 292   struct function *callee_fun = callee ? callee->get_fun () : NULL;
 293
 294   gcc_assert (e->inline_failed);
 295
 296   if (!callee || !callee->definition)
 297     {
 298       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 299       inlinable = false;
 300     }
 301   else if (callee->calls_comdat_local)
 302     {
 303       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 304       inlinable = false;
 305     }
 306   else if (!inline_summary (callee)->inlinable
 307            || (caller_fun && fn_contains_cilk_spawn_p (caller_fun)))
 308     {
 309       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 310       inlinable = false;
 311     }
 312   else if (avail <= AVAIL_INTERPOSABLE)
 313     {
 314       e->inline_failed = CIF_OVERWRITABLE;
 315       inlinable = false;
 316     }
 317   else if (e->call_stmt_cannot_inline_p)
 318     {
 319       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 320         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 321       inlinable = false;
 322     }
 323   /* Don't inline if the functions have different EH personalities.  */
 324   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 325            && DECL_FUNCTION_PERSONALITY (callee->decl)
 326            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 327                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 328     {
 329       e->inline_failed = CIF_EH_PERSONALITY;
 330       inlinable = false;
 331     }
 332   /* TM pure functions should not be inlined into non-TM_pure
 333      functions.  */
 334   else if (is_tm_pure (callee->decl)
 335            && !is_tm_pure (e->caller->decl))
 336     {
 337       e->inline_failed = CIF_UNSPECIFIED;
 338       inlinable = false;
 339     }
 340   /* Don't inline if the callee can throw non-call exceptions but the
 341      caller cannot.
 342      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 343      Move the flag into cgraph node or mirror it in the inline summary.  */
 344   else if (callee_fun && callee_fun->can_throw_non_call_exceptions
 345            && !(caller_fun && caller_fun->can_throw_non_call_exceptions))
 346     {
 347       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 348       inlinable = false;
 349     }
 350   /* Check compatibility of target optimization options.  */
 351   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 352                                                 callee->decl))
 353     {
 354       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 355       inlinable = false;
 356     }
 357   /* Don't inline a function with mismatched sanitization attributes. */
 358   else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
 359     {
 360       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 361       inlinable = false;
 362     }
 363   /* Check if caller growth allows the inlining.  */
 364   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 365            && !disregard_limits
 366            && !lookup_attribute ("flatten",
 367                                  DECL_ATTRIBUTES
 368                                    (e->caller->global.inlined_to
 369                                     ? e->caller->global.inlined_to->decl
 370                                     : e->caller->decl))
 371            && !caller_growth_limits (e))
 372     inlinable = false;
 373   /* Don't inline a function with a higher optimization level than the
 374      caller.  FIXME: this is really just tip of iceberg of handling
 375      optimization attribute.  */
 376   else if (caller_tree != callee_tree)
 377     {
 378       struct cl_optimization *caller_opt
 379         = TREE_OPTIMIZATION ((caller_tree)
 380                              ? caller_tree
 381                              : optimization_default_node);
 382
 383       struct cl_optimization *callee_opt
 384         = TREE_OPTIMIZATION ((callee_tree)
 385                              ? callee_tree
 386                              : optimization_default_node);
 387
 388       if (((caller_opt->x_optimize > callee_opt->x_optimize)
 389            || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 390           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 391           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 392         {
 393           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 394           inlinable = false;
 395         }
 396     }
 397
 398   if (!inlinable && report)
 399     report_inline_failed_reason (e);
 400   return inlinable;
 401 }
 402
 403
 404 /* Return true if the edge E is inlinable during early inlining.  */
 405
 406 static bool
 407 can_early_inline_edge_p (struct cgraph_edge *e)
 408 {
 409   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 410   /* Early inliner might get called at WPA stage when IPA pass adds new
 411      function.  In this case we can not really do any of early inlining
 412      because function bodies are missing.  */
 413   if (!gimple_has_body_p (callee->decl))
 414     {
 415       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 416       return false;
 417     }
 418   /* In early inliner some of callees may not be in SSA form yet
 419      (i.e. the callgraph is cyclic and we did not process
 420      the callee by early inliner, yet).  We don't have CIF code for this
 421      case; later we will re-do the decision in the real inliner.  */
 422   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 423       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 424     {
 425       if (dump_file)
 426         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 427       return false;
 428     }
 429   if (!can_inline_edge_p (e, true))
 430     return false;
 431   return true;
 432 }
 433
 434
 435 /* Return number of calls in N.  Ignore cheap builtins.  */
 436
 437 static int
 438 num_calls (struct cgraph_node *n)
 439 {
 440   struct cgraph_edge *e;
 441   int num = 0;
 442
 443   for (e = n->callees; e; e = e->next_callee)
 444     if (!is_inexpensive_builtin (e->callee->decl))
 445       num++;
 446   return num;
 447 }
 448
 449
 450 /* Return true if we are interested in inlining small function.  */
 451
 452 static bool
 453 want_early_inline_function_p (struct cgraph_edge *e)
 454 {
 455   bool want_inline = true;
 456   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 457
 458   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 459     ;
 460   /* For AutoFDO, we need to make sure that before profile annotation, all
 461      hot paths' IR look exactly the same as profiled binary. As a result,
 462      in einliner, we will disregard size limit and inline those callsites
 463      that are:
 464        * inlined in the profiled binary, and
 465        * the cloned callee has enough samples to be considered "hot".  */
 466   else if (flag_auto_profile && afdo_callsite_hot_enough_for_early_inline (e))
 467     ;
 468   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 469            && !flag_inline_small_functions)
 470     {
 471       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 472       report_inline_failed_reason (e);
 473       want_inline = false;
 474     }
 475   else
 476     {
 477       int growth = estimate_edge_growth (e);
 478       int n;
 479
 480       if (growth <= 0)
 481         ;
 482       else if (!e->maybe_hot_p ()
 483                && growth > 0)
 484         {
 485           if (dump_file)
 486             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 487                      "call is cold and code would grow by %i\n",
 488                      xstrdup (e->caller->name ()),
 489                      e->caller->order,
 490                      xstrdup (callee->name ()), callee->order,
 491                      growth);
 492           want_inline = false;
 493         }
 494       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 495         {
 496           if (dump_file)
 497             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 498                      "growth %i exceeds --param early-inlining-insns\n",
 499                      xstrdup (e->caller->name ()),
 500                      e->caller->order,
 501                      xstrdup (callee->name ()), callee->order,
 502                      growth);
 503           want_inline = false;
 504         }
 505       else if ((n = num_calls (callee)) != 0
 506                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 507         {
 508           if (dump_file)
 509             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 510                      "growth %i exceeds --param early-inlining-insns "
 511                      "divided by number of calls\n",
 512                      xstrdup (e->caller->name ()),
 513                      e->caller->order,
 514                      xstrdup (callee->name ()), callee->order,
 515                      growth);
 516           want_inline = false;
 517         }
 518     }
 519   return want_inline;
 520 }
 521
 522 /* Compute time of the edge->caller + edge->callee execution when inlining
 523    does not happen.  */
 524
 525 inline gcov_type
 526 compute_uninlined_call_time (struct inline_summary *callee_info,
 527                              struct cgraph_edge *edge)
 528 {
 529   gcov_type uninlined_call_time =
 530     RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
 531           CGRAPH_FREQ_BASE);
 532   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 533                                           ? edge->caller->global.inlined_to
 534                                           : edge->caller)->time;
 535   return uninlined_call_time + caller_time;
 536 }
 537
 538 /* Same as compute_uinlined_call_time but compute time when inlining
 539    does happen.  */
 540
 541 inline gcov_type
 542 compute_inlined_call_time (struct cgraph_edge *edge,
 543                            int edge_time)
 544 {
 545   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 546                                           ? edge->caller->global.inlined_to
 547                                           : edge->caller)->time;
 548   gcov_type time = (caller_time
 549                     + RDIV (((gcov_type) edge_time
 550                              - inline_edge_summary (edge)->call_stmt_time)
 551                     * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 552   /* Possible one roundoff error, but watch for overflows.  */
 553   gcc_checking_assert (time >= INT_MIN / 2);
 554   if (time < 0)
 555     time = 0;
 556   return time;
 557 }
 558
 559 /* Return true if the speedup for inlining E is bigger than
 560    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 561
 562 static bool
 563 big_speedup_p (struct cgraph_edge *e)
 564 {
 565   gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
 566                                                 e);
 567   gcov_type inlined_time = compute_inlined_call_time (e,
 568                                                       estimate_edge_time (e));
 569   if (time - inlined_time
 570       > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
 571     return true;
 572   return false;
 573 }
 574
 575 /* Return true if we are interested in inlining small function.
 576    When REPORT is true, report reason to dump file.  */
 577
 578 static bool
 579 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 580 {
 581   bool want_inline = true;
 582   struct cgraph_node *callee = e->callee->ultimate_alias_target ();
 583
 584   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 585     ;
 586   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 587            && !flag_inline_small_functions)
 588     {
 589       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 590       want_inline = false;
 591     }
 592   /* Do fast and conservative check if the function can be good
 593      inline candidate.  At the moment we allow inline hints to
 594      promote non-inline functions to inline and we increase
 595      MAX_INLINE_INSNS_SINGLE 16-fold for inline functions.  */
 596   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 597            && (!e->count || !e->maybe_hot_p ()))
 598            && inline_summary (callee)->min_size
 599                 - inline_edge_summary (e)->call_stmt_size
 600               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 601     {
 602       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 603       want_inline = false;
 604     }
 605   else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
 606            && inline_summary (callee)->min_size
 607                 - inline_edge_summary (e)->call_stmt_size
 608               > 16 * MAX_INLINE_INSNS_SINGLE)
 609     {
 610       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 611                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 612                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 613       want_inline = false;
 614     }
 615   else
 616     {
 617       int growth = estimate_edge_growth (e);
 618       inline_hints hints = estimate_edge_hints (e);
 619       bool big_speedup = big_speedup_p (e);
 620
 621       if (growth <= 0)
 622         ;
 623       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 624          hints suggests that inlining given function is very profitable.  */
 625       else if (DECL_DECLARED_INLINE_P (callee->decl)
 626                && growth >= MAX_INLINE_INSNS_SINGLE
 627                && ((!big_speedup
 628                     && !(hints & (INLINE_HINT_indirect_call
 629                                   | INLINE_HINT_known_hot
 630                                   | INLINE_HINT_loop_iterations
 631                                   | INLINE_HINT_array_index
 632                                   | INLINE_HINT_loop_stride)))
 633                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 634         {
 635           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 636           want_inline = false;
 637         }
 638       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 639                && !flag_inline_functions)
 640         {
 641           /* growth_likely_positive is expensive, always test it last.  */
 642           if (growth >= MAX_INLINE_INSNS_SINGLE
 643               || growth_likely_positive (callee, growth))
 644             {
 645               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 646               want_inline = false;
 647             }
 648         }
 649       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 650          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 651          inlining given function is very profitable.  */
 652       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 653                && !big_speedup
 654                && !(hints & INLINE_HINT_known_hot)
 655                && growth >= ((hints & (INLINE_HINT_indirect_call
 656                                        | INLINE_HINT_loop_iterations
 657                                        | INLINE_HINT_array_index
 658                                        | INLINE_HINT_loop_stride))
 659                              ? MAX (MAX_INLINE_INSNS_AUTO,
 660                                     MAX_INLINE_INSNS_SINGLE)
 661                              : MAX_INLINE_INSNS_AUTO))
 662         {
 663           /* growth_likely_positive is expensive, always test it last.  */
 664           if (growth >= MAX_INLINE_INSNS_SINGLE
 665               || growth_likely_positive (callee, growth))
 666             {
 667               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 668               want_inline = false;
 669             }
 670         }
 671       /* If call is cold, do not inline when function body would grow. */
 672       else if (!e->maybe_hot_p ()
 673                && (growth >= MAX_INLINE_INSNS_SINGLE
 674                    || growth_likely_positive (callee, growth)))
 675         {
 676           e->inline_failed = CIF_UNLIKELY_CALL;
 677           want_inline = false;
 678         }
 679     }
 680   if (!want_inline && report)
 681     report_inline_failed_reason (e);
 682   return want_inline;
 683 }
 684
 685 /* EDGE is self recursive edge.
 686    We hand two cases - when function A is inlining into itself
 687    or when function A is being inlined into another inliner copy of function
 688    A within function B.
 689
 690    In first case OUTER_NODE points to the toplevel copy of A, while
 691    in the second case OUTER_NODE points to the outermost copy of A in B.
 692
 693    In both cases we want to be extra selective since
 694    inlining the call will just introduce new recursive calls to appear.  */
 695
 696 static bool
 697 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 698                                    struct cgraph_node *outer_node,
 699                                    bool peeling,
 700                                    int depth)
 701 {
 702   char const *reason = NULL;
 703   bool want_inline = true;
 704   int caller_freq = CGRAPH_FREQ_BASE;
 705   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 706
 707   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 708     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 709
 710   if (!edge->maybe_hot_p ())
 711     {
 712       reason = "recursive call is cold";
 713       want_inline = false;
 714     }
 715   else if (max_count && !outer_node->count)
 716     {
 717       reason = "not executed in profile";
 718       want_inline = false;
 719     }
 720   else if (depth > max_depth)
 721     {
 722       reason = "--param max-inline-recursive-depth exceeded.";
 723       want_inline = false;
 724     }
 725
 726   if (outer_node->global.inlined_to)
 727     caller_freq = outer_node->callers->frequency;
 728
 729   if (!caller_freq)
 730     {
 731       reason = "function is inlined and unlikely";
 732       want_inline = false;
 733     }
 734
 735   if (!want_inline)
 736     ;
 737   /* Inlining of self recursive function into copy of itself within other function
 738      is transformation similar to loop peeling.
 739
 740      Peeling is profitable if we can inline enough copies to make probability
 741      of actual call to the self recursive function very small.  Be sure that
 742      the probability of recursion is small.
 743
 744      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 745      This way the expected number of recision is at most max_depth.  */
 746   else if (peeling)
 747     {
 748       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 749                                          / max_depth);
 750       int i;
 751       for (i = 1; i < depth; i++)
 752         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 753       if (max_count
 754           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 755               >= max_prob))
 756         {
 757           reason = "profile of recursive call is too large";
 758           want_inline = false;
 759         }
 760       if (!max_count
 761           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 762               >= max_prob))
 763         {
 764           reason = "frequency of recursive call is too large";
 765           want_inline = false;
 766         }
 767     }
 768   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 769      depth is large.  We reduce function call overhead and increase chances that
 770      things fit in hardware return predictor.
 771
 772      Recursive inlining might however increase cost of stack frame setup
 773      actually slowing down functions whose recursion tree is wide rather than
 774      deep.
 775
 776      Deciding reliably on when to do recursive inlining without profile feedback
 777      is tricky.  For now we disable recursive inlining when probability of self
 778      recursion is low.
 779
 780      Recursive inlining of self recursive call within loop also results in large loop
 781      depths that generally optimize badly.  We may want to throttle down inlining
 782      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 783      methods.  */
 784   else
 785     {
 786       if (max_count
 787           && (edge->count * 100 / outer_node->count
 788               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 789         {
 790           reason = "profile of recursive call is too small";
 791           want_inline = false;
 792         }
 793       else if (!max_count
 794                && (edge->frequency * 100 / caller_freq
 795                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 796         {
 797           reason = "frequency of recursive call is too small";
 798           want_inline = false;
 799         }
 800     }
 801   if (!want_inline && dump_file)
 802     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 803   return want_inline;
 804 }
 805
 806 /* Return true when NODE has uninlinable caller;
 807    set HAS_HOT_CALL if it has hot call.
 808    Worker for cgraph_for_node_and_aliases.  */
 809
 810 static bool
 811 check_callers (struct cgraph_node *node, void *has_hot_call)
 812 {
 813   struct cgraph_edge *e;
 814    for (e = node->callers; e; e = e->next_caller)
 815      {
 816        if (!can_inline_edge_p (e, true))
 817          return true;
 818        if (!(*(bool *)has_hot_call) && e->maybe_hot_p ())
 819          *(bool *)has_hot_call = true;
 820      }
 821   return false;
 822 }
 823
 824 /* If NODE has a caller, return true.  */
 825
 826 static bool
 827 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 828 {
 829   if (node->callers)
 830     return true;
 831   return false;
 832 }
 833
 834 /* Decide if inlining NODE would reduce unit size by eliminating
 835    the offline copy of function.
 836    When COLD is true the cold calls are considered, too.  */
 837
 838 static bool
 839 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 840 {
 841   bool has_hot_call = false;
 842
 843   if (node->ultimate_alias_target () != node)
 844     return false;
 845   /* Already inlined?  */
 846   if (node->global.inlined_to)
 847     return false;
 848   /* Does it have callers?  */
 849   if (!node->call_for_symbol_thunks_and_aliases (has_caller_p, NULL, true))
 850     return false;
 851   /* Inlining into all callers would increase size?  */
 852   if (estimate_growth (node) > 0)
 853     return false;
 854   /* All inlines must be possible.  */
 855   if (node->call_for_symbol_thunks_and_aliases (check_callers, &has_hot_call,
 856                                                 true))
 857     return false;
 858   if (!cold && !has_hot_call)
 859     return false;
 860   return true;
 861 }
 862
 863 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 864
 865 /* Return relative time improvement for inlining EDGE in range
 866    1...RELATIVE_TIME_BENEFIT_RANGE  */
 867
 868 static inline int
 869 relative_time_benefit (struct inline_summary *callee_info,
 870                        struct cgraph_edge *edge,
 871                        int edge_time)
 872 {
 873   gcov_type relbenefit;
 874   gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 875   gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
 876
 877   /* Inlining into extern inline function is not a win.  */
 878   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 879                      ? edge->caller->global.inlined_to->decl
 880                      : edge->caller->decl))
 881     return 1;
 882
 883   /* Watch overflows.  */
 884   gcc_checking_assert (uninlined_call_time >= 0);
 885   gcc_checking_assert (inlined_call_time >= 0);
 886   gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 887
 888   /* Compute relative time benefit, i.e. how much the call becomes faster.
 889      ??? perhaps computing how much the caller+calle together become faster
 890      would lead to more realistic results.  */
 891   if (!uninlined_call_time)
 892     uninlined_call_time = 1;
 893   relbenefit =
 894     RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
 895           uninlined_call_time);
 896   relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
 897   gcc_checking_assert (relbenefit >= 0);
 898   relbenefit = MAX (relbenefit, 1);
 899   return relbenefit;
 900 }
 901
 902
 903 /* A cost model driving the inlining heuristics in a way so the edges with
 904    smallest badness are inlined first.  After each inlining is performed
 905    the costs of all caller edges of nodes affected are recomputed so the
 906    metrics may accurately depend on values such as number of inlinable callers
 907    of the function or function body size.  */
 908
 909 static int
 910 edge_badness (struct cgraph_edge *edge, bool dump)
 911 {
 912   gcov_type badness;
 913   int growth, edge_time;
 914   struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
 915   struct inline_summary *callee_info = inline_summary (callee);
 916   inline_hints hints;
 917
 918   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 919     return INT_MIN;
 920
 921   growth = estimate_edge_growth (edge);
 922   edge_time = estimate_edge_time (edge);
 923   hints = estimate_edge_hints (edge);
 924   gcc_checking_assert (edge_time >= 0);
 925   gcc_checking_assert (edge_time <= callee_info->time);
 926   gcc_checking_assert (growth <= callee_info->size);
 927
 928   if (dump)
 929     {
 930       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 931                xstrdup (edge->caller->name ()),
 932                edge->caller->order,
 933                xstrdup (callee->name ()),
 934                edge->callee->order);
 935       fprintf (dump_file, "      size growth %i, time %i ",
 936                growth,
 937                edge_time);
 938       dump_inline_hints (dump_file, hints);
 939       if (big_speedup_p (edge))
 940         fprintf (dump_file, " big_speedup");
 941       fprintf (dump_file, "\n");
 942     }
 943
 944   /* Always prefer inlining saving code size.  */
 945   if (growth <= 0)
 946     {
 947       badness = INT_MIN / 2 + growth;
 948       if (dump)
 949         fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
 950                  growth);
 951     }
 952
 953   /* When profiling is available, compute badness as:
 954
 955                 relative_edge_count * relative_time_benefit
 956      goodness = -------------------------------------------
 957                 growth_f_caller
 958      badness = -goodness
 959
 960     The fraction is upside down, because on edge counts and time beneits
 961     the bounds are known. Edge growth is essentially unlimited.  */
 962
 963   else if (max_count)
 964     {
 965       sreal tmp, relbenefit_real, growth_real;
 966       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
 967       /* Capping edge->count to max_count. edge->count can be larger than
 968          max_count if an inline adds new edges which increase max_count
 969          after max_count is computed.  */
 970       gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
 971
 972       sreal_init (&relbenefit_real, relbenefit, 0);
 973       sreal_init (&growth_real, growth, 0);
 974
 975       /* relative_edge_count.  */
 976       sreal_init (&tmp, edge_count, 0);
 977       sreal_div (&tmp, &tmp, &max_count_real);
 978
 979       /* relative_time_benefit.  */
 980       sreal_mul (&tmp, &tmp, &relbenefit_real);
 981       sreal_div (&tmp, &tmp, &max_relbenefit_real);
 982
 983       /* growth_f_caller.  */
 984       sreal_mul (&tmp, &tmp, &half_int_min_real);
 985       sreal_div (&tmp, &tmp, &growth_real);
 986
 987       badness = -1 * sreal_to_int (&tmp);
 988
 989       if (dump)
 990         {
 991           fprintf (dump_file,
 992                    "      %i (relative %f): profile info. Relative count %f%s"
 993                    " * Relative benefit %f\n",
 994                    (int) badness, (double) badness / INT_MIN,
 995                    (double) edge_count / max_count,
 996                    edge->count > max_count ? " (capped to max_count)" : "",
 997                    relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 998         }
 999     }
1000
1001   /* When function local profile is available. Compute badness as:
1002
1003                  relative_time_benefit
1004      goodness =  ---------------------------------
1005                  growth_of_caller * overall_growth
1006
1007      badness = - goodness
1008
1009      compensated by the inline hints.
1010   */
1011   else if (flag_guess_branch_prob)
1012     {
1013       badness = (relative_time_benefit (callee_info, edge, edge_time)
1014                  * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
1015       badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
1016       gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
1017       if ((hints & (INLINE_HINT_indirect_call
1018                     | INLINE_HINT_loop_iterations
1019                     | INLINE_HINT_array_index
1020                     | INLINE_HINT_loop_stride))
1021           || callee_info->growth <= 0)
1022         badness *= 8;
1023       if (hints & (INLINE_HINT_same_scc))
1024         badness /= 16;
1025       else if (hints & (INLINE_HINT_in_scc))
1026         badness /= 8;
1027       else if (hints & (INLINE_HINT_cross_module))
1028         badness /= 2;
1029       gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
1030       if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
1031         badness *= 16;
1032       if (dump)
1033         {
1034           fprintf (dump_file,
1035                    "      %i: guessed profile. frequency %f,"
1036                    " benefit %f%%, time w/o inlining %i, time w inlining %i"
1037                    " overall growth %i (current) %i (original)\n",
1038                    (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
1039                    relative_time_benefit (callee_info, edge, edge_time) * 100.0
1040                    / RELATIVE_TIME_BENEFIT_RANGE,
1041                    (int)compute_uninlined_call_time (callee_info, edge),
1042                    (int)compute_inlined_call_time (edge, edge_time),
1043                    estimate_growth (callee),
1044                    callee_info->growth);
1045         }
1046     }
1047   /* When function local profile is not available or it does not give
1048      useful information (ie frequency is zero), base the cost on
1049      loop nest and overall size growth, so we optimize for overall number
1050      of functions fully inlined in program.  */
1051   else
1052     {
1053       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1054       badness = growth * 256;
1055
1056       /* Decrease badness if call is nested.  */
1057       if (badness > 0)
1058         badness >>= nest;
1059       else
1060         {
1061           badness <<= nest;
1062         }
1063       if (dump)
1064         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
1065                  nest);
1066     }
1067
1068   /* Ensure that we did not overflow in all the fixed point math above.  */
1069   gcc_assert (badness >= INT_MIN);
1070   gcc_assert (badness <= INT_MAX - 1);
1071   /* Make recursive inlining happen always after other inlining is done.  */
1072   if (edge->recursive_p ())
1073     return badness + 1;
1074   else
1075     return badness;
1076 }
1077
1078 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1079 static inline void
1080 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1081 {
1082   int badness = edge_badness (edge, false);
1083   if (edge->aux)
1084     {
1085       fibnode_t n = (fibnode_t) edge->aux;
1086       gcc_checking_assert (n->data == edge);
1087
1088       /* fibheap_replace_key only decrease the keys.
1089          When we increase the key we do not update heap
1090          and instead re-insert the element once it becomes
1091          a minimum of heap.  */
1092       if (badness < n->key)
1093         {
1094           if (dump_file && (dump_flags & TDF_DETAILS))
1095             {
1096               fprintf (dump_file,
1097                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
1098                        xstrdup (edge->caller->name ()),
1099                        edge->caller->order,
1100                        xstrdup (edge->callee->name ()),
1101                        edge->callee->order,
1102                        (int)n->key,
1103                        badness);
1104             }
1105           fibheap_replace_key (heap, n, badness);
1106           gcc_checking_assert (n->key == badness);
1107         }
1108     }
1109   else
1110     {
1111        if (dump_file && (dump_flags & TDF_DETAILS))
1112          {
1113            fprintf (dump_file,
1114                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
1115                     xstrdup (edge->caller->name ()),
1116                     edge->caller->order,
1117                     xstrdup (edge->callee->name ()),
1118                     edge->callee->order,
1119                     badness);
1120          }
1121       edge->aux = fibheap_insert (heap, badness, edge);
1122     }
1123 }
1124
1125
1126 /* NODE was inlined.
1127    All caller edges needs to be resetted because
1128    size estimates change. Similarly callees needs reset
1129    because better context may be known.  */
1130
1131 static void
1132 reset_edge_caches (struct cgraph_node *node)
1133 {
1134   struct cgraph_edge *edge;
1135   struct cgraph_edge *e = node->callees;
1136   struct cgraph_node *where = node;
1137   struct ipa_ref *ref;
1138
1139   if (where->global.inlined_to)
1140     where = where->global.inlined_to;
1141
1142   /* WHERE body size has changed, the cached growth is invalid.  */
1143   reset_node_growth_cache (where);
1144
1145   for (edge = where->callers; edge; edge = edge->next_caller)
1146     if (edge->inline_failed)
1147       reset_edge_growth_cache (edge);
1148
1149   FOR_EACH_ALIAS (where, ref)
1150     reset_edge_caches (dyn_cast <cgraph_node *> (ref->referring));
1151
1152   if (!e)
1153     return;
1154
1155   while (true)
1156     if (!e->inline_failed && e->callee->callees)
1157       e = e->callee->callees;
1158     else
1159       {
1160         if (e->inline_failed)
1161           reset_edge_growth_cache (e);
1162         if (e->next_callee)
1163           e = e->next_callee;
1164         else
1165           {
1166             do
1167               {
1168                 if (e->caller == node)
1169                   return;
1170                 e = e->caller->callers;
1171               }
1172             while (!e->next_callee);
1173             e = e->next_callee;
1174           }
1175       }
1176 }
1177
1178 /* Recompute HEAP nodes for each of caller of NODE.
1179    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1180    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1181    it is inlinable. Otherwise check all edges.  */
1182
1183 static void
1184 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1185                     bitmap updated_nodes,
1186                     struct cgraph_edge *check_inlinablity_for)
1187 {
1188   struct cgraph_edge *edge;
1189   struct ipa_ref *ref;
1190
1191   if ((!node->alias && !inline_summary (node)->inlinable)
1192       || node->global.inlined_to)
1193     return;
1194   if (!bitmap_set_bit (updated_nodes, node->uid))
1195     return;
1196
1197   FOR_EACH_ALIAS (node, ref)
1198     {
1199       struct cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
1200       update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1201     }
1202
1203   for (edge = node->callers; edge; edge = edge->next_caller)
1204     if (edge->inline_failed)
1205       {
1206         if (!check_inlinablity_for
1207             || check_inlinablity_for == edge)
1208           {
1209             if (can_inline_edge_p (edge, false)
1210                 && want_inline_small_function_p (edge, false))
1211               update_edge_key (heap, edge);
1212             else if (edge->aux)
1213               {
1214                 report_inline_failed_reason (edge);
1215                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1216                 edge->aux = NULL;
1217               }
1218           }
1219         else if (edge->aux)
1220           update_edge_key (heap, edge);
1221       }
1222 }
1223
1224 /* Recompute HEAP nodes for each uninlined call in NODE.
1225    This is used when we know that edge badnesses are going only to increase
1226    (we introduced new call site) and thus all we need is to insert newly
1227    created edges into heap.  */
1228
1229 static void
1230 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1231                     bitmap updated_nodes)
1232 {
1233   struct cgraph_edge *e = node->callees;
1234
1235   if (!e)
1236     return;
1237   while (true)
1238     if (!e->inline_failed && e->callee->callees)
1239       e = e->callee->callees;
1240     else
1241       {
1242         enum availability avail;
1243         struct cgraph_node *callee;
1244         /* We do not reset callee growth cache here.  Since we added a new call,
1245            growth chould have just increased and consequentely badness metric
1246            don't need updating.  */
1247         if (e->inline_failed
1248             && (callee = e->callee->ultimate_alias_target (&avail))
1249             && inline_summary (callee)->inlinable
1250             && avail >= AVAIL_AVAILABLE
1251             && !bitmap_bit_p (updated_nodes, callee->uid))
1252           {
1253             if (can_inline_edge_p (e, false)
1254                 && want_inline_small_function_p (e, false))
1255               update_edge_key (heap, e);
1256             else if (e->aux)
1257               {
1258                 report_inline_failed_reason (e);
1259                 fibheap_delete_node (heap, (fibnode_t) e->aux);
1260                 e->aux = NULL;
1261               }
1262           }
1263         if (e->next_callee)
1264           e = e->next_callee;
1265         else
1266           {
1267             do
1268               {
1269                 if (e->caller == node)
1270                   return;
1271                 e = e->caller->callers;
1272               }
1273             while (!e->next_callee);
1274             e = e->next_callee;
1275           }
1276       }
1277 }
1278
1279 /* Enqueue all recursive calls from NODE into priority queue depending on
1280    how likely we want to recursively inline the call.  */
1281
1282 static void
1283 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1284                         fibheap_t heap)
1285 {
1286   struct cgraph_edge *e;
1287   enum availability avail;
1288
1289   for (e = where->callees; e; e = e->next_callee)
1290     if (e->callee == node
1291         || (e->callee->ultimate_alias_target (&avail) == node
1292             && avail > AVAIL_INTERPOSABLE))
1293       {
1294         /* When profile feedback is available, prioritize by expected number
1295            of calls.  */
1296         fibheap_insert (heap,
1297                         !max_count ? -e->frequency
1298                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1299                         e);
1300       }
1301   for (e = where->callees; e; e = e->next_callee)
1302     if (!e->inline_failed)
1303       lookup_recursive_calls (node, e->callee, heap);
1304 }
1305
1306 /* Decide on recursive inlining: in the case function has recursive calls,
1307    inline until body size reaches given argument.  If any new indirect edges
1308    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1309    is NULL.  */
1310
1311 static bool
1312 recursive_inlining (struct cgraph_edge *edge,
1313                     vec<cgraph_edge *> *new_edges)
1314 {
1315   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1316   fibheap_t heap;
1317   struct cgraph_node *node;
1318   struct cgraph_edge *e;
1319   struct cgraph_node *master_clone = NULL, *next;
1320   int depth = 0;
1321   int n = 0;
1322
1323   node = edge->caller;
1324   if (node->global.inlined_to)
1325     node = node->global.inlined_to;
1326
1327   if (DECL_DECLARED_INLINE_P (node->decl))
1328     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1329
1330   /* Make sure that function is small enough to be considered for inlining.  */
1331   if (estimate_size_after_inlining (node, edge)  >= limit)
1332     return false;
1333   heap = fibheap_new ();
1334   lookup_recursive_calls (node, node, heap);
1335   if (fibheap_empty (heap))
1336     {
1337       fibheap_delete (heap);
1338       return false;
1339     }
1340
1341   if (dump_file)
1342     fprintf (dump_file,
1343              "  Performing recursive inlining on %s\n",
1344              node->name ());
1345
1346   /* Do the inlining and update list of recursive call during process.  */
1347   while (!fibheap_empty (heap))
1348     {
1349       struct cgraph_edge *curr
1350         = (struct cgraph_edge *) fibheap_extract_min (heap);
1351       struct cgraph_node *cnode, *dest = curr->callee;
1352
1353       if (!can_inline_edge_p (curr, true))
1354         continue;
1355
1356       /* MASTER_CLONE is produced in the case we already started modified
1357          the function. Be sure to redirect edge to the original body before
1358          estimating growths otherwise we will be seeing growths after inlining
1359          the already modified body.  */
1360       if (master_clone)
1361         {
1362           curr->redirect_callee (master_clone);
1363           reset_edge_growth_cache (curr);
1364         }
1365
1366       if (estimate_size_after_inlining (node, curr) > limit)
1367         {
1368           curr->redirect_callee (dest);
1369           reset_edge_growth_cache (curr);
1370           break;
1371         }
1372
1373       depth = 1;
1374       for (cnode = curr->caller;
1375            cnode->global.inlined_to; cnode = cnode->callers->caller)
1376         if (node->decl
1377             == curr->callee->ultimate_alias_target ()->decl)
1378           depth++;
1379
1380       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1381         {
1382           curr->redirect_callee (dest);
1383           reset_edge_growth_cache (curr);
1384           continue;
1385         }
1386
1387       if (dump_file)
1388         {
1389           fprintf (dump_file,
1390                    "   Inlining call of depth %i", depth);
1391           if (node->count)
1392             {
1393               fprintf (dump_file, " called approx. %.2f times per call",
1394                        (double)curr->count / node->count);
1395             }
1396           fprintf (dump_file, "\n");
1397         }
1398       if (!master_clone)
1399         {
1400           /* We need original clone to copy around.  */
1401           master_clone = node->create_clone (node->decl, node->count,
1402             CGRAPH_FREQ_BASE, false, vNULL,
1403             true, NULL, NULL);
1404           for (e = master_clone->callees; e; e = e->next_callee)
1405             if (!e->inline_failed)
1406               clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1407           curr->redirect_callee (master_clone);
1408           reset_edge_growth_cache (curr);
1409         }
1410
1411       inline_call (curr, false, new_edges, &overall_size, true);
1412       lookup_recursive_calls (node, curr->callee, heap);
1413       n++;
1414     }
1415
1416   if (!fibheap_empty (heap) && dump_file)
1417     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1418   fibheap_delete (heap);
1419
1420   if (!master_clone)
1421     return false;
1422
1423   if (dump_file)
1424     fprintf (dump_file,
1425              "\n   Inlined %i times, "
1426              "body grown from size %i to %i, time %i to %i\n", n,
1427              inline_summary (master_clone)->size, inline_summary (node)->size,
1428              inline_summary (master_clone)->time, inline_summary (node)->time);
1429
1430   /* Remove master clone we used for inlining.  We rely that clones inlined
1431      into master clone gets queued just before master clone so we don't
1432      need recursion.  */
1433   for (node = symtab->first_function (); node != master_clone;
1434        node = next)
1435     {
1436       next = symtab->next_function (node);
1437       if (node->global.inlined_to == master_clone)
1438         node->remove ();
1439     }
1440   master_clone->remove ();
1441   return true;
1442 }
1443
1444
1445 /* Given whole compilation unit estimate of INSNS, compute how large we can
1446    allow the unit to grow.  */
1447
1448 static int
1449 compute_max_insns (int insns)
1450 {
1451   int max_insns = insns;
1452   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1453     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1454
1455   return ((int64_t) max_insns
1456           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1457 }
1458
1459
1460 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1461
1462 static void
1463 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge *> new_edges)
1464 {
1465   while (new_edges.length () > 0)
1466     {
1467       struct cgraph_edge *edge = new_edges.pop ();
1468
1469       gcc_assert (!edge->aux);
1470       if (edge->inline_failed
1471           && can_inline_edge_p (edge, true)
1472           && want_inline_small_function_p (edge, true))
1473         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1474     }
1475 }
1476
1477 /* Remove EDGE from the fibheap.  */
1478
1479 static void
1480 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1481 {
1482   if (e->callee)
1483     reset_node_growth_cache (e->callee);
1484   if (e->aux)
1485     {
1486       fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1487       e->aux = NULL;
1488     }
1489 }
1490
1491 /* Return true if speculation of edge E seems useful.
1492    If ANTICIPATE_INLINING is true, be conservative and hope that E
1493    may get inlined.  */
1494
1495 bool
1496 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1497 {
1498   enum availability avail;
1499   struct cgraph_node *target = e->callee->ultimate_alias_target (&avail);
1500   struct cgraph_edge *direct, *indirect;
1501   struct ipa_ref *ref;
1502
1503   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1504
1505   if (!e->maybe_hot_p ())
1506     return false;
1507
1508   /* See if IP optimizations found something potentially useful about the
1509      function.  For now we look only for CONST/PURE flags.  Almost everything
1510      else we propagate is useless.  */
1511   if (avail >= AVAIL_AVAILABLE)
1512     {
1513       int ecf_flags = flags_from_decl_or_type (target->decl);
1514       if (ecf_flags & ECF_CONST)
1515         {
1516           e->speculative_call_info (direct, indirect, ref);
1517           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1518             return true;
1519         }
1520       else if (ecf_flags & ECF_PURE)
1521         {
1522           e->speculative_call_info (direct, indirect, ref);
1523           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1524             return true;
1525         }
1526     }
1527   /* If we did not managed to inline the function nor redirect
1528      to an ipa-cp clone (that are seen by having local flag set),
1529      it is probably pointless to inline it unless hardware is missing
1530      indirect call predictor.  */
1531   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1532     return false;
1533   /* For overwritable targets there is not much to do.  */
1534   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1535     return false;
1536   /* OK, speculation seems interesting.  */
1537   return true;
1538 }
1539
1540 /* We know that EDGE is not going to be inlined.
1541    See if we can remove speculation.  */
1542
1543 static void
1544 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1545 {
1546   if (edge->speculative && !speculation_useful_p (edge, false))
1547     {
1548       struct cgraph_node *node = edge->caller;
1549       struct cgraph_node *where = node->global.inlined_to
1550                                   ? node->global.inlined_to : node;
1551       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1552
1553       spec_rem += edge->count;
1554       edge->resolve_speculation ();
1555       reset_edge_caches (where);
1556       inline_update_overall_summary (where);
1557       update_caller_keys (edge_heap, where,
1558                           updated_nodes, NULL);
1559       update_callee_keys (edge_heap, where,
1560                           updated_nodes);
1561       BITMAP_FREE (updated_nodes);
1562     }
1563 }
1564
1565 /* We use greedy algorithm for inlining of small functions:
1566    All inline candidates are put into prioritized heap ordered in
1567    increasing badness.
1568
1569    The inlining of small functions is bounded by unit growth parameters.  */
1570
1571 static void
1572 inline_small_functions (void)
1573 {
1574   struct cgraph_node *node;
1575   struct cgraph_edge *edge;
1576   fibheap_t edge_heap = fibheap_new ();
1577   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1578   int min_size, max_size;
1579   auto_vec<cgraph_edge *> new_indirect_edges;
1580   int initial_size = 0;
1581   struct cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
1582   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1583   if (flag_indirect_inlining)
1584     new_indirect_edges.create (8);
1585
1586   edge_removal_hook_holder
1587     = symtab->add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1588
1589   /* Compute overall unit size and other global parameters used by badness
1590      metrics.  */
1591
1592   max_count = 0;
1593   ipa_reduced_postorder (order, true, true, NULL);
1594   free (order);
1595
1596   FOR_EACH_DEFINED_FUNCTION (node)
1597     if (!node->global.inlined_to)
1598       {
1599         if (node->has_gimple_body_p ()
1600             || node->thunk.thunk_p)
1601           {
1602             struct inline_summary *info = inline_summary (node);
1603             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1604
1605             /* Do not account external functions, they will be optimized out
1606                if not inlined.  Also only count the non-cold portion of program.  */
1607             if (!DECL_EXTERNAL (node->decl)
1608                 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
1609               initial_size += info->size;
1610             info->growth = estimate_growth (node);
1611             if (dfs && dfs->next_cycle)
1612               {
1613                 struct cgraph_node *n2;
1614                 int id = dfs->scc_no + 1;
1615                 for (n2 = node; n2;
1616                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1617                   {
1618                     struct inline_summary *info2 = inline_summary (n2);
1619                     if (info2->scc_no)
1620                       break;
1621                     info2->scc_no = id;
1622                   }
1623               }
1624           }
1625
1626         for (edge = node->callers; edge; edge = edge->next_caller)
1627           if (max_count < edge->count)
1628             max_count = edge->count;
1629       }
1630   sreal_init (&max_count_real, max_count, 0);
1631   sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1632   sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1633   ipa_free_postorder_info ();
1634   initialize_growth_caches ();
1635
1636   if (dump_file)
1637     fprintf (dump_file,
1638              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1639              initial_size);
1640
1641   overall_size = initial_size;
1642   max_size = compute_max_insns (overall_size);
1643   min_size = overall_size;
1644
1645   /* Populate the heap with all edges we might inline.  */
1646
1647   FOR_EACH_DEFINED_FUNCTION (node)
1648     {
1649       bool update = false;
1650       struct cgraph_edge *next;
1651
1652       if (dump_file)
1653         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1654                  node->name (), node->order);
1655
1656       for (edge = node->callees; edge; edge = next)
1657         {
1658           next = edge->next_callee;
1659           if (edge->inline_failed
1660               && !edge->aux
1661               && can_inline_edge_p (edge, true)
1662               && want_inline_small_function_p (edge, true)
1663               && edge->inline_failed)
1664             {
1665               gcc_assert (!edge->aux);
1666               update_edge_key (edge_heap, edge);
1667             }
1668           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1669             {
1670               edge->resolve_speculation ();
1671               update = true;
1672             }
1673         }
1674       if (update)
1675         {
1676           struct cgraph_node *where = node->global.inlined_to
1677                                       ? node->global.inlined_to : node;
1678           inline_update_overall_summary (where);
1679           reset_node_growth_cache (where);
1680           reset_edge_caches (where);
1681           update_caller_keys (edge_heap, where,
1682                               updated_nodes, NULL);
1683           bitmap_clear (updated_nodes);
1684         }
1685     }
1686
1687   gcc_assert (in_lto_p
1688               || !max_count
1689               || (profile_info && flag_branch_probabilities));
1690
1691   while (!fibheap_empty (edge_heap))
1692     {
1693       int old_size = overall_size;
1694       struct cgraph_node *where, *callee;
1695       int badness = fibheap_min_key (edge_heap);
1696       int current_badness;
1697       int cached_badness;
1698       int growth;
1699
1700       edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1701       gcc_assert (edge->aux);
1702       edge->aux = NULL;
1703       if (!edge->inline_failed || !edge->callee->analyzed)
1704         continue;
1705
1706       /* Be sure that caches are maintained consistent.
1707          We can not make this ENABLE_CHECKING only because it cause different
1708          updates of the fibheap queue.  */
1709       cached_badness = edge_badness (edge, false);
1710       reset_edge_growth_cache (edge);
1711       reset_node_growth_cache (edge->callee);
1712
1713       /* When updating the edge costs, we only decrease badness in the keys.
1714          Increases of badness are handled lazilly; when we see key with out
1715          of date value on it, we re-insert it now.  */
1716       current_badness = edge_badness (edge, false);
1717       gcc_assert (cached_badness == current_badness);
1718       gcc_assert (current_badness >= badness);
1719       if (current_badness != badness)
1720         {
1721           edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1722           continue;
1723         }
1724
1725       if (!can_inline_edge_p (edge, true))
1726         {
1727           resolve_noninline_speculation (edge_heap, edge);
1728           continue;
1729         }
1730
1731       callee = edge->callee->ultimate_alias_target ();
1732       growth = estimate_edge_growth (edge);
1733       if (dump_file)
1734         {
1735           fprintf (dump_file,
1736                    "\nConsidering %s/%i with %i size\n",
1737                    callee->name (), callee->order,
1738                    inline_summary (callee)->size);
1739           fprintf (dump_file,
1740                    " to be inlined into %s/%i in %s:%i\n"
1741                    " Estimated badness is %i, frequency %.2f.\n",
1742                    edge->caller->name (), edge->caller->order,
1743                    flag_wpa ? "unknown"
1744                    : gimple_filename ((const_gimple) edge->call_stmt),
1745                    flag_wpa ? -1
1746                    : gimple_lineno ((const_gimple) edge->call_stmt),
1747                    badness,
1748                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1749           if (edge->count)
1750             fprintf (dump_file," Called %"PRId64"x\n",
1751                      edge->count);
1752           if (dump_flags & TDF_DETAILS)
1753             edge_badness (edge, true);
1754         }
1755
1756       if (overall_size + growth > max_size
1757           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1758         {
1759           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1760           report_inline_failed_reason (edge);
1761           resolve_noninline_speculation (edge_heap, edge);
1762           continue;
1763         }
1764
1765       if (!want_inline_small_function_p (edge, true))
1766         {
1767           resolve_noninline_speculation (edge_heap, edge);
1768           continue;
1769         }
1770
1771       /* Heuristics for inlining small functions work poorly for
1772          recursive calls where we do effects similar to loop unrolling.
1773          When inlining such edge seems profitable, leave decision on
1774          specific inliner.  */
1775       if (edge->recursive_p ())
1776         {
1777           where = edge->caller;
1778           if (where->global.inlined_to)
1779             where = where->global.inlined_to;
1780           if (!recursive_inlining (edge,
1781                                    flag_indirect_inlining
1782                                    ? &new_indirect_edges : NULL))
1783             {
1784               edge->inline_failed = CIF_RECURSIVE_INLINING;
1785               resolve_noninline_speculation (edge_heap, edge);
1786               continue;
1787             }
1788           reset_edge_caches (where);
1789           /* Recursive inliner inlines all recursive calls of the function
1790              at once. Consequently we need to update all callee keys.  */
1791           if (flag_indirect_inlining)
1792             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1793           update_callee_keys (edge_heap, where, updated_nodes);
1794           bitmap_clear (updated_nodes);
1795         }
1796       else
1797         {
1798           struct cgraph_node *outer_node = NULL;
1799           int depth = 0;
1800
1801           /* Consider the case where self recursive function A is inlined
1802              into B.  This is desired optimization in some cases, since it
1803              leads to effect similar of loop peeling and we might completely
1804              optimize out the recursive call.  However we must be extra
1805              selective.  */
1806
1807           where = edge->caller;
1808           while (where->global.inlined_to)
1809             {
1810               if (where->decl == callee->decl)
1811                 outer_node = where, depth++;
1812               where = where->callers->caller;
1813             }
1814           if (outer_node
1815               && !want_inline_self_recursive_call_p (edge, outer_node,
1816                                                      true, depth))
1817             {
1818               edge->inline_failed
1819                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1820                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1821               resolve_noninline_speculation (edge_heap, edge);
1822               continue;
1823             }
1824           else if (depth && dump_file)
1825             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1826
1827           gcc_checking_assert (!callee->global.inlined_to);
1828           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1829           if (flag_indirect_inlining)
1830             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1831
1832           reset_edge_caches (edge->callee);
1833           reset_node_growth_cache (callee);
1834
1835           update_callee_keys (edge_heap, where, updated_nodes);
1836         }
1837       where = edge->caller;
1838       if (where->global.inlined_to)
1839         where = where->global.inlined_to;
1840
1841       /* Our profitability metric can depend on local properties
1842          such as number of inlinable calls and size of the function body.
1843          After inlining these properties might change for the function we
1844          inlined into (since it's body size changed) and for the functions
1845          called by function we inlined (since number of it inlinable callers
1846          might change).  */
1847       update_caller_keys (edge_heap, where, updated_nodes, NULL);
1848       bitmap_clear (updated_nodes);
1849
1850       if (dump_file)
1851         {
1852           fprintf (dump_file,
1853                    " Inlined into %s which now has time %i and size %i,"
1854                    "net change of %+i.\n",
1855                    edge->caller->name (),
1856                    inline_summary (edge->caller)->time,
1857                    inline_summary (edge->caller)->size,
1858                    overall_size - old_size);
1859         }
1860       if (min_size > overall_size)
1861         {
1862           min_size = overall_size;
1863           max_size = compute_max_insns (min_size);
1864
1865           if (dump_file)
1866             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1867         }
1868     }
1869
1870   free_growth_caches ();
1871   fibheap_delete (edge_heap);
1872   if (dump_file)
1873     fprintf (dump_file,
1874              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1875              initial_size, overall_size,
1876              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1877   BITMAP_FREE (updated_nodes);
1878   symtab->remove_edge_removal_hook (edge_removal_hook_holder);
1879 }
1880
1881 /* Flatten NODE.  Performed both during early inlining and
1882    at IPA inlining time.

        Every call in NODE that the inlining predicate accepts is inlined, and
        the inlined callee is then flattened recursively.  EARLY selects the
        predicate: can_early_inline_edge_p when true, can_inline_edge_p
        otherwise.  While NODE is being processed its aux pointer is non-NULL;
        a callee whose aux is already set is treated as a cycle and the edge
        is marked CIF_RECURSIVE_INLINING instead of being inlined.  */
1883
1884 static void
1885 flatten_function (struct cgraph_node *node, bool early)
1886 {
1887   struct cgraph_edge *e;
1888
1889   /* We shouldn't be called recursively when we are being processed.  */
1890   gcc_assert (node->aux == NULL);
1891
       /* Mark NODE as in progress; the cycle test below keys off this.  */
1892   node->aux = (void *) node;
1893
1894   for (e = node->callees; e; e = e->next_callee)
1895     {
1896       struct cgraph_node *orig_callee;
1897       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
1898
1899       /* We've hit cycle?  It is time to give up.  */
1900       if (callee->aux)
1901         {
               /* NOTE(review): the xstrdup copies handed to fprintf here (and
                  in the " Inlining" dump below) are never freed.  */
1902           if (dump_file)
1903             fprintf (dump_file,
1904                      "Not inlining %s into %s to avoid cycle.\n",
1905                      xstrdup (callee->name ()),
1906                      xstrdup (e->caller->name ()));
1907           e->inline_failed = CIF_RECURSIVE_INLINING;
1908           continue;
1909         }
1910
1911       /* When the edge is already inlined, we just need to recurse into
1912          it in order to fully flatten the leaves.  */
1913       if (!e->inline_failed)
1914         {
1915           flatten_function (callee, early);
1916           continue;
1917         }
1918
1919       /* Flatten attribute needs to be processed during late inlining. For
1920          extra code quality we however do flattening during early optimization,
1921          too.  */
1922       if (!early
1923           ? !can_inline_edge_p (e, true)
1924           : !can_early_inline_edge_p (e))
1925         continue;
1926
           /* Recursive edges are skipped without touching e->inline_failed.  */
1927       if (e->recursive_p ())
1928         {
1929           if (dump_file)
1930             fprintf (dump_file, "Not inlining: recursive call.\n");
1931           continue;
1932         }
1933
           /* Caller and callee must agree on whether they are in SSA form.  */
1934       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1935           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1936         {
1937           if (dump_file)
1938             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1939           continue;
1940         }
1941
1942       /* Inline the edge and flatten the inline clone.  Avoid
1943          recursing through the original node if the node was cloned.  */
1944       if (dump_file)
1945         fprintf (dump_file, " Inlining %s into %s.\n",
1946                  xstrdup (callee->name ()),
1947                  xstrdup (e->caller->name ()));
1948       orig_callee = callee;
1949       inline_call (e, true, NULL, NULL, false);
           /* If E now points at a different node than before the call (the
              clone case mentioned above), mark the original callee as in
              progress while the new callee is flattened, then unmark it.  */
1950       if (e->callee != orig_callee)
1951         orig_callee->aux = (void *) node;
1952       flatten_function (e->callee, early);
1953       if (e->callee != orig_callee)
1954         orig_callee->aux = NULL;
1955     }
1956
       /* Processing done: clear the in-progress mark.  The overall summary
          is refreshed only when NODE has no inlined_to node.  */
1957   node->aux = NULL;
1958   if (!node->global.inlined_to)
1959     inline_update_overall_summary (node);
1960 }
1961
1962 /* Count the caller edges of NODE and add that number to the int that
1963    DATA points to.  Worker for cgraph_for_node_and_aliases.
        The counter is incremented, not reset, so the caller must initialize
        it.  Always returns false.  */
1964
1965 static bool
1966 sum_callers (struct cgraph_node *node, void *data)
1967 {
1968   struct cgraph_edge *e;
1969   int *num_calls = (int *)data;
1970
       /* One increment per edge on NODE's caller list.  */
1971   for (e = node->callers; e; e = e->next_caller)
1972     (*num_calls)++;
1973   return false;
1974 }
1975
1976 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1977    DATA points to number of calls originally found so we avoid infinite
1978    recursion.

        Returns true as soon as inline_call reports that the callee was
        removed, the value of that removal flag when the call budget in DATA
        runs out, and false once NODE has no callers left or has an
        inlined_to node.  */
1979
1980 static bool
1981 inline_to_all_callers (struct cgraph_node *node, void *data)
1982 {
1983   int *num_calls = (int *)data;
1984   bool callee_removed = false;
1985
1986   while (node->callers && !node->global.inlined_to)
1987     {
           /* Remember the caller now; it is used for the dump after
              inline_call has run.  */
1988       struct cgraph_node *caller = node->callers->caller;
1989
1990       if (dump_file)
1991         {
1992           fprintf (dump_file,
1993                    "\nInlining %s size %i.\n",
1994                    node->name (),
1995                    inline_summary (node)->size);
1996           fprintf (dump_file,
1997                    " Called once from %s %i insns.\n",
1998                    node->callers->caller->name (),
1999                    inline_summary (node->callers->caller)->size);
2000         }
2001
           /* Always inline the first edge on the caller list; the loop
              condition re-reads the list on each iteration.  */
2002       inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
2003       if (dump_file)
2004         fprintf (dump_file,
2005                  " Inlined into %s which now has %i size\n",
2006                  caller->name (),
2007                  inline_summary (caller)->size);
           /* The counter is tested before it is decremented, so this fires
              on the first inlining done after it has already reached zero.  */
2008       if (!(*num_calls)--)
2009         {
2010           if (dump_file)
2011             fprintf (dump_file, "New calls found; giving up.\n");
2012           return callee_removed;
2013         }
           /* Stop walking NODE once inline_call says the callee is gone.  */
2014       if (callee_removed)
2015         return true;
2016     }
2017   return false;
2018 }
2019
2020 /* Output overall time estimate.
        Sums the inline_summary time of every defined function that has no
        inlined_to node and is not an alias, once plainly and once multiplied
        by node->count.  Writes to dump_file without checking it, so callers
        must ensure it is set.  */
2021 static void
2022 dump_overall_stats (void)
2023 {
2024   int64_t sum_weighted = 0, sum = 0;
2025   struct cgraph_node *node;
2026
2027   FOR_EACH_DEFINED_FUNCTION (node)
2028     if (!node->global.inlined_to
2029         && !node->alias)
2030       {
2031         int time = inline_summary (node)->time;
2032         sum += time;
             /* NOTE(review): TIME is an int and the type of node->count is
                not visible here; confirm the product is evaluated in a wide
                enough type before it is accumulated.  */
2033         sum_weighted += time * node->count;
2034       }
2035   fprintf (dump_file, "Overall time estimate: "
2036            "%"PRId64" weighted by profile: "
2037            "%"PRId64"\n", sum, sum_weighted);
2038 }
2039
2040 /* Output some useful stats about inlining.

        Walks the callee edges of every defined function and accumulates edge
        counts split by inlined / not inlined, virtual / non-virtual,
        speculative and indirect_inlining_edge, plus per-failure-reason totals
        in REASON ([0] sum of edge counts, [1] sum of edge frequencies, [2]
        number of edges).  Remaining indirect calls are counted separately.
        Writes to dump_file without checking it, so callers must ensure it is
        set.  */
2041
2042 static void
2043 dump_inline_stats (void)
2044 {
2045   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2046   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2047   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2048   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2049   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2050   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2051   int64_t reason[CIF_N_REASONS][3];
2052   int i;
2053   struct cgraph_node *node;
2054
2055   memset (reason, 0, sizeof (reason));
2056   FOR_EACH_DEFINED_FUNCTION (node)
2057   {
2058     struct cgraph_edge *e;
2059     for (e = node->callees; e; e = e->next_callee)
2060       {
             /* A non-zero inline_failed means the edge was not inlined; its
                value indexes the REASON table.  */
2061         if (e->inline_failed)
2062           {
2063             reason[(int) e->inline_failed][0] += e->count;
2064             reason[(int) e->inline_failed][1] += e->frequency;
2065             reason[(int) e->inline_failed][2] ++;
2066             if (DECL_VIRTUAL_P (e->callee->decl))
2067               {
2068                 if (e->indirect_inlining_edge)
2069                   noninlined_virt_indir_cnt += e->count;
2070                 else
2071                   noninlined_virt_cnt += e->count;
2072               }
2073             else
2074               {
2075                 if (e->indirect_inlining_edge)
2076                   noninlined_indir_cnt += e->count;
2077                 else
2078                   noninlined_cnt += e->count;
2079               }
2080           }
2081         else
2082           {
                 /* Inlined edge.  Speculative edges are tallied on their own
                    and are not split by indirect_inlining_edge.  */
2083             if (e->speculative)
2084               {
2085                 if (DECL_VIRTUAL_P (e->callee->decl))
2086                   inlined_speculative_ply += e->count;
2087                 else
2088                   inlined_speculative += e->count;
2089               }
2090             else if (DECL_VIRTUAL_P (e->callee->decl))
2091               {
2092                 if (e->indirect_inlining_edge)
2093                   inlined_virt_indir_cnt += e->count;
2094                 else
2095                   inlined_virt_cnt += e->count;
2096               }
2097             else
2098               {
2099                 if (e->indirect_inlining_edge)
2100                   inlined_indir_cnt += e->count;
2101                 else
2102                   inlined_cnt += e->count;
2103               }
2104           }
2105       }
         /* Calls that are still indirect, split by the polymorphic flag.  */
2106     for (e = node->indirect_calls; e; e = e->next_callee)
2107       if (e->indirect_info->polymorphic)
2108         indirect_poly_cnt += e->count;
2109       else
2110         indirect_cnt += e->count;
2111   }
       /* The count-based breakdown is printed only when max_count is
          non-zero.  */
2112   if (max_count)
2113     {
2114       fprintf (dump_file,
2115                "Inlined %"PRId64 " + speculative "
2116                "%"PRId64 " + speculative polymorphic "
2117                "%"PRId64 " + previously indirect "
2118                "%"PRId64 " + virtual "
2119                "%"PRId64 " + virtual and previously indirect "
2120                "%"PRId64 "\n" "Not inlined "
2121                "%"PRId64 " + previously indirect "
2122                "%"PRId64 " + virtual "
2123                "%"PRId64 " + virtual and previously indirect "
2124                "%"PRId64 " + stil indirect "
2125                "%"PRId64 " + still indirect polymorphic "
2126                "%"PRId64 "\n", inlined_cnt,
2127                inlined_speculative, inlined_speculative_ply,
2128                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2129                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2130                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2131       fprintf (dump_file,
2132                "Removed speculations %"PRId64 "\n",
2133                spec_rem);
2134     }
2135   dump_overall_stats ();
2136   fprintf (dump_file, "\nWhy inlining failed?\n");
       /* Only reasons seen on at least one edge are listed.  The edge and
          frequency totals are narrowed from int64_t to int for printing.  */
2137   for (i = 0; i < CIF_N_REASONS; i++)
2138     if (reason[i][2])
2139       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
2140                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2141                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2142 }
2143
2144 /* Decide on the inlining.  We do so in the topological order to avoid
2145    expenses on updating data structures.

        Steps, in order: flatten functions carrying the "flatten" attribute,
        run inline_small_functions, remove unreachable nodes, then make two
        sweeps (COLD = 0, then 1) over the defined functions that resolve
        speculative edges rejected by speculation_useful_p and inline
        functions into all their callers when
        want_inline_function_to_all_callers_p agrees.

        Returns 0 when not optimizing; otherwise TODO_remove_functions if any
        speculation was resolved or any inline-to-all-callers was attempted,
        else 0.  */
2146
2147 static unsigned int
2148 ipa_inline (void)
2149 {
2150   struct cgraph_node *node;
2151   int nnodes;
2152   struct cgraph_node **order;
2153   int i;
2154   int cold;
2155   bool remove_functions = false;
2156
2157   if (!optimize)
2158     return 0;
2159
2160   order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
2161
       /* NOTE(review): OPTIMIZE is already known to be non-zero here because
          of the early return above.  */
2162   if (in_lto_p && optimize)
2163     ipa_update_after_lto_read ();
2164
2165   if (dump_file)
2166     dump_inline_summaries (dump_file);
2167
2168   nnodes = ipa_reverse_postorder (order);
2169
       /* flatten_function asserts that aux is NULL on entry, so clear it on
          every function first.  */
2170   FOR_EACH_FUNCTION (node)
2171     node->aux = 0;
2172
2173   if (dump_file)
2174     fprintf (dump_file, "\nFlattening functions:\n");
2175
2176   /* In the first pass handle functions to be flattened.  Do this with
2177      a priority so none of our later choices will make this impossible.  */
2178   for (i = nnodes - 1; i >= 0; i--)
2179     {
2180       node = order[i];
2181
2182       /* Handle nodes to be flattened.
2183          Ideally when processing callees we stop inlining at the
2184          entry of cycles, possibly cloning that entry point and
2185          try to flatten itself turning it into a self-recursive
2186          function.  */
2187       if (lookup_attribute ("flatten",
2188                             DECL_ATTRIBUTES (node->decl)) != NULL)
2189         {
2190           if (dump_file)
2191             fprintf (dump_file,
2192                      "Flattening %s\n", node->name ());
2193           flatten_function (node, false);
2194         }
2195     }
2196   if (dump_file)
2197     dump_overall_stats ();
2198
2199   inline_small_functions ();
2200
2201   /* Do first after-inlining removal.  We want to remove all "stale" extern inline
2202      functions and virtual functions so we really know what is called once.  */
2203   symtab->remove_unreachable_nodes (false, dump_file);
       /* ORDER is not used past this point.  */
2204   free (order);
2205
2206   /* Inline functions with a property that after inlining into all callers the
2207      code size will shrink because the out-of-line copy is eliminated.
2208      We do this regardless on the callee size as long as function growth limits
2209      are met.  */
2210   if (dump_file)
2211     fprintf (dump_file,
2212              "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2213
2214   /* Inlining one function called once has good chance of preventing
2215      inlining other function into the same callee.  Ideally we should
2216      work in priority order, but probably inlining hot functions first
2217      is good cut without the extra pain of maintaining the queue.
2218
2219      ??? this is not really fitting the bill perfectly: inlining function
2220      into callee often leads to better optimization of callee due to
2221      increased context for optimization.
2222      For example if main() function calls a function that outputs help
2223      and then function that does the main optmization, we should inline
2224      the second with priority even if both calls are cold by themselves.
2225
2226      We probably want to implement new predicate replacing our use of
2227      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2228      to be hot.  */
2229   for (cold = 0; cold <= 1; cold ++)
2230     {
2231       FOR_EACH_DEFINED_FUNCTION (node)
2232         {
2233           struct cgraph_edge *edge, *next;
2234           bool update=false;
2235
               /* NEXT is fetched before the edge is possibly resolved, so
                  the walk does not depend on EDGE afterwards.  */
2236           for (edge = node->callees; edge; edge = next)
2237             {
2238               next = edge->next_callee;
2239               if (edge->speculative && !speculation_useful_p (edge, false))
2240                 {
2241                   edge->resolve_speculation ();
2242                   spec_rem += edge->count;
2243                   update = true;
2244                   remove_functions = true;
2245                 }
2246             }
               /* Something was resolved: reset the caches and refresh the
                  summary of NODE, or of the node it is inlined into.  */
2247           if (update)
2248             {
2249               struct cgraph_node *where = node->global.inlined_to
2250                                           ? node->global.inlined_to : node;
2251               reset_node_growth_cache (where);
2252               reset_edge_caches (where);
2253               inline_update_overall_summary (where);
2254             }
2255           if (flag_inline_functions_called_once
2256               && want_inline_function_to_all_callers_p (node, cold))
2257             {
                   /* Count the callers first; that number is the budget
                      inline_to_all_callers decrements.  The walk is repeated
                      for as long as the worker makes it return true.  */
2258               int num_calls = 0;
2259               node->call_for_symbol_thunks_and_aliases (sum_callers, &num_calls,
2260                                                       true);
2261               while (node->call_for_symbol_thunks_and_aliases (inline_to_all_callers,
2262                                                              &num_calls, true))
2263                 ;
2264               remove_functions = true;
2265             }
2266         }
2267     }
2268
2269   /* Free ipa-prop structures if they are no longer needed.  */
       /* NOTE(review): this test is always true here; see the early return
          at the top of the function.  */
2270   if (optimize)
2271     ipa_free_all_structures_after_iinln ();
2272
2273   if (dump_file)
2274     {
2275       fprintf (dump_file,
2276                "\nInlined %i calls, eliminated %i functions\n\n",
2277                ncalls_inlined, nfunctions_inlined);
2278       dump_inline_stats ();
2279     }
2280
2281   if (dump_file)
2282     dump_inline_summaries (dump_file);
2283   /* In WPA we use inline summaries for partitioning process.  */
2284   if (!flag_wpa)
2285     inline_free_summary ();
2286   return remove_functions ? TODO_remove_functions : 0;
2287 }
2288
2289 /* Inline always-inline function calls in NODE.

        Only callees with DECL_DISREGARD_INLINE_LIMITS set are considered.
        Returns true if a call was inlined, or if a callee carrying the
        "always_inline" attribute was rejected by can_early_inline_edge_p (see
        the comment in the loop).  NODE's overall summary is updated whenever
        true is returned.  */
2290
2291 static bool
2292 inline_always_inline_functions (struct cgraph_node *node)
2293 {
2294   struct cgraph_edge *e;
2295   bool inlined = false;
2296
2297   for (e = node->callees; e; e = e->next_callee)
2298     {
2299       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
2300       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2301         continue;
2302
           /* Recursive edges are never inlined here; record the reason on
              the edge.  */
2303       if (e->recursive_p ())
2304         {
2305           if (dump_file)
2306             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2307                      e->callee->name ());
2308           e->inline_failed = CIF_RECURSIVE_INLINING;
2309           continue;
2310         }
2311
2312       if (!can_early_inline_edge_p (e))
2313         {
2314           /* Set inlined to true if the callee is marked "always_inline" but
2315              is not inlinable.  This will allow flagging an error later in
2316              expand_call_inline in tree-inline.c.  */
2317           if (lookup_attribute ("always_inline",
2318                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2319             inlined = true;
2320           continue;
2321         }
2322
           /* NOTE(review): the xstrdup copies handed to fprintf are never
              freed.  */
2323       if (dump_file)
2324         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2325                  xstrdup (e->callee->name ()),
2326                  xstrdup (e->caller->name ()));
2327       inline_call (e, true, NULL, NULL, false);
2328       inlined = true;
2329     }
2330   if (inlined)
2331     inline_update_overall_summary (node);
2332
2333   return inlined;
2334 }
2335
2336 /* Inline calls to small functions in NODE during early inlining.
2337    An edge is inlined when it is not inlined yet, its callee's summary says
        it is inlinable, it is not recursive, and both can_early_inline_edge_p
        and want_early_inline_function_p accept it.  Callees not declared
        inline are skipped unless flag_inline_small_functions or
        flag_inline_functions is set.  Returns true if anything was
        inlined.  */
2338
2339 static bool
2340 early_inline_small_functions (struct cgraph_node *node)
2341 {
2342   struct cgraph_edge *e;
2343   bool inlined = false;
2344
2345   for (e = node->callees; e; e = e->next_callee)
2346     {
2347       struct cgraph_node *callee = e->callee->ultimate_alias_target ();
           /* Skip callees that cannot be inlined and edges already
              inlined.  */
2348       if (!inline_summary (callee)->inlinable
2349           || !e->inline_failed)
2350         continue;
2351
2352       /* Do not consider functions not declared inline.  */
2353       if (!DECL_DECLARED_INLINE_P (callee->decl)
2354           && !flag_inline_small_functions
2355           && !flag_inline_functions)
2356         continue;
2357
2358       if (dump_file)
2359         fprintf (dump_file, "Considering inline candidate %s.\n",
2360                  callee->name ());
2361
2362       if (!can_early_inline_edge_p (e))
2363         continue;
2364
2365       if (e->recursive_p ())
2366         {
2367           if (dump_file)
2368             fprintf (dump_file, "  Not inlining: recursive call.\n");
2369           continue;
2370         }
2371
2372       if (!want_early_inline_function_p (e))
2373         continue;
2374
           /* NOTE(review): the xstrdup copies handed to fprintf are never
              freed.  */
2375       if (dump_file)
2376         fprintf (dump_file, " Inlining %s into %s.\n",
2377                  xstrdup (callee->name ()),
2378                  xstrdup (e->caller->name ()));
2379       inline_call (e, true, NULL, NULL, true);
2380       inlined = true;
2381     }
2382
2383   return inlined;
2384 }
2385
/* Early inliner for the current function; FUN gets its
   always_inline_functions_inlined flag set on the normal exit path.
   Always-inline calls are inlined first.  Then, unless one of the
   conditions tested below disables it, either the function is flattened
   (when it carries the "flatten" attribute) or small functions are inlined
   iteratively, at most PARAM_EARLY_INLINER_MAX_ITERATIONS times.
   Returns the TODO flags accumulated from optimize_inline_calls, or 0 on
   the two early exits.  */
2386 unsigned int
2387 early_inliner (function *fun)
2388 {
2389   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2390   struct cgraph_edge *edge;
2391   unsigned int todo = 0;
2392   int iterations = 0;
2393   bool inlined = false;
2394
2395   if (seen_error ())
2396     return 0;
2397
2398   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2399      happens when some pass decides to construct new function and
2400      cgraph_add_new_function calls lowering passes and early optimization on
2401      it.  This may confuse ourself when early inliner decide to inline call to
2402      function clone, because function clones don't have parameter list in
2403      ipa-prop matching their signature.  */
2404   if (ipa_node_params_vector.exists ())
2405     return 0;
2406
2407 #ifdef ENABLE_CHECKING
2408   node->verify ();
2409 #endif
2410   node->remove_all_references ();
2411
2412   /* Even when not optimizing or not inlining inline always-inline
2413      functions.  */
2414   inlined = inline_always_inline_functions (node);
2415
       /* The empty statement below means: nothing beyond always-inline
          handling is done when any of these conditions holds.  */
2416   if (!optimize
2417       || flag_no_inline
2418       || !flag_early_inlining
2419       /* Never inline regular functions into always-inline functions
2420          during incremental inlining.  This sucks as functions calling
2421          always inline functions will get less optimized, but at the
2422          same time inlining of functions calling always inline
2423          function into an always inline function might introduce
2424          cycles of edges to be always inlined in the callgraph.
2425
2426          We might want to be smarter and just avoid this type of inlining.  */
2427       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
2428     ;
2429   else if (lookup_attribute ("flatten",
2430                              DECL_ATTRIBUTES (node->decl)) != NULL)
2431     {
2432       /* When the function is marked to be flattened, recursively inline
2433          all calls in it.  */
2434       if (dump_file)
2435         fprintf (dump_file,
2436                  "Flattening %s\n", node->name ());
2437       flatten_function (node, true);
2438       inlined = true;
2439     }
2440   else
2441     {
2442       /* We iterate incremental inlining to get trivial cases of indirect
2443          inlining.  */
2444       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2445              && early_inline_small_functions (node))
2446         {
2447           timevar_push (TV_INTEGRATION);
2448           todo |= optimize_inline_calls (current_function_decl);
2449
2450           /* Technically we ought to recompute inline parameters so the new
2451              iteration of early inliner works as expected.  We however have
2452              values approximately right and thus we only need to update edge
2453              info that might be cleared out for newly discovered edges.  */
2454           for (edge = node->callees; edge; edge = edge->next_callee)
2455             {
2456               struct inline_edge_summary *es = inline_edge_summary (edge);
2457               es->call_stmt_size
2458                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2459               es->call_stmt_time
2460                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
                   /* Flag edges whose call statement does not match the
                      callee's types as not inlinable.  */
2461               if (edge->callee->decl
2462                   && !gimple_check_call_matching_types (
2463                       edge->call_stmt, edge->callee->decl, false))
2464                 edge->call_stmt_cannot_inline_p = true;
2465             }
               /* The summary update is skipped on the last permitted
                  iteration; presumably because no further iteration will
                  consume it -- TODO confirm.  */
2466           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2467             inline_update_overall_summary (node);
2468           timevar_pop (TV_INTEGRATION);
2469           iterations++;
               /* optimize_inline_calls already ran in this iteration, so
                  clear the flag that would run it again after the loop.  */
2470           inlined = false;
2471         }
2472       if (dump_file)
2473         fprintf (dump_file, "Iterations: %i\n", iterations);
2474     }
2475
       /* Still set when always-inline handling or flattening inlined
          something and the iteration loop above did not run its body.  */
2476   if (inlined)
2477     {
2478       timevar_push (TV_INTEGRATION);
2479       todo |= optimize_inline_calls (current_function_decl);
2480       timevar_pop (TV_INTEGRATION);
2481     }
2482
2483   fun->always_inline_functions_inlined = true;
2484
2485   return todo;
2486 }
2487
2488 /* Do inlining of small functions.  Doing so early helps profiling and other
2489    passes to be somewhat more effective and avoids some code duplication in
2490    the later real inlining pass for testcases with very many function calls.  */
2491
2492 namespace {
2493
2494 const pass_data pass_data_early_inline =
2495 { /* Static descriptor of the early inliner pass; pass_early_inline hands
          it to the gimple_opt_pass base-class constructor.  */
2496   GIMPLE_PASS, /* type */
2497   "einline", /* name */
2498   OPTGROUP_INLINE, /* optinfo_flags */
2499   TV_EARLY_INLINING, /* tv_id */
2500   PROP_ssa, /* properties_required: the only property this pass asks for */
2501   0, /* properties_provided */
2502   0, /* properties_destroyed */
2503   0, /* todo_flags_start */
2504   0, /* todo_flags_finish: none listed here; execute returns a value itself */
2505 };
2506
2507 class pass_early_inline : public gimple_opt_pass
2508 { /* Pass object for the early inliner, described by
          pass_data_early_inline.  */
2509 public:
2510   pass_early_inline (gcc::context *ctxt) /* CTXT is forwarded to the base.  */
2511     : gimple_opt_pass (pass_data_early_inline, ctxt)
2512   {} /* No state of its own to initialize.  */
2513
2514   /* opt_pass methods: */
2515   virtual unsigned int execute (function *); /* Defined out of line below.  */
2516
2517 }; // class pass_early_inline
2518
2519 unsigned int /* Whatever early_inliner returns, passed through unchanged.  */
2520 pass_early_inline::execute (function *fun)
2521 {
2522   return early_inliner (fun); /* The whole pass body lives in early_inliner.  */
2523 }
2524
2525 } // anon namespace
2526
2527 gimple_opt_pass * /* A newly allocated pass_early_inline, as its base type.  */
2528 make_pass_early_inline (gcc::context *ctxt)
2529 {
       /* pass_early_inline is declared in an anonymous namespace above; this
          function is defined outside of it.  CTXT goes to the constructor.  */
2530   return new pass_early_inline (ctxt);
2531 }
2532
2533 namespace {
2534
2535 const pass_data pass_data_ipa_inline =
2536 { /* Static descriptor of the IPA inliner pass; pass_ipa_inline hands it
          to the ipa_opt_pass_d base-class constructor.  */
2537   IPA_PASS, /* type */
2538   "inline", /* name */
2539   OPTGROUP_INLINE, /* optinfo_flags */
2540   TV_IPA_INLINING, /* tv_id */
2541   0, /* properties_required */
2542   0, /* properties_provided */
2543   0, /* properties_destroyed */
2544   0, /* todo_flags_start */
2545   ( TODO_dump_symtab ), /* todo_flags_finish: the only TODO flag set here */
2546 };
2547
2548 class pass_ipa_inline : public ipa_opt_pass_d
2549 { /* Pass object for the IPA inliner, described by pass_data_ipa_inline.  */
2550 public:
       /* Forwards the descriptor, CTXT and the inliner's hook functions to
          the ipa_opt_pass_d constructor.  The hooks are positional; the
          comment after each argument names the slot it fills.  */
2551   pass_ipa_inline (gcc::context *ctxt)
2552     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2553                       inline_generate_summary, /* generate_summary */
2554                       inline_write_summary, /* write_summary */
2555                       inline_read_summary, /* read_summary */
2556                       NULL, /* write_optimization_summary: not provided */
2557                       NULL, /* read_optimization_summary: not provided */
2558                       NULL, /* stmt_fixup: not provided */
2559                       0, /* function_transform_todo_flags_start */
2560                       inline_transform, /* function_transform */
2561                       NULL) /* variable_transform: not provided */
2562   {}
2563
2564   /* opt_pass methods: */
       /* The function argument is unnamed and unused; ipa_inline is called
          with no arguments and its result is returned as is.  */
2565   virtual unsigned int execute (function *) { return ipa_inline (); }
2566
2567 }; // class pass_ipa_inline
2568
2569 } // anon namespace
2570
2571 ipa_opt_pass_d * /* A newly allocated pass_ipa_inline, as its base type.  */
2572 make_pass_ipa_inline (gcc::context *ctxt)
2573 {
       /* pass_ipa_inline is declared in an anonymous namespace above; this
          function is defined outside of it.  CTXT goes to the constructor.  */
2574   return new pass_ipa_inline (ctxt);
2575 }