gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks whether a particular inlining is allowed
  28       by the limits specified by the user (allowed function growth, stack
  29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and the results of early inlining add a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          a special purpose recursive inliner is executed on a function when
  80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "tree-inline.h"
  98 #include "langhooks.h"
  99 #include "flags.h"
 100 #include "cgraph.h"
 101 #include "diagnostic.h"
 102 #include "gimple-pretty-print.h"
 103 #include "params.h"
 104 #include "fibheap.h"
 105 #include "intl.h"
 106 #include "tree-pass.h"
 107 #include "coverage.h"
 108 #include "ggc.h"
 109 #include "rtl.h"
 110 #include "tree-ssa.h"
 111 #include "ipa-prop.h"
 112 #include "except.h"
 113 #include "target.h"
 114 #include "ipa-inline.h"
 115 #include "ipa-utils.h"
 116 #include "sreal.h"
 117
 118 /* Statistics we collect about inlining algorithm.  */
 119 static int overall_size;
 120 static gcov_type max_count;
 121 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 122
 123 /* Return false when inlining edge E would lead to violating
 124    limits on function unit growth or stack usage growth.
 125
 126    The relative function body growth limit is present generally
 127    to avoid problems with non-linear behavior of the compiler.
 128    To allow inlining huge functions into tiny wrapper, the limit
 129    is always based on the bigger of the two functions considered.
 130
 131    For stack growth limits we always base the growth in stack usage
 132    of the callers.  We want to prevent applications from segfaulting
 133    on stack overflow when functions with huge stack frames gets
 134    inlined. */
 135
 136 static bool
 137 caller_growth_limits (struct cgraph_edge *e)
 138 {
 139   struct cgraph_node *to = e->caller;
 140   struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
 141   int newsize;
 142   int limit = 0;
 143   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 144   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 145
 146   /* Look for function e->caller is inlined to.  While doing
 147      so work out the largest function body on the way.  As
 148      described above, we want to base our function growth
 149      limits based on that.  Not on the self size of the
 150      outer function, not on the self size of inline code
 151      we immediately inline to.  This is the most relaxed
 152      interpretation of the rule "do not grow large functions
 153      too much in order to prevent compiler from exploding".  */
 154   while (true)
 155     {
 156       info = inline_summary (to);
 157       if (limit < info->self_size)
 158         limit = info->self_size;
 159       if (stack_size_limit < info->estimated_self_stack_size)
 160         stack_size_limit = info->estimated_self_stack_size;
 161       if (to->global.inlined_to)
 162         to = to->callers->caller;
 163       else
 164         break;
 165     }
 166
 167   what_info = inline_summary (what);
 168
 169   if (limit < what_info->self_size)
 170     limit = what_info->self_size;
 171
 172   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 173
 174   /* Check the size after inlining against the function limits.  But allow
 175      the function to shrink if it went over the limits by forced inlining.  */
 176   newsize = estimate_size_after_inlining (to, e);
 177   if (newsize >= info->size
 178       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 179       && newsize > limit)
 180     {
 181       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 182       return false;
 183     }
 184
 185   if (!what_info->estimated_stack_size)
 186     return true;
 187
 188   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 189      due to large i/o datastructures used by the Fortran front-end.
 190      We ought to ignore this limit when we know that the edge is executed
 191      on every invocation of the caller (i.e. its call statement dominates
 192      exit block).  We do not track this information, yet.  */
 193   stack_size_limit += ((gcov_type)stack_size_limit
 194                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 195
 196   inlined_stack = (outer_info->stack_frame_offset
 197                    + outer_info->estimated_self_stack_size
 198                    + what_info->estimated_stack_size);
 199   /* Check new stack consumption with stack consumption at the place
 200      stack is used.  */
 201   if (inlined_stack > stack_size_limit
 202       /* If function already has large stack usage from sibling
 203          inline call, we can inline, too.
 204          This bit overoptimistically assume that we are good at stack
 205          packing.  */
 206       && inlined_stack > info->estimated_stack_size
 207       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 208     {
 209       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 210       return false;
 211     }
 212   return true;
 213 }
 214
 215 /* Dump info about why inlining has failed.  */
 216
 217 static void
 218 report_inline_failed_reason (struct cgraph_edge *e)
 219 {
 220   if (dump_file)
 221     {
 222       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 223                xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
 224                xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order,
 225                cgraph_inline_failed_string (e->inline_failed));
 226     }
 227 }
 228
 229 /* Decide if we can inline the edge and possibly update
 230    inline_failed reason.
 231    We check whether inlining is possible at all and whether
 232    caller growth limits allow doing so.
 233
 234    if REPORT is true, output reason to the dump file.
 235
 236    if DISREGARD_LIMITES is true, ignore size limits.*/
 237
 238 static bool
 239 can_inline_edge_p (struct cgraph_edge *e, bool report,
 240                    bool disregard_limits = false)
 241 {
 242   bool inlinable = true;
 243   enum availability avail;
 244   struct cgraph_node *callee
 245     = cgraph_function_or_thunk_node (e->callee, &avail);
 246   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->symbol.decl);
 247   tree callee_tree
 248     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->symbol.decl) : NULL;
 249   struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->symbol.decl);
 250   struct function *callee_cfun
 251     = callee ? DECL_STRUCT_FUNCTION (callee->symbol.decl) : NULL;
 252
 253   if (!caller_cfun && e->caller->clone_of)
 254     caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->symbol.decl);
 255
 256   if (!callee_cfun && callee && callee->clone_of)
 257     callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->symbol.decl);
 258
 259   gcc_assert (e->inline_failed);
 260
 261   if (!callee || !callee->symbol.definition)
 262     {
 263       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 264       inlinable = false;
 265     }
 266   else if (!inline_summary (callee)->inlinable)
 267     {
 268       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 269       inlinable = false;
 270     }
 271   else if (avail <= AVAIL_OVERWRITABLE)
 272     {
 273       e->inline_failed = CIF_OVERWRITABLE;
 274       inlinable = false;
 275     }
 276   else if (e->call_stmt_cannot_inline_p)
 277     {
 278       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 279         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 280       inlinable = false;
 281     }
 282   /* Don't inline if the functions have different EH personalities.  */
 283   else if (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl)
 284            && DECL_FUNCTION_PERSONALITY (callee->symbol.decl)
 285            && (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl)
 286                != DECL_FUNCTION_PERSONALITY (callee->symbol.decl)))
 287     {
 288       e->inline_failed = CIF_EH_PERSONALITY;
 289       inlinable = false;
 290     }
 291   /* TM pure functions should not be inlined into non-TM_pure
 292      functions.  */
 293   else if (is_tm_pure (callee->symbol.decl)
 294            && !is_tm_pure (e->caller->symbol.decl))
 295     {
 296       e->inline_failed = CIF_UNSPECIFIED;
 297       inlinable = false;
 298     }
 299   /* Don't inline if the callee can throw non-call exceptions but the
 300      caller cannot.
 301      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 302      Move the flag into cgraph node or mirror it in the inline summary.  */
 303   else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
 304            && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
 305     {
 306       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 307       inlinable = false;
 308     }
 309   /* Check compatibility of target optimization options.  */
 310   else if (!targetm.target_option.can_inline_p (e->caller->symbol.decl,
 311                                                 callee->symbol.decl))
 312     {
 313       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 314       inlinable = false;
 315     }
 316   /* Check if caller growth allows the inlining.  */
 317   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
 318            && !disregard_limits
 319            && !lookup_attribute ("flatten",
 320                                  DECL_ATTRIBUTES
 321                                    (e->caller->global.inlined_to
 322                                     ? e->caller->global.inlined_to->symbol.decl
 323                                     : e->caller->symbol.decl))
 324            && !caller_growth_limits (e))
 325     inlinable = false;
 326   /* Don't inline a function with a higher optimization level than the
 327      caller.  FIXME: this is really just tip of iceberg of handling
 328      optimization attribute.  */
 329   else if (caller_tree != callee_tree)
 330     {
 331       struct cl_optimization *caller_opt
 332         = TREE_OPTIMIZATION ((caller_tree)
 333                              ? caller_tree
 334                              : optimization_default_node);
 335
 336       struct cl_optimization *callee_opt
 337         = TREE_OPTIMIZATION ((callee_tree)
 338                              ? callee_tree
 339                              : optimization_default_node);
 340
 341       if (((caller_opt->x_optimize > callee_opt->x_optimize)
 342            || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 343           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 344           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->symbol.decl))
 345         {
 346           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 347           inlinable = false;
 348         }
 349     }
 350
 351   if (!inlinable && report)
 352     report_inline_failed_reason (e);
 353   return inlinable;
 354 }
 355
 356
 357 /* Return true if the edge E is inlinable during early inlining.  */
 358
 359 static bool
 360 can_early_inline_edge_p (struct cgraph_edge *e)
 361 {
 362   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
 363                                                               NULL);
 364   /* Early inliner might get called at WPA stage when IPA pass adds new
 365      function.  In this case we can not really do any of early inlining
 366      because function bodies are missing.  */
 367   if (!gimple_has_body_p (callee->symbol.decl))
 368     {
 369       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 370       return false;
 371     }
 372   /* In early inliner some of callees may not be in SSA form yet
 373      (i.e. the callgraph is cyclic and we did not process
 374      the callee by early inliner, yet).  We don't have CIF code for this
 375      case; later we will re-do the decision in the real inliner.  */
 376   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->symbol.decl))
 377       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl)))
 378     {
 379       if (dump_file)
 380         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 381       return false;
 382     }
 383   if (!can_inline_edge_p (e, true))
 384     return false;
 385   return true;
 386 }
 387
 388
 389 /* Return number of calls in N.  Ignore cheap builtins.  */
 390
 391 static int
 392 num_calls (struct cgraph_node *n)
 393 {
 394   struct cgraph_edge *e;
 395   int num = 0;
 396
 397   for (e = n->callees; e; e = e->next_callee)
 398     if (!is_inexpensive_builtin (e->callee->symbol.decl))
 399       num++;
 400   return num;
 401 }
 402
 403
 404 /* Return true if we are interested in inlining small function.  */
 405
 406 static bool
 407 want_early_inline_function_p (struct cgraph_edge *e)
 408 {
 409   bool want_inline = true;
 410   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 411
 412   if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
 413     ;
 414   else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 415            && !flag_inline_small_functions)
 416     {
 417       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 418       report_inline_failed_reason (e);
 419       want_inline = false;
 420     }
 421   else
 422     {
 423       int growth = estimate_edge_growth (e);
 424       int n;
 425
 426       if (growth <= 0)
 427         ;
 428       else if (!cgraph_maybe_hot_edge_p (e)
 429                && growth > 0)
 430         {
 431           if (dump_file)
 432             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 433                      "call is cold and code would grow by %i\n",
 434                      xstrdup (cgraph_node_name (e->caller)),
 435                      e->caller->symbol.order,
 436                      xstrdup (cgraph_node_name (callee)), callee->symbol.order,
 437                      growth);
 438           want_inline = false;
 439         }
 440       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 441         {
 442           if (dump_file)
 443             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 444                      "growth %i exceeds --param early-inlining-insns\n",
 445                      xstrdup (cgraph_node_name (e->caller)),
 446                      e->caller->symbol.order,
 447                      xstrdup (cgraph_node_name (callee)), callee->symbol.order,
 448                      growth);
 449           want_inline = false;
 450         }
 451       else if ((n = num_calls (callee)) != 0
 452                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 453         {
 454           if (dump_file)
 455             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 456                      "growth %i exceeds --param early-inlining-insns "
 457                      "divided by number of calls\n",
 458                      xstrdup (cgraph_node_name (e->caller)),
 459                      e->caller->symbol.order,
 460                      xstrdup (cgraph_node_name (callee)), callee->symbol.order,
 461                      growth);
 462           want_inline = false;
 463         }
 464     }
 465   return want_inline;
 466 }
 467
 468 /* Compute time of the edge->caller + edge->callee execution when inlining
 469    does not happen.  */
 470
 471 inline gcov_type
 472 compute_uninlined_call_time (struct inline_summary *callee_info,
 473                              struct cgraph_edge *edge)
 474 {
 475   gcov_type uninlined_call_time =
 476     RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
 477           CGRAPH_FREQ_BASE);
 478   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 479                                           ? edge->caller->global.inlined_to
 480                                           : edge->caller)->time;
 481   return uninlined_call_time + caller_time;
 482 }
 483
 484 /* Same as compute_uinlined_call_time but compute time when inlining
 485    does happen.  */
 486
 487 inline gcov_type
 488 compute_inlined_call_time (struct cgraph_edge *edge,
 489                            int edge_time)
 490 {
 491   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 492                                           ? edge->caller->global.inlined_to
 493                                           : edge->caller)->time;
 494   gcov_type time = (caller_time
 495                     + RDIV (((gcov_type) edge_time
 496                              - inline_edge_summary (edge)->call_stmt_time)
 497                     * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 498   /* Possible one roundoff error, but watch for overflows.  */
 499   gcc_checking_assert (time >= INT_MIN / 2);
 500   if (time < 0)
 501     time = 0;
 502   return time;
 503 }
 504
 505 /* Return true if the speedup for inlining E is bigger than
 506    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 507
 508 static bool
 509 big_speedup_p (struct cgraph_edge *e)
 510 {
 511   gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
 512                                                 e);
 513   gcov_type inlined_time = compute_inlined_call_time (e,
 514                                                       estimate_edge_time (e));
 515   if (time - inlined_time
 516       > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
 517     return true;
 518   return false;
 519 }
 520
 521 /* Return true if we are interested in inlining small function.
 522    When REPORT is true, report reason to dump file.  */
 523
 524 static bool
 525 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 526 {
 527   bool want_inline = true;
 528   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 529
 530   if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
 531     ;
 532   else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 533            && !flag_inline_small_functions)
 534     {
 535       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 536       want_inline = false;
 537     }
 538   else
 539     {
 540       int growth = estimate_edge_growth (e);
 541       inline_hints hints = estimate_edge_hints (e);
 542       bool big_speedup = big_speedup_p (e);
 543
 544       if (growth <= 0)
 545         ;
 546       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 547          hints suggests that inlining given function is very profitable.  */
 548       else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
 549                && growth >= MAX_INLINE_INSNS_SINGLE
 550                && !big_speedup
 551                && !(hints & (INLINE_HINT_indirect_call
 552                              | INLINE_HINT_loop_iterations
 553                              | INLINE_HINT_array_index
 554                              | INLINE_HINT_loop_stride)))
 555         {
 556           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 557           want_inline = false;
 558         }
 559       /* Before giving up based on fact that caller size will grow, allow
 560          functions that are called few times and eliminating the offline
 561          copy will lead to overall code size reduction.
 562          Not all of these will be handled by subsequent inlining of functions
 563          called once: in particular weak functions are not handled or funcitons
 564          that inline to multiple calls but a lot of bodies is optimized out.
 565          Finally we want to inline earlier to allow inlining of callbacks.
 566
 567          This is slightly wrong on aggressive side:  it is entirely possible
 568          that function is called many times with a context where inlining
 569          reduces code size and few times with a context where inlining increase
 570          code size.  Resoluting growth estimate will be negative even if it
 571          would make more sense to keep offline copy and do not inline into the
 572          call sites that makes the code size grow.
 573
 574          When badness orders the calls in a way that code reducing calls come
 575          first, this situation is not a problem at all: after inlining all
 576          "good" calls, we will realize that keeping the function around is
 577          better.  */
 578       else if (growth <= MAX_INLINE_INSNS_SINGLE
 579                /* Unlike for functions called once, we play unsafe with
 580                   COMDATs.  We can allow that since we know functions
 581                   in consideration are small (and thus risk is small) and
 582                   moreover grow estimates already accounts that COMDAT
 583                   functions may or may not disappear when eliminated from
 584                   current unit. With good probability making aggressive
 585                   choice in all units is going to make overall program
 586                   smaller.
 587
 588                   Consequently we ask cgraph_can_remove_if_no_direct_calls_p
 589                   instead of
 590                   cgraph_will_be_removed_from_program_if_no_direct_calls  */
 591                 && !DECL_EXTERNAL (callee->symbol.decl)
 592                 && cgraph_can_remove_if_no_direct_calls_p (callee)
 593                 && estimate_growth (callee) <= 0)
 594         ;
 595       else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 596                && !flag_inline_functions)
 597         {
 598           e->inline_failed = CIF_NOT_DECLARED_INLINED;
 599           want_inline = false;
 600         }
 601       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 602          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 603          inlining given function is very profitable.  */
 604       else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 605                && !big_speedup
 606                && growth >= ((hints & (INLINE_HINT_indirect_call
 607                                        | INLINE_HINT_loop_iterations
 608                                        | INLINE_HINT_array_index
 609                                        | INLINE_HINT_loop_stride))
 610                              ? MAX (MAX_INLINE_INSNS_AUTO,
 611                                     MAX_INLINE_INSNS_SINGLE)
 612                              : MAX_INLINE_INSNS_AUTO))
 613         {
 614           e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 615           want_inline = false;
 616         }
 617       /* If call is cold, do not inline when function body would grow. */
 618       else if (!cgraph_maybe_hot_edge_p (e))
 619         {
 620           e->inline_failed = CIF_UNLIKELY_CALL;
 621           want_inline = false;
 622         }
 623     }
 624   if (!want_inline && report)
 625     report_inline_failed_reason (e);
 626   return want_inline;
 627 }
 628
 629 /* EDGE is self recursive edge.
 630    We hand two cases - when function A is inlining into itself
 631    or when function A is being inlined into another inliner copy of function
 632    A within function B.
 633
 634    In first case OUTER_NODE points to the toplevel copy of A, while
 635    in the second case OUTER_NODE points to the outermost copy of A in B.
 636
 637    In both cases we want to be extra selective since
 638    inlining the call will just introduce new recursive calls to appear.  */
 639
 640 static bool
 641 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 642                                    struct cgraph_node *outer_node,
 643                                    bool peeling,
 644                                    int depth)
 645 {
 646   char const *reason = NULL;
 647   bool want_inline = true;
 648   int caller_freq = CGRAPH_FREQ_BASE;
 649   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 650
 651   if (DECL_DECLARED_INLINE_P (edge->caller->symbol.decl))
 652     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 653
 654   if (!cgraph_maybe_hot_edge_p (edge))
 655     {
 656       reason = "recursive call is cold";
 657       want_inline = false;
 658     }
 659   else if (max_count && !outer_node->count)
 660     {
 661       reason = "not executed in profile";
 662       want_inline = false;
 663     }
 664   else if (depth > max_depth)
 665     {
 666       reason = "--param max-inline-recursive-depth exceeded.";
 667       want_inline = false;
 668     }
 669
 670   if (outer_node->global.inlined_to)
 671     caller_freq = outer_node->callers->frequency;
 672
 673   if (!want_inline)
 674     ;
 675   /* Inlining of self recursive function into copy of itself within other function
 676      is transformation similar to loop peeling.
 677
 678      Peeling is profitable if we can inline enough copies to make probability
 679      of actual call to the self recursive function very small.  Be sure that
 680      the probability of recursion is small.
 681
 682      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 683      This way the expected number of recision is at most max_depth.  */
 684   else if (peeling)
 685     {
 686       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 687                                          / max_depth);
 688       int i;
 689       for (i = 1; i < depth; i++)
 690         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 691       if (max_count
 692           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 693               >= max_prob))
 694         {
 695           reason = "profile of recursive call is too large";
 696           want_inline = false;
 697         }
 698       if (!max_count
 699           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 700               >= max_prob))
 701         {
 702           reason = "frequency of recursive call is too large";
 703           want_inline = false;
 704         }
 705     }
 706   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 707      depth is large.  We reduce function call overhead and increase chances that
 708      things fit in hardware return predictor.
 709
 710      Recursive inlining might however increase cost of stack frame setup
 711      actually slowing down functions whose recursion tree is wide rather than
 712      deep.
 713
 714      Deciding reliably on when to do recursive inlining without profile feedback
 715      is tricky.  For now we disable recursive inlining when probability of self
 716      recursion is low.
 717
 718      Recursive inlining of self recursive call within loop also results in large loop
 719      depths that generally optimize badly.  We may want to throttle down inlining
 720      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 721      methods.  */
 722   else
 723     {
 724       if (max_count
 725           && (edge->count * 100 / outer_node->count
 726               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 727         {
 728           reason = "profile of recursive call is too small";
 729           want_inline = false;
 730         }
 731       else if (!max_count
 732                && (edge->frequency * 100 / caller_freq
 733                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 734         {
 735           reason = "frequency of recursive call is too small";
 736           want_inline = false;
 737         }
 738     }
 739   if (!want_inline && dump_file)
 740     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 741   return want_inline;
 742 }
 743
 744 /* Return true when NODE has uninlinable caller;
 745    set HAS_HOT_CALL if it has hot call.
 746    Worker for cgraph_for_node_and_aliases.  */
 747
 748 static bool
 749 check_callers (struct cgraph_node *node, void *has_hot_call)
 750 {
 751   struct cgraph_edge *e;
 752    for (e = node->callers; e; e = e->next_caller)
 753      {
 754        if (!can_inline_edge_p (e, true))
 755          return true;
 756        if (!has_hot_call && cgraph_maybe_hot_edge_p (e))
 757          *(bool *)has_hot_call = true;
 758      }
 759   return false;
 760 }
 761
 762 /* If NODE has a caller, return true.  */
 763
 764 static bool
 765 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 766 {
 767   if (node->callers)
 768     return true;
 769   return false;
 770 }
 771
 772 /* Decide if inlining NODE would reduce unit size by eliminating
 773    the offline copy of function.
 774    When COLD is true the cold calls are considered, too.  */
 775
 776 static bool
 777 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 778 {
 779    struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
 780    bool has_hot_call = false;
 781
 782    /* Does it have callers?  */
 783    if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
 784      return false;
 785    /* Already inlined?  */
 786    if (function->global.inlined_to)
 787      return false;
 788    if (cgraph_function_or_thunk_node (node, NULL) != node)
 789      return false;
 790    /* Inlining into all callers would increase size?  */
 791    if (estimate_growth (node) > 0)
 792      return false;
 793    /* All inlines must be possible.  */
 794    if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
 795      return false;
 796    if (!cold && !has_hot_call)
 797      return false;
 798    return true;
 799 }
 800
 801 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 802
 803 /* Return relative time improvement for inlining EDGE in range
 804    1...RELATIVE_TIME_BENEFIT_RANGE  */
 805
 806 static inline int
 807 relative_time_benefit (struct inline_summary *callee_info,
 808                        struct cgraph_edge *edge,
 809                        int edge_time)
 810 {
 811   gcov_type relbenefit;
 812   gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 813   gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
 814
 815   /* Inlining into extern inline function is not a win.  */
 816   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 817                      ? edge->caller->global.inlined_to->symbol.decl
 818                      : edge->caller->symbol.decl))
 819     return 1;
 820
 821   /* Watch overflows.  */
 822   gcc_checking_assert (uninlined_call_time >= 0);
 823   gcc_checking_assert (inlined_call_time >= 0);
 824   gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 825
 826   /* Compute relative time benefit, i.e. how much the call becomes faster.
 827      ??? perhaps computing how much the caller+calle together become faster
 828      would lead to more realistic results.  */
 829   if (!uninlined_call_time)
 830     uninlined_call_time = 1;
 831   relbenefit =
 832     RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
 833           uninlined_call_time);
 834   relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
 835   gcc_checking_assert (relbenefit >= 0);
 836   relbenefit = MAX (relbenefit, 1);
 837   return relbenefit;
 838 }
 839
 840
 841 /* A cost model driving the inlining heuristics in a way so the edges with
 842    smallest badness are inlined first.  After each inlining is performed
 843    the costs of all caller edges of nodes affected are recomputed so the
 844    metrics may accurately depend on values such as number of inlinable callers
 845    of the function or function body size.  */
 846
 847 static int
 848 edge_badness (struct cgraph_edge *edge, bool dump)
 849 {
 850   gcov_type badness;
 851   int growth, edge_time;
 852   struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
 853                                                               NULL);
 854   struct inline_summary *callee_info = inline_summary (callee);
 855   inline_hints hints;
 856
 857   if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
 858     return INT_MIN;
 859
 860   growth = estimate_edge_growth (edge);
 861   edge_time = estimate_edge_time (edge);
 862   hints = estimate_edge_hints (edge);
 863   gcc_checking_assert (edge_time >= 0);
 864   gcc_checking_assert (edge_time <= callee_info->time);
 865   gcc_checking_assert (growth <= callee_info->size);
 866
 867   if (dump)
 868     {
 869       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 870                xstrdup (cgraph_node_name (edge->caller)),
 871                edge->caller->symbol.order,
 872                xstrdup (cgraph_node_name (callee)),
 873                edge->callee->symbol.order);
 874       fprintf (dump_file, "      size growth %i, time %i ",
 875                growth,
 876                edge_time);
 877       dump_inline_hints (dump_file, hints);
 878       if (big_speedup_p (edge))
 879         fprintf (dump_file, " big_speedup");
 880       fprintf (dump_file, "\n");
 881     }
 882
 883   /* Always prefer inlining saving code size.  */
 884   if (growth <= 0)
 885     {
 886       badness = INT_MIN / 2 + growth;
 887       if (dump)
 888         fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
 889                  growth);
 890     }
 891
 892   /* When profiling is available, compute badness as:
 893
 894                 relative_edge_count * relative_time_benefit
 895      goodness = -------------------------------------------
 896                 growth_f_caller
 897      badness = -goodness
 898
 899     The fraction is upside down, because on edge counts and time beneits
 900     the bounds are known. Edge growth is essentially unlimited.  */
 901
 902   else if (max_count)
 903     {
 904       sreal tmp, relbenefit_real, growth_real;
 905       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
 906
 907       sreal_init(&relbenefit_real, relbenefit, 0);
 908       sreal_init(&growth_real, growth, 0);
 909
 910       /* relative_edge_count.  */
 911       sreal_init (&tmp, edge->count, 0);
 912       sreal_div (&tmp, &tmp, &max_count_real);
 913
 914       /* relative_time_benefit.  */
 915       sreal_mul (&tmp, &tmp, &relbenefit_real);
 916       sreal_div (&tmp, &tmp, &max_relbenefit_real);
 917
 918       /* growth_f_caller.  */
 919       sreal_mul (&tmp, &tmp, &half_int_min_real);
 920       sreal_div (&tmp, &tmp, &growth_real);
 921
 922       badness = -1 * sreal_to_int (&tmp);
 923
 924       /* Be sure that insanity of the profile won't lead to increasing counts
 925          in the scalling and thus to overflow in the computation above.  */
 926       gcc_assert (max_count >= edge->count);
 927       if (dump)
 928         {
 929           fprintf (dump_file,
 930                    "      %i (relative %f): profile info. Relative count %f"
 931                    " * Relative benefit %f\n",
 932                    (int) badness, (double) badness / INT_MIN,
 933                    (double) edge->count / max_count,
 934                    relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 935         }
 936     }
 937
 938   /* When function local profile is available. Compute badness as:
 939
 940                  relative_time_benefit
 941      goodness =  ---------------------------------
 942                  growth_of_caller * overall_growth
 943
 944      badness = - goodness
 945
 946      compensated by the inline hints.
 947   */
 948   else if (flag_guess_branch_prob)
 949     {
 950       badness = (relative_time_benefit (callee_info, edge, edge_time)
 951                  * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
 952       badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
 953       gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
 954       if ((hints & (INLINE_HINT_indirect_call
 955                     | INLINE_HINT_loop_iterations
 956                     | INLINE_HINT_array_index
 957                     | INLINE_HINT_loop_stride))
 958           || callee_info->growth <= 0)
 959         badness *= 8;
 960       if (hints & (INLINE_HINT_same_scc))
 961         badness /= 16;
 962       else if (hints & (INLINE_HINT_in_scc))
 963         badness /= 8;
 964       else if (hints & (INLINE_HINT_cross_module))
 965         badness /= 2;
 966       gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
 967       if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
 968         badness *= 16;
 969       if (dump)
 970         {
 971           fprintf (dump_file,
 972                    "      %i: guessed profile. frequency %f,"
 973                    " benefit %f%%, time w/o inlining %i, time w inlining %i"
 974                    " overall growth %i (current) %i (original)\n",
 975                    (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
 976                    relative_time_benefit (callee_info, edge, edge_time) * 100.0
 977                    / RELATIVE_TIME_BENEFIT_RANGE,
 978                    (int)compute_uninlined_call_time (callee_info, edge),
 979                    (int)compute_inlined_call_time (edge, edge_time),
 980                    estimate_growth (callee),
 981                    callee_info->growth);
 982         }
 983     }
 984   /* When function local profile is not available or it does not give
 985      useful information (ie frequency is zero), base the cost on
 986      loop nest and overall size growth, so we optimize for overall number
 987      of functions fully inlined in program.  */
 988   else
 989     {
 990       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
 991       badness = growth * 256;
 992
 993       /* Decrease badness if call is nested.  */
 994       if (badness > 0)
 995         badness >>= nest;
 996       else
 997         {
 998           badness <<= nest;
 999         }
1000       if (dump)
1001         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
1002                  nest);
1003     }
1004
1005   /* Ensure that we did not overflow in all the fixed point math above.  */
1006   gcc_assert (badness >= INT_MIN);
1007   gcc_assert (badness <= INT_MAX - 1);
1008   /* Make recursive inlining happen always after other inlining is done.  */
1009   if (cgraph_edge_recursive_p (edge))
1010     return badness + 1;
1011   else
1012     return badness;
1013 }
1014
1015 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1016 static inline void
1017 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1018 {
1019   int badness = edge_badness (edge, false);
1020   if (edge->aux)
1021     {
1022       fibnode_t n = (fibnode_t) edge->aux;
1023       gcc_checking_assert (n->data == edge);
1024
1025       /* fibheap_replace_key only decrease the keys.
1026          When we increase the key we do not update heap
1027          and instead re-insert the element once it becomes
1028          a minimum of heap.  */
1029       if (badness < n->key)
1030         {
1031           if (dump_file && (dump_flags & TDF_DETAILS))
1032             {
1033               fprintf (dump_file,
1034                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
1035                        xstrdup (cgraph_node_name (edge->caller)),
1036                        edge->caller->symbol.order,
1037                        xstrdup (cgraph_node_name (edge->callee)),
1038                        edge->callee->symbol.order,
1039                        (int)n->key,
1040                        badness);
1041             }
1042           fibheap_replace_key (heap, n, badness);
1043           gcc_checking_assert (n->key == badness);
1044         }
1045     }
1046   else
1047     {
1048        if (dump_file && (dump_flags & TDF_DETAILS))
1049          {
1050            fprintf (dump_file,
1051                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
1052                     xstrdup (cgraph_node_name (edge->caller)),
1053                     edge->caller->symbol.order,
1054                     xstrdup (cgraph_node_name (edge->callee)),
1055                     edge->callee->symbol.order,
1056                     badness);
1057          }
1058       edge->aux = fibheap_insert (heap, badness, edge);
1059     }
1060 }
1061
1062
1063 /* NODE was inlined.
1064    All caller edges needs to be resetted because
1065    size estimates change. Similarly callees needs reset
1066    because better context may be known.  */
1067
1068 static void
1069 reset_edge_caches (struct cgraph_node *node)
1070 {
1071   struct cgraph_edge *edge;
1072   struct cgraph_edge *e = node->callees;
1073   struct cgraph_node *where = node;
1074   int i;
1075   struct ipa_ref *ref;
1076
1077   if (where->global.inlined_to)
1078     where = where->global.inlined_to;
1079
1080   /* WHERE body size has changed, the cached growth is invalid.  */
1081   reset_node_growth_cache (where);
1082
1083   for (edge = where->callers; edge; edge = edge->next_caller)
1084     if (edge->inline_failed)
1085       reset_edge_growth_cache (edge);
1086   for (i = 0; ipa_ref_list_referring_iterate (&where->symbol.ref_list,
1087                                               i, ref); i++)
1088     if (ref->use == IPA_REF_ALIAS)
1089       reset_edge_caches (ipa_ref_referring_node (ref));
1090
1091   if (!e)
1092     return;
1093
1094   while (true)
1095     if (!e->inline_failed && e->callee->callees)
1096       e = e->callee->callees;
1097     else
1098       {
1099         if (e->inline_failed)
1100           reset_edge_growth_cache (e);
1101         if (e->next_callee)
1102           e = e->next_callee;
1103         else
1104           {
1105             do
1106               {
1107                 if (e->caller == node)
1108                   return;
1109                 e = e->caller->callers;
1110               }
1111             while (!e->next_callee);
1112             e = e->next_callee;
1113           }
1114       }
1115 }
1116
1117 /* Recompute HEAP nodes for each of caller of NODE.
1118    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1119    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1120    it is inlinable. Otherwise check all edges.  */
1121
1122 static void
1123 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1124                     bitmap updated_nodes,
1125                     struct cgraph_edge *check_inlinablity_for)
1126 {
1127   struct cgraph_edge *edge;
1128   int i;
1129   struct ipa_ref *ref;
1130
1131   if ((!node->symbol.alias && !inline_summary (node)->inlinable)
1132       || node->global.inlined_to)
1133     return;
1134   if (!bitmap_set_bit (updated_nodes, node->uid))
1135     return;
1136
1137   for (i = 0; ipa_ref_list_referring_iterate (&node->symbol.ref_list,
1138                                               i, ref); i++)
1139     if (ref->use == IPA_REF_ALIAS)
1140       {
1141         struct cgraph_node *alias = ipa_ref_referring_node (ref);
1142         update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1143       }
1144
1145   for (edge = node->callers; edge; edge = edge->next_caller)
1146     if (edge->inline_failed)
1147       {
1148         if (!check_inlinablity_for
1149             || check_inlinablity_for == edge)
1150           {
1151             if (can_inline_edge_p (edge, false)
1152                 && want_inline_small_function_p (edge, false))
1153               update_edge_key (heap, edge);
1154             else if (edge->aux)
1155               {
1156                 report_inline_failed_reason (edge);
1157                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1158                 edge->aux = NULL;
1159               }
1160           }
1161         else if (edge->aux)
1162           update_edge_key (heap, edge);
1163       }
1164 }
1165
1166 /* Recompute HEAP nodes for each uninlined call in NODE.
1167    This is used when we know that edge badnesses are going only to increase
1168    (we introduced new call site) and thus all we need is to insert newly
1169    created edges into heap.  */
1170
1171 static void
1172 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1173                     bitmap updated_nodes)
1174 {
1175   struct cgraph_edge *e = node->callees;
1176
1177   if (!e)
1178     return;
1179   while (true)
1180     if (!e->inline_failed && e->callee->callees)
1181       e = e->callee->callees;
1182     else
1183       {
1184         enum availability avail;
1185         struct cgraph_node *callee;
1186         /* We do not reset callee growth cache here.  Since we added a new call,
1187            growth chould have just increased and consequentely badness metric
1188            don't need updating.  */
1189         if (e->inline_failed
1190             && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1191             && inline_summary (callee)->inlinable
1192             && avail >= AVAIL_AVAILABLE
1193             && !bitmap_bit_p (updated_nodes, callee->uid))
1194           {
1195             if (can_inline_edge_p (e, false)
1196                 && want_inline_small_function_p (e, false))
1197               update_edge_key (heap, e);
1198             else if (e->aux)
1199               {
1200                 report_inline_failed_reason (e);
1201                 fibheap_delete_node (heap, (fibnode_t) e->aux);
1202                 e->aux = NULL;
1203               }
1204           }
1205         if (e->next_callee)
1206           e = e->next_callee;
1207         else
1208           {
1209             do
1210               {
1211                 if (e->caller == node)
1212                   return;
1213                 e = e->caller->callers;
1214               }
1215             while (!e->next_callee);
1216             e = e->next_callee;
1217           }
1218       }
1219 }
1220
1221 /* Enqueue all recursive calls from NODE into priority queue depending on
1222    how likely we want to recursively inline the call.  */
1223
1224 static void
1225 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1226                         fibheap_t heap)
1227 {
1228   struct cgraph_edge *e;
1229   enum availability avail;
1230
1231   for (e = where->callees; e; e = e->next_callee)
1232     if (e->callee == node
1233         || (cgraph_function_or_thunk_node (e->callee, &avail) == node
1234             && avail > AVAIL_OVERWRITABLE))
1235       {
1236         /* When profile feedback is available, prioritize by expected number
1237            of calls.  */
1238         fibheap_insert (heap,
1239                         !max_count ? -e->frequency
1240                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1241                         e);
1242       }
1243   for (e = where->callees; e; e = e->next_callee)
1244     if (!e->inline_failed)
1245       lookup_recursive_calls (node, e->callee, heap);
1246 }
1247
1248 /* Decide on recursive inlining: in the case function has recursive calls,
1249    inline until body size reaches given argument.  If any new indirect edges
1250    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1251    is NULL.  */
1252
1253 static bool
1254 recursive_inlining (struct cgraph_edge *edge,
1255                     vec<cgraph_edge_p> *new_edges)
1256 {
1257   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1258   fibheap_t heap;
1259   struct cgraph_node *node;
1260   struct cgraph_edge *e;
1261   struct cgraph_node *master_clone = NULL, *next;
1262   int depth = 0;
1263   int n = 0;
1264
1265   node = edge->caller;
1266   if (node->global.inlined_to)
1267     node = node->global.inlined_to;
1268
1269   if (DECL_DECLARED_INLINE_P (node->symbol.decl))
1270     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1271
1272   /* Make sure that function is small enough to be considered for inlining.  */
1273   if (estimate_size_after_inlining (node, edge)  >= limit)
1274     return false;
1275   heap = fibheap_new ();
1276   lookup_recursive_calls (node, node, heap);
1277   if (fibheap_empty (heap))
1278     {
1279       fibheap_delete (heap);
1280       return false;
1281     }
1282
1283   if (dump_file)
1284     fprintf (dump_file,
1285              "  Performing recursive inlining on %s\n",
1286              cgraph_node_name (node));
1287
1288   /* Do the inlining and update list of recursive call during process.  */
1289   while (!fibheap_empty (heap))
1290     {
1291       struct cgraph_edge *curr
1292         = (struct cgraph_edge *) fibheap_extract_min (heap);
1293       struct cgraph_node *cnode, *dest = curr->callee;
1294
1295       if (!can_inline_edge_p (curr, true))
1296         continue;
1297
1298       /* MASTER_CLONE is produced in the case we already started modified
1299          the function. Be sure to redirect edge to the original body before
1300          estimating growths otherwise we will be seeing growths after inlining
1301          the already modified body.  */
1302       if (master_clone)
1303         {
1304           cgraph_redirect_edge_callee (curr, master_clone);
1305           reset_edge_growth_cache (curr);
1306         }
1307
1308       if (estimate_size_after_inlining (node, curr) > limit)
1309         {
1310           cgraph_redirect_edge_callee (curr, dest);
1311           reset_edge_growth_cache (curr);
1312           break;
1313         }
1314
1315       depth = 1;
1316       for (cnode = curr->caller;
1317            cnode->global.inlined_to; cnode = cnode->callers->caller)
1318         if (node->symbol.decl
1319             == cgraph_function_or_thunk_node (curr->callee, NULL)->symbol.decl)
1320           depth++;
1321
1322       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1323         {
1324           cgraph_redirect_edge_callee (curr, dest);
1325           reset_edge_growth_cache (curr);
1326           continue;
1327         }
1328
1329       if (dump_file)
1330         {
1331           fprintf (dump_file,
1332                    "   Inlining call of depth %i", depth);
1333           if (node->count)
1334             {
1335               fprintf (dump_file, " called approx. %.2f times per call",
1336                        (double)curr->count / node->count);
1337             }
1338           fprintf (dump_file, "\n");
1339         }
1340       if (!master_clone)
1341         {
1342           /* We need original clone to copy around.  */
1343           master_clone = cgraph_clone_node (node, node->symbol.decl,
1344                                             node->count, CGRAPH_FREQ_BASE,
1345                                             false, vNULL, true, NULL);
1346           for (e = master_clone->callees; e; e = e->next_callee)
1347             if (!e->inline_failed)
1348               clone_inlined_nodes (e, true, false, NULL);
1349           cgraph_redirect_edge_callee (curr, master_clone);
1350           reset_edge_growth_cache (curr);
1351         }
1352
1353       inline_call (curr, false, new_edges, &overall_size, true);
1354       lookup_recursive_calls (node, curr->callee, heap);
1355       n++;
1356     }
1357
1358   if (!fibheap_empty (heap) && dump_file)
1359     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1360   fibheap_delete (heap);
1361
1362   if (!master_clone)
1363     return false;
1364
1365   if (dump_file)
1366     fprintf (dump_file,
1367              "\n   Inlined %i times, "
1368              "body grown from size %i to %i, time %i to %i\n", n,
1369              inline_summary (master_clone)->size, inline_summary (node)->size,
1370              inline_summary (master_clone)->time, inline_summary (node)->time);
1371
1372   /* Remove master clone we used for inlining.  We rely that clones inlined
1373      into master clone gets queued just before master clone so we don't
1374      need recursion.  */
1375   for (node = cgraph_first_function (); node != master_clone;
1376        node = next)
1377     {
1378       next = cgraph_next_function (node);
1379       if (node->global.inlined_to == master_clone)
1380         cgraph_remove_node (node);
1381     }
1382   cgraph_remove_node (master_clone);
1383   return true;
1384 }
1385
1386
1387 /* Given whole compilation unit estimate of INSNS, compute how large we can
1388    allow the unit to grow.  */
1389
1390 static int
1391 compute_max_insns (int insns)
1392 {
1393   int max_insns = insns;
1394   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1395     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1396
1397   return ((HOST_WIDEST_INT) max_insns
1398           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1399 }
1400
1401
1402 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1403
1404 static void
1405 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1406 {
1407   while (new_edges.length () > 0)
1408     {
1409       struct cgraph_edge *edge = new_edges.pop ();
1410
1411       gcc_assert (!edge->aux);
1412       if (edge->inline_failed
1413           && can_inline_edge_p (edge, true)
1414           && want_inline_small_function_p (edge, true))
1415         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1416     }
1417 }
1418
1419 /* Remove EDGE from the fibheap.  */
1420
1421 static void
1422 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1423 {
1424   if (e->callee)
1425     reset_node_growth_cache (e->callee);
1426   if (e->aux)
1427     {
1428       fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1429       e->aux = NULL;
1430     }
1431 }
1432
1433 /* Return true if speculation of edge E seems useful.
1434    If ANTICIPATE_INLINING is true, be conservative and hope that E
1435    may get inlined.  */
1436
1437 bool
1438 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1439 {
1440   enum availability avail;
1441   struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
1442   struct cgraph_edge *direct, *indirect;
1443   struct ipa_ref *ref;
1444
1445   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1446
1447   if (!cgraph_maybe_hot_edge_p (e))
1448     return false;
1449
1450   /* See if IP optimizations found something potentially useful about the
1451      function.  For now we look only for CONST/PURE flags.  Almost everything
1452      else we propagate is useless.  */
1453   if (avail >= AVAIL_AVAILABLE)
1454     {
1455       int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
1456       if (ecf_flags & ECF_CONST)
1457         {
1458           cgraph_speculative_call_info (e, direct, indirect, ref);
1459           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1460             return true;
1461         }
1462       else if (ecf_flags & ECF_PURE)
1463         {
1464           cgraph_speculative_call_info (e, direct, indirect, ref);
1465           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1466             return true;
1467         }
1468     }
1469   /* If we did not managed to inline the function nor redirect
1470      to an ipa-cp clone (that are seen by having local flag set),
1471      it is probably pointless to inline it unless hardware is missing
1472      indirect call predictor.  */
1473   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1474     return false;
1475   /* For overwritable targets there is not much to do.  */
1476   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1477     return false;
1478   /* OK, speculation seems interesting.  */
1479   return true;
1480 }
1481
1482 /* We know that EDGE is not going to be inlined.
1483    See if we can remove speculation.  */
1484
1485 static void
1486 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1487 {
1488   if (edge->speculative && !speculation_useful_p (edge, false))
1489     {
1490       struct cgraph_node *node = edge->caller;
1491       struct cgraph_node *where = node->global.inlined_to
1492                                   ? node->global.inlined_to : node;
1493       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1494
1495       cgraph_resolve_speculation (edge, NULL);
1496       reset_edge_caches (where);
1497       inline_update_overall_summary (where);
1498       update_caller_keys (edge_heap, where,
1499                           updated_nodes, NULL);
1500       update_callee_keys (edge_heap, where,
1501                           updated_nodes);
1502       BITMAP_FREE (updated_nodes);
1503     }
1504 }
1505
1506 /* We use greedy algorithm for inlining of small functions:
1507    All inline candidates are put into prioritized heap ordered in
1508    increasing badness.
1509
1510    The inlining of small functions is bounded by unit growth parameters.  */
1511
1512 static void
1513 inline_small_functions (void)
1514 {
1515   struct cgraph_node *node;
1516   struct cgraph_edge *edge;
1517   fibheap_t edge_heap = fibheap_new ();
1518   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1519   int min_size, max_size;
1520   vec<cgraph_edge_p> new_indirect_edges = vNULL;
1521   int initial_size = 0;
1522   struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1523   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1524
1525   if (flag_indirect_inlining)
1526     new_indirect_edges.create (8);
1527
1528   edge_removal_hook_holder
1529     = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1530
1531   /* Compute overall unit size and other global parameters used by badness
1532      metrics.  */
1533
1534   max_count = 0;
1535   ipa_reduced_postorder (order, true, true, NULL);
1536   free (order);
1537
1538   FOR_EACH_DEFINED_FUNCTION (node)
1539     if (!node->global.inlined_to)
1540       {
1541         if (cgraph_function_with_gimple_body_p (node)
1542             || node->thunk.thunk_p)
1543           {
1544             struct inline_summary *info = inline_summary (node);
1545             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->symbol.aux;
1546
1547             if (!DECL_EXTERNAL (node->symbol.decl))
1548               initial_size += info->size;
1549             info->growth = estimate_growth (node);
1550             if (dfs && dfs->next_cycle)
1551               {
1552                 struct cgraph_node *n2;
1553                 int id = dfs->scc_no + 1;
1554                 for (n2 = node; n2;
1555                      n2 = ((struct ipa_dfs_info *) node->symbol.aux)->next_cycle)
1556                   {
1557                     struct inline_summary *info2 = inline_summary (n2);
1558                     if (info2->scc_no)
1559                       break;
1560                     info2->scc_no = id;
1561                   }
1562               }
1563           }
1564
1565         for (edge = node->callers; edge; edge = edge->next_caller)
1566           if (max_count < edge->count)
1567             max_count = edge->count;
1568       }
1569   sreal_init (&max_count_real, max_count, 0);
1570   sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1571   sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1572   ipa_free_postorder_info ();
1573   initialize_growth_caches ();
1574
1575   if (dump_file)
1576     fprintf (dump_file,
1577              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1578              initial_size);
1579
1580   overall_size = initial_size;
1581   max_size = compute_max_insns (overall_size);
1582   min_size = overall_size;
1583
1584   /* Populate the heap with all edges we might inline.  */
1585
1586   FOR_EACH_DEFINED_FUNCTION (node)
1587     {
1588       bool update = false;
1589       struct cgraph_edge *next;
1590
1591       if (dump_file)
1592         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1593                  cgraph_node_name (node), node->symbol.order);
1594
1595       for (edge = node->callees; edge; edge = next)
1596         {
1597           next = edge->next_callee;
1598           if (edge->inline_failed
1599               && !edge->aux
1600               && can_inline_edge_p (edge, true)
1601               && want_inline_small_function_p (edge, true)
1602               && edge->inline_failed)
1603             {
1604               gcc_assert (!edge->aux);
1605               update_edge_key (edge_heap, edge);
1606             }
1607           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1608             {
1609               cgraph_resolve_speculation (edge, NULL);
1610               update = true;
1611             }
1612         }
1613       if (update)
1614         {
1615           struct cgraph_node *where = node->global.inlined_to
1616                                       ? node->global.inlined_to : node;
1617           inline_update_overall_summary (where);
1618           reset_node_growth_cache (where);
1619           reset_edge_caches (where);
1620           update_caller_keys (edge_heap, where,
1621                               updated_nodes, NULL);
1622           bitmap_clear (updated_nodes);
1623         }
1624     }
1625
1626   gcc_assert (in_lto_p
1627               || !max_count
1628               || (profile_info && flag_branch_probabilities));
1629
1630   while (!fibheap_empty (edge_heap))
1631     {
1632       int old_size = overall_size;
1633       struct cgraph_node *where, *callee;
1634       int badness = fibheap_min_key (edge_heap);
1635       int current_badness;
1636       int cached_badness;
1637       int growth;
1638
1639       edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1640       gcc_assert (edge->aux);
1641       edge->aux = NULL;
1642       if (!edge->inline_failed)
1643         continue;
1644
1645       /* Be sure that caches are maintained consistent.
1646          We can not make this ENABLE_CHECKING only because it cause different
1647          updates of the fibheap queue.  */
1648       cached_badness = edge_badness (edge, false);
1649       reset_edge_growth_cache (edge);
1650       reset_node_growth_cache (edge->callee);
1651
1652       /* When updating the edge costs, we only decrease badness in the keys.
1653          Increases of badness are handled lazily; when we see a key with an
1654          out-of-date value on it, we re-insert it now.  */
1655       current_badness = edge_badness (edge, false);
1656       gcc_assert (cached_badness == current_badness);
1657       gcc_assert (current_badness >= badness);
1658       if (current_badness != badness)
1659         {
1660           edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1661           continue;
1662         }
1663
1664       if (!can_inline_edge_p (edge, true))
1665         {
1666           resolve_noninline_speculation (edge_heap, edge);
1667           continue;
1668         }
1669
1670       callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1671       growth = estimate_edge_growth (edge);
1672       if (dump_file)
1673         {
1674           fprintf (dump_file,
1675                    "\nConsidering %s/%i with %i size\n",
1676                    cgraph_node_name (callee), callee->symbol.order,
1677                    inline_summary (callee)->size);
1678           fprintf (dump_file,
1679                    " to be inlined into %s/%i in %s:%i\n"
1680                    " Estimated growth after inlined into all is %+i insns.\n"
1681                    " Estimated badness is %i, frequency %.2f.\n",
1682                    cgraph_node_name (edge->caller), edge->caller->symbol.order,
1683                    flag_wpa ? "unknown"
1684                    : gimple_filename ((const_gimple) edge->call_stmt),
1685                    flag_wpa ? -1
1686                    : gimple_lineno ((const_gimple) edge->call_stmt),
1687                    estimate_growth (callee),
1688                    badness,
1689                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1690           if (edge->count)
1691             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1692                      edge->count);
1693           if (dump_flags & TDF_DETAILS)
1694             edge_badness (edge, true);
1695         }
1696
1697       if (overall_size + growth > max_size
1698           && !DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
1699         {
1700           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1701           report_inline_failed_reason (edge);
1702           resolve_noninline_speculation (edge_heap, edge);
1703           continue;
1704         }
1705
1706       if (!want_inline_small_function_p (edge, true))
1707         {
1708           resolve_noninline_speculation (edge_heap, edge);
1709           continue;
1710         }
1711
1712       /* Heuristics for inlining small functions work poorly for
1713          recursive calls, where the effect is similar to loop unrolling.
1714          When inlining such an edge seems profitable, leave the decision to
1715          the specific (recursive) inliner.  */
1716       if (cgraph_edge_recursive_p (edge))
1717         {
1718           where = edge->caller;
1719           if (where->global.inlined_to)
1720             where = where->global.inlined_to;
1721           if (!recursive_inlining (edge,
1722                                    flag_indirect_inlining
1723                                    ? &new_indirect_edges : NULL))
1724             {
1725               edge->inline_failed = CIF_RECURSIVE_INLINING;
1726               resolve_noninline_speculation (edge_heap, edge);
1727               continue;
1728             }
1729           reset_edge_caches (where);
1730           /* Recursive inliner inlines all recursive calls of the function
1731              at once. Consequently we need to update all callee keys.  */
1732           if (flag_indirect_inlining)
1733             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1734           update_callee_keys (edge_heap, where, updated_nodes);
1735           bitmap_clear (updated_nodes);
1736         }
1737       else
1738         {
1739           struct cgraph_node *outer_node = NULL;
1740           int depth = 0;
1741
1742           /* Consider the case where self recursive function A is inlined into B.
1743              This is desired optimization in some cases, since it leads to effect
1744              similar of loop peeling and we might completely optimize out the
1745              recursive call.  However we must be extra selective.  */
1746
1747           where = edge->caller;
1748           while (where->global.inlined_to)
1749             {
1750               if (where->symbol.decl == callee->symbol.decl)
1751                 outer_node = where, depth++;
1752               where = where->callers->caller;
1753             }
1754           if (outer_node
1755               && !want_inline_self_recursive_call_p (edge, outer_node,
1756                                                      true, depth))
1757             {
1758               edge->inline_failed
1759                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
1760                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1761               resolve_noninline_speculation (edge_heap, edge);
1762               continue;
1763             }
1764           else if (depth && dump_file)
1765             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1766
1767           gcc_checking_assert (!callee->global.inlined_to);
1768           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1769           if (flag_indirect_inlining)
1770             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1771
1772           reset_edge_caches (edge->callee);
1773           reset_node_growth_cache (callee);
1774
1775           update_callee_keys (edge_heap, where, updated_nodes);
1776         }
1777       where = edge->caller;
1778       if (where->global.inlined_to)
1779         where = where->global.inlined_to;
1780
1781       /* Our profitability metric can depend on local properties
1782          such as number of inlinable calls and size of the function body.
1783          After inlining these properties might change for the function we
1784          inlined into (since it's body size changed) and for the functions
1785          called by function we inlined (since number of it inlinable callers
1786          might change).  */
1787       update_caller_keys (edge_heap, where, updated_nodes, NULL);
1788       bitmap_clear (updated_nodes);
1789
1790       if (dump_file)
1791         {
1792           fprintf (dump_file,
1793                    " Inlined into %s which now has time %i and size %i,"
1794                    "net change of %+i.\n",
1795                    cgraph_node_name (edge->caller),
1796                    inline_summary (edge->caller)->time,
1797                    inline_summary (edge->caller)->size,
1798                    overall_size - old_size);
1799         }
1800       if (min_size > overall_size)
1801         {
1802           min_size = overall_size;
1803           max_size = compute_max_insns (min_size);
1804
1805           if (dump_file)
1806             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1807         }
1808     }
1809
1810   free_growth_caches ();
1811   new_indirect_edges.release ();
1812   fibheap_delete (edge_heap);
1813   if (dump_file)
1814     fprintf (dump_file,
1815              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1816              initial_size, overall_size,
1817              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1818   BITMAP_FREE (updated_nodes);
1819   cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
1820 }
1821
1822 /* Flatten NODE.  Performed both during early inlining and
1823    at IPA inlining time.  */
1824
1825 static void
1826 flatten_function (struct cgraph_node *node, bool early)
1827 {
1828   struct cgraph_edge *e;
1829
1830   /* We shouldn't be called recursively when we are being processed.  */
1831   gcc_assert (node->symbol.aux == NULL);
1832
1833   node->symbol.aux = (void *) node;
1834
1835   for (e = node->callees; e; e = e->next_callee)
1836     {
1837       struct cgraph_node *orig_callee;
1838       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1839
1840       /* We've hit cycle?  It is time to give up.  */
1841       if (callee->symbol.aux)
1842         {
1843           if (dump_file)
1844             fprintf (dump_file,
1845                      "Not inlining %s into %s to avoid cycle.\n",
1846                      xstrdup (cgraph_node_name (callee)),
1847                      xstrdup (cgraph_node_name (e->caller)));
1848           e->inline_failed = CIF_RECURSIVE_INLINING;
1849           continue;
1850         }
1851
1852       /* When the edge is already inlined, we just need to recurse into
1853          it in order to fully flatten the leaves.  */
1854       if (!e->inline_failed)
1855         {
1856           flatten_function (callee, early);
1857           continue;
1858         }
1859
1860       /* Flatten attribute needs to be processed during late inlining. For
1861          extra code quality we however do flattening during early optimization,
1862          too.  */
1863       if (!early
1864           ? !can_inline_edge_p (e, true)
1865           : !can_early_inline_edge_p (e))
1866         continue;
1867
1868       if (cgraph_edge_recursive_p (e))
1869         {
1870           if (dump_file)
1871             fprintf (dump_file, "Not inlining: recursive call.\n");
1872           continue;
1873         }
1874
1875       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->symbol.decl))
1876           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl)))
1877         {
1878           if (dump_file)
1879             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1880           continue;
1881         }
1882
1883       /* Inline the edge and flatten the inline clone.  Avoid
1884          recursing through the original node if the node was cloned.  */
1885       if (dump_file)
1886         fprintf (dump_file, " Inlining %s into %s.\n",
1887                  xstrdup (cgraph_node_name (callee)),
1888                  xstrdup (cgraph_node_name (e->caller)));
1889       orig_callee = callee;
1890       inline_call (e, true, NULL, NULL, false);
1891       if (e->callee != orig_callee)
1892         orig_callee->symbol.aux = (void *) node;
1893       flatten_function (e->callee, early);
1894       if (e->callee != orig_callee)
1895         orig_callee->symbol.aux = NULL;
1896     }
1897
1898   node->symbol.aux = NULL;
1899   if (!node->global.inlined_to)
1900     inline_update_overall_summary (node);
1901 }
1902
1903 /* Count number of callers of NODE and store it into DATA (that
1904    points to int.  Worker for cgraph_for_node_and_aliases.  */
1905
1906 static bool
1907 sum_callers (struct cgraph_node *node, void *data)
1908 {
1909   struct cgraph_edge *e;
1910   int *num_calls = (int *)data;
1911
1912   for (e = node->callers; e; e = e->next_caller)
1913     (*num_calls)++;
1914   return false;
1915 }
1916
1917 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1918    DATA points to number of calls originally found so we avoid infinite
1919    recursion.  */
1920
1921 static bool
1922 inline_to_all_callers (struct cgraph_node *node, void *data)
1923 {
1924   int *num_calls = (int *)data;
1925   while (node->callers && !node->global.inlined_to)
1926     {
1927       struct cgraph_node *caller = node->callers->caller;
1928
1929       if (dump_file)
1930         {
1931           fprintf (dump_file,
1932                    "\nInlining %s size %i.\n",
1933                    cgraph_node_name (node),
1934                    inline_summary (node)->size);
1935           fprintf (dump_file,
1936                    " Called once from %s %i insns.\n",
1937                    cgraph_node_name (node->callers->caller),
1938                    inline_summary (node->callers->caller)->size);
1939         }
1940
1941       inline_call (node->callers, true, NULL, NULL, true);
1942       if (dump_file)
1943         fprintf (dump_file,
1944                  " Inlined into %s which now has %i size\n",
1945                  cgraph_node_name (caller),
1946                  inline_summary (caller)->size);
1947       if (!(*num_calls)--)
1948         {
1949           if (dump_file)
1950             fprintf (dump_file, "New calls found; giving up.\n");
1951           return true;
1952         }
1953     }
1954   return false;
1955 }
1956
1957 /* Decide on the inlining.  We do so in the topological order to avoid
1958    expenses on updating data structures.  */
1959
1960 static unsigned int
1961 ipa_inline (void)
1962 {
1963   struct cgraph_node *node;
1964   int nnodes;
1965   struct cgraph_node **order =
1966     XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1967   int i;
1968   int cold;
1969   bool remove_functions = false;
1970
1971   if (!optimize)
1972     return 0;
1973
1974   if (in_lto_p && optimize)
1975     ipa_update_after_lto_read ();
1976
1977   if (dump_file)
1978     dump_inline_summaries (dump_file);
1979
1980   nnodes = ipa_reverse_postorder (order);
1981
1982   FOR_EACH_FUNCTION (node)
1983     node->symbol.aux = 0;
1984
1985   if (dump_file)
1986     fprintf (dump_file, "\nFlattening functions:\n");
1987
1988   /* In the first pass handle functions to be flattened.  Do this with
1989      a priority so none of our later choices will make this impossible.  */
1990   for (i = nnodes - 1; i >= 0; i--)
1991     {
1992       node = order[i];
1993
1994       /* Handle nodes to be flattened.
1995          Ideally when processing callees we stop inlining at the
1996          entry of cycles, possibly cloning that entry point and
1997          try to flatten itself turning it into a self-recursive
1998          function.  */
1999       if (lookup_attribute ("flatten",
2000                             DECL_ATTRIBUTES (node->symbol.decl)) != NULL)
2001         {
2002           if (dump_file)
2003             fprintf (dump_file,
2004                      "Flattening %s\n", cgraph_node_name (node));
2005           flatten_function (node, false);
2006         }
2007     }
2008
2009   inline_small_functions ();
2010
2011   /* Do first after-inlining removal.  We want to remove all "stale" extern inline
2012      functions and virtual functions so we really know what is called once.  */
2013   symtab_remove_unreachable_nodes (false, dump_file);
2014   free (order);
2015
2016   /* Inline functions with a property that after inlining into all callers the
2017      code size will shrink because the out-of-line copy is eliminated.
2018      We do this regardless on the callee size as long as function growth limits
2019      are met.  */
2020   if (dump_file)
2021     fprintf (dump_file,
2022              "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2023
2024   /* Inlining one function called once has good chance of preventing
2025      inlining other function into the same callee.  Ideally we should
2026      work in priority order, but probably inlining hot functions first
2027      is good cut without the extra pain of maintaining the queue.
2028
2029      ??? this is not really fitting the bill perfectly: inlining function
2030      into callee often leads to better optimization of callee due to
2031      increased context for optimization.
2032      For example if main() function calls a function that outputs help
2033      and then function that does the main optmization, we should inline
2034      the second with priority even if both calls are cold by themselves.
2035
2036      We probably want to implement new predicate replacing our use of
2037      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2038      to be hot.  */
2039   for (cold = 0; cold <= 1; cold ++)
2040     {
2041       FOR_EACH_DEFINED_FUNCTION (node)
2042         {
2043           struct cgraph_edge *edge, *next;
2044           bool update=false;
2045
2046           for (edge = node->callees; edge; edge = next)
2047             {
2048               next = edge->next_callee;
2049               if (edge->speculative && !speculation_useful_p (edge, false))
2050                 {
2051                   cgraph_resolve_speculation (edge, NULL);
2052                   update = true;
2053                   remove_functions = true;
2054                 }
2055             }
2056           if (update)
2057             {
2058               struct cgraph_node *where = node->global.inlined_to
2059                                           ? node->global.inlined_to : node;
2060               reset_node_growth_cache (where);
2061               reset_edge_caches (where);
2062               inline_update_overall_summary (where);
2063             }
2064           if (flag_inline_functions_called_once
2065               && want_inline_function_to_all_callers_p (node, cold))
2066             {
2067               int num_calls = 0;
2068               cgraph_for_node_and_aliases (node, sum_callers,
2069                                            &num_calls, true);
2070               cgraph_for_node_and_aliases (node, inline_to_all_callers,
2071                                            &num_calls, true);
2072               remove_functions = true;
2073             }
2074         }
2075     }
2076
2077   /* Free ipa-prop structures if they are no longer needed.  */
2078   if (optimize)
2079     ipa_free_all_structures_after_iinln ();
2080
2081   if (dump_file)
2082     fprintf (dump_file,
2083              "\nInlined %i calls, eliminated %i functions\n\n",
2084              ncalls_inlined, nfunctions_inlined);
2085
2086   if (dump_file)
2087     dump_inline_summaries (dump_file);
2088   /* In WPA we use inline summaries for partitioning process.  */
2089   if (!flag_wpa)
2090     inline_free_summary ();
2091   return remove_functions ? TODO_remove_functions : 0;
2092 }
2093
2094 /* Inline always-inline function calls in NODE.  */
2095
2096 static bool
2097 inline_always_inline_functions (struct cgraph_node *node)
2098 {
2099   struct cgraph_edge *e;
2100   bool inlined = false;
2101
2102   for (e = node->callees; e; e = e->next_callee)
2103     {
2104       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2105       if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
2106         continue;
2107
2108       if (cgraph_edge_recursive_p (e))
2109         {
2110           if (dump_file)
2111             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2112                      cgraph_node_name (e->callee));
2113           e->inline_failed = CIF_RECURSIVE_INLINING;
2114           continue;
2115         }
2116
2117       if (!can_early_inline_edge_p (e))
2118         {
2119           /* Set inlined to true if the callee is marked "always_inline" but
2120              is not inlinable.  This will allow flagging an error later in
2121              expand_call_inline in tree-inline.c.  */
2122           if (lookup_attribute ("always_inline",
2123                                  DECL_ATTRIBUTES (callee->symbol.decl)) != NULL)
2124             inlined = true;
2125           continue;
2126         }
2127
2128       if (dump_file)
2129         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2130                  xstrdup (cgraph_node_name (e->callee)),
2131                  xstrdup (cgraph_node_name (e->caller)));
2132       inline_call (e, true, NULL, NULL, false);
2133       inlined = true;
2134     }
2135   if (inlined)
2136     inline_update_overall_summary (node);
2137
2138   return inlined;
2139 }
2140
2141 /* Decide on the inlining.  We do so in the topological order to avoid
2142    expenses on updating data structures.  */
2143
2144 static bool
2145 early_inline_small_functions (struct cgraph_node *node)
2146 {
2147   struct cgraph_edge *e;
2148   bool inlined = false;
2149
2150   for (e = node->callees; e; e = e->next_callee)
2151     {
2152       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2153       if (!inline_summary (callee)->inlinable
2154           || !e->inline_failed)
2155         continue;
2156
2157       /* Do not consider functions not declared inline.  */
2158       if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
2159           && !flag_inline_small_functions
2160           && !flag_inline_functions)
2161         continue;
2162
2163       if (dump_file)
2164         fprintf (dump_file, "Considering inline candidate %s.\n",
2165                  cgraph_node_name (callee));
2166
2167       if (!can_early_inline_edge_p (e))
2168         continue;
2169
2170       if (cgraph_edge_recursive_p (e))
2171         {
2172           if (dump_file)
2173             fprintf (dump_file, "  Not inlining: recursive call.\n");
2174           continue;
2175         }
2176
2177       if (!want_early_inline_function_p (e))
2178         continue;
2179
2180       if (dump_file)
2181         fprintf (dump_file, " Inlining %s into %s.\n",
2182                  xstrdup (cgraph_node_name (callee)),
2183                  xstrdup (cgraph_node_name (e->caller)));
2184       inline_call (e, true, NULL, NULL, true);
2185       inlined = true;
2186     }
2187
2188   return inlined;
2189 }
2190
2191 /* Do inlining of small functions.  Doing so early helps profiling and other
2192    passes to be somewhat more effective and avoids some code duplication in
2193    later real inlining pass for testcases with very many function calls.  */
2194 static unsigned int
2195 early_inliner (void)
2196 {
2197   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2198   struct cgraph_edge *edge;
2199   unsigned int todo = 0;
2200   int iterations = 0;
2201   bool inlined = false;
2202
2203   if (seen_error ())
2204     return 0;
2205
2206   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2207      happens when some pass decides to construct new function and
2208      cgraph_add_new_function calls lowering passes and early optimization on
2209      it.  This may confuse ourself when early inliner decide to inline call to
2210      function clone, because function clones don't have parameter list in
2211      ipa-prop matching their signature.  */
2212   if (ipa_node_params_vector.exists ())
2213     return 0;
2214
2215 #ifdef ENABLE_CHECKING
2216   verify_cgraph_node (node);
2217 #endif
2218   ipa_remove_all_references (&node->symbol.ref_list);
2219
2220   /* Even when not optimizing or not inlining inline always-inline
2221      functions.  */
2222   inlined = inline_always_inline_functions (node);
2223
2224   if (!optimize
2225       || flag_no_inline
2226       || !flag_early_inlining
2227       /* Never inline regular functions into always-inline functions
2228          during incremental inlining.  This sucks as functions calling
2229          always inline functions will get less optimized, but at the
2230          same time inlining of functions calling always inline
2231          function into an always inline function might introduce
2232          cycles of edges to be always inlined in the callgraph.
2233
2234          We might want to be smarter and just avoid this type of inlining.  */
2235       || DECL_DISREGARD_INLINE_LIMITS (node->symbol.decl))
2236     ;
2237   else if (lookup_attribute ("flatten",
2238                              DECL_ATTRIBUTES (node->symbol.decl)) != NULL)
2239     {
2240       /* When the function is marked to be flattened, recursively inline
2241          all calls in it.  */
2242       if (dump_file)
2243         fprintf (dump_file,
2244                  "Flattening %s\n", cgraph_node_name (node));
2245       flatten_function (node, true);
2246       inlined = true;
2247     }
2248   else
2249     {
2250       /* We iterate incremental inlining to get trivial cases of indirect
2251          inlining.  */
2252       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2253              && early_inline_small_functions (node))
2254         {
2255           timevar_push (TV_INTEGRATION);
2256           todo |= optimize_inline_calls (current_function_decl);
2257
2258           /* Technically we ought to recompute inline parameters so the new
2259              iteration of early inliner works as expected.  We however have
2260              values approximately right and thus we only need to update edge
2261              info that might be cleared out for newly discovered edges.  */
2262           for (edge = node->callees; edge; edge = edge->next_callee)
2263             {
2264               struct inline_edge_summary *es = inline_edge_summary (edge);
2265               es->call_stmt_size
2266                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2267               es->call_stmt_time
2268                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2269               if (edge->callee->symbol.decl
2270                   && !gimple_check_call_matching_types (
2271                       edge->call_stmt, edge->callee->symbol.decl, false))
2272                 edge->call_stmt_cannot_inline_p = true;
2273             }
2274           timevar_pop (TV_INTEGRATION);
2275           iterations++;
2276           inlined = false;
2277         }
2278       if (dump_file)
2279         fprintf (dump_file, "Iterations: %i\n", iterations);
2280     }
2281
2282   if (inlined)
2283     {
2284       timevar_push (TV_INTEGRATION);
2285       todo |= optimize_inline_calls (current_function_decl);
2286       timevar_pop (TV_INTEGRATION);
2287     }
2288
2289   cfun->always_inline_functions_inlined = true;
2290
2291   return todo;
2292 }
2293
2294 namespace {
2295
2296 const pass_data pass_data_early_inline =
2297 {
2298   GIMPLE_PASS, /* type */
2299   "einline", /* name */
2300   OPTGROUP_INLINE, /* optinfo_flags */
2301   false, /* has_gate */
2302   true, /* has_execute */
2303   TV_EARLY_INLINING, /* tv_id */
2304   PROP_ssa, /* properties_required */
2305   0, /* properties_provided */
2306   0, /* properties_destroyed */
2307   0, /* todo_flags_start */
2308   0, /* todo_flags_finish */
2309 };
2310
2311 class pass_early_inline : public gimple_opt_pass
2312 {
2313 public:
2314   pass_early_inline(gcc::context *ctxt)
2315     : gimple_opt_pass(pass_data_early_inline, ctxt)
2316   {}
2317
2318   /* opt_pass methods: */
2319   unsigned int execute () { return early_inliner (); }
2320
2321 }; // class pass_early_inline
2322
2323 } // anon namespace
2324
2325 gimple_opt_pass *
2326 make_pass_early_inline (gcc::context *ctxt)
2327 {
2328   return new pass_early_inline (ctxt);
2329 }
2330
2331
/* Gate of the IPA inlining pass.  Inlining of always-inline functions
   happens during early inlining.

   The pass is enabled unconditionally, because callgraph redirection
   happens here.  */

static bool
gate_ipa_inline (void)
{
  /* There is no condition to test: the pass always runs.  */
  return true;
}
2343
2344 namespace {
2345
2346 const pass_data pass_data_ipa_inline =
2347 {
2348   IPA_PASS, /* type */
2349   "inline", /* name */
2350   OPTGROUP_INLINE, /* optinfo_flags */
2351   true, /* has_gate */
2352   true, /* has_execute */
2353   TV_IPA_INLINING, /* tv_id */
2354   0, /* properties_required */
2355   0, /* properties_provided */
2356   0, /* properties_destroyed */
2357   TODO_remove_functions, /* todo_flags_start */
2358   ( TODO_dump_symtab ), /* todo_flags_finish */
2359 };
2360
2361 class pass_ipa_inline : public ipa_opt_pass_d
2362 {
2363 public:
2364   pass_ipa_inline(gcc::context *ctxt)
2365     : ipa_opt_pass_d(pass_data_ipa_inline, ctxt,
2366                      inline_generate_summary, /* generate_summary */
2367                      inline_write_summary, /* write_summary */
2368                      inline_read_summary, /* read_summary */
2369                      NULL, /* write_optimization_summary */
2370                      NULL, /* read_optimization_summary */
2371                      NULL, /* stmt_fixup */
2372                      0, /* function_transform_todo_flags_start */
2373                      inline_transform, /* function_transform */
2374                      NULL) /* variable_transform */
2375   {}
2376
2377   /* opt_pass methods: */
2378   bool gate () { return gate_ipa_inline (); }
2379   unsigned int execute () { return ipa_inline (); }
2380
2381 }; // class pass_ipa_inline
2382
2383 } // anon namespace
2384
2385 ipa_opt_pass_d *
2386 make_pass_ipa_inline (gcc::context *ctxt)
2387 {
2388   return new pass_ipa_inline (ctxt);
2389 }