gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks whether a particular inlining is allowed
  28       by the limits specified by the user (allowed function growth, stack
  29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph.  Functions are converted into
  48          SSA form just before this pass and optimized subsequently.  As a
  49          result, the callees of the function seen by the early inliner were
  50          already optimized, and the results of early inlining add a lot of
  51          optimization opportunities for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the
  54          compilation unit - inlining auto inline functions, inlining for size
  55          and flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) Inlining of small functions.  This is implemented by a greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as the inline limits allow it.
  76
  77          This heuristic is not very good at inlining recursive calls.  Recursive
  78          calls can be inlined with results similar to loop unrolling.  To do so,
  79          a special purpose recursive inliner is executed on a function when
  80          a recursive edge is met as a viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "tree-inline.h"
  98 #include "langhooks.h"
  99 #include "flags.h"
 100 #include "cgraph.h"
 101 #include "diagnostic.h"
 102 #include "gimple-pretty-print.h"
 103 #include "params.h"
 104 #include "fibheap.h"
 105 #include "intl.h"
 106 #include "tree-pass.h"
 107 #include "coverage.h"
 108 #include "ggc.h"
 109 #include "rtl.h"
 110 #include "tree-flow.h"
 111 #include "ipa-prop.h"
 112 #include "except.h"
 113 #include "target.h"
 114 #include "ipa-inline.h"
 115 #include "ipa-utils.h"
 116 #include "sreal.h"
 117
/* Statistics we collect about inlining algorithm.  */

/* NOTE(review): not referenced in the visible part of this file;
   presumably the running size estimate of the whole unit -- confirm.  */
static int overall_size;
/* When nonzero, heuristics such as want_inline_self_recursive_call_p
   consult profile counts instead of estimated edge frequencies.  */
static gcov_type max_count;
/* NOTE(review): sreal values not referenced in the visible part of this
   file; presumably cached constants for the badness computation --
   confirm.  */
static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 122
 123 /* Return false when inlining edge E would lead to violating
 124    limits on function unit growth or stack usage growth.
 125
 126    The relative function body growth limit is present generally
 127    to avoid problems with non-linear behavior of the compiler.
 128    To allow inlining huge functions into tiny wrapper, the limit
 129    is always based on the bigger of the two functions considered.
 130
 131    For stack growth limits we always base the growth in stack usage
 132    of the callers.  We want to prevent applications from segfaulting
 133    on stack overflow when functions with huge stack frames gets
 134    inlined. */
 135
 136 static bool
 137 caller_growth_limits (struct cgraph_edge *e)
 138 {
 139   struct cgraph_node *to = e->caller;
 140   struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
 141   int newsize;
 142   int limit = 0;
 143   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 144   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 145
 146   /* Look for function e->caller is inlined to.  While doing
 147      so work out the largest function body on the way.  As
 148      described above, we want to base our function growth
 149      limits based on that.  Not on the self size of the
 150      outer function, not on the self size of inline code
 151      we immediately inline to.  This is the most relaxed
 152      interpretation of the rule "do not grow large functions
 153      too much in order to prevent compiler from exploding".  */
 154   while (true)
 155     {
 156       info = inline_summary (to);
 157       if (limit < info->self_size)
 158         limit = info->self_size;
 159       if (stack_size_limit < info->estimated_self_stack_size)
 160         stack_size_limit = info->estimated_self_stack_size;
 161       if (to->global.inlined_to)
 162         to = to->callers->caller;
 163       else
 164         break;
 165     }
 166
 167   what_info = inline_summary (what);
 168
 169   if (limit < what_info->self_size)
 170     limit = what_info->self_size;
 171
 172   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 173
 174   /* Check the size after inlining against the function limits.  But allow
 175      the function to shrink if it went over the limits by forced inlining.  */
 176   newsize = estimate_size_after_inlining (to, e);
 177   if (newsize >= info->size
 178       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 179       && newsize > limit)
 180     {
 181       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 182       return false;
 183     }
 184
 185   if (!what_info->estimated_stack_size)
 186     return true;
 187
 188   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 189      due to large i/o datastructures used by the Fortran front-end.
 190      We ought to ignore this limit when we know that the edge is executed
 191      on every invocation of the caller (i.e. its call statement dominates
 192      exit block).  We do not track this information, yet.  */
 193   stack_size_limit += ((gcov_type)stack_size_limit
 194                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 195
 196   inlined_stack = (outer_info->stack_frame_offset
 197                    + outer_info->estimated_self_stack_size
 198                    + what_info->estimated_stack_size);
 199   /* Check new stack consumption with stack consumption at the place
 200      stack is used.  */
 201   if (inlined_stack > stack_size_limit
 202       /* If function already has large stack usage from sibling
 203          inline call, we can inline, too.
 204          This bit overoptimistically assume that we are good at stack
 205          packing.  */
 206       && inlined_stack > info->estimated_stack_size
 207       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 208     {
 209       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 210       return false;
 211     }
 212   return true;
 213 }
 214
 215 /* Dump info about why inlining has failed.  */
 216
 217 static void
 218 report_inline_failed_reason (struct cgraph_edge *e)
 219 {
 220   if (dump_file)
 221     {
 222       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 223                xstrdup (cgraph_node_name (e->caller)), e->caller->symbol.order,
 224                xstrdup (cgraph_node_name (e->callee)), e->callee->symbol.order,
 225                cgraph_inline_failed_string (e->inline_failed));
 226     }
 227 }
 228
 229 /* Decide if we can inline the edge and possibly update
 230    inline_failed reason.
 231    We check whether inlining is possible at all and whether
 232    caller growth limits allow doing so.
 233
 234    if REPORT is true, output reason to the dump file.
 235
 236    if DISREGARD_LIMITES is true, ignore size limits.*/
 237
 238 static bool
 239 can_inline_edge_p (struct cgraph_edge *e, bool report,
 240                    bool disregard_limits = false)
 241 {
 242   bool inlinable = true;
 243   enum availability avail;
 244   struct cgraph_node *callee
 245     = cgraph_function_or_thunk_node (e->callee, &avail);
 246   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->symbol.decl);
 247   tree callee_tree
 248     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->symbol.decl) : NULL;
 249   struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->symbol.decl);
 250   struct function *callee_cfun
 251     = callee ? DECL_STRUCT_FUNCTION (callee->symbol.decl) : NULL;
 252
 253   if (!caller_cfun && e->caller->clone_of)
 254     caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->symbol.decl);
 255
 256   if (!callee_cfun && callee && callee->clone_of)
 257     callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->symbol.decl);
 258
 259   gcc_assert (e->inline_failed);
 260
 261   if (!callee || !callee->symbol.definition)
 262     {
 263       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 264       inlinable = false;
 265     }
 266   else if (!inline_summary (callee)->inlinable)
 267     {
 268       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 269       inlinable = false;
 270     }
 271   else if (avail <= AVAIL_OVERWRITABLE)
 272     {
 273       e->inline_failed = CIF_OVERWRITABLE;
 274       inlinable = false;
 275     }
 276   else if (e->call_stmt_cannot_inline_p)
 277     {
 278       e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 279       inlinable = false;
 280     }
 281   /* Don't inline if the functions have different EH personalities.  */
 282   else if (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl)
 283            && DECL_FUNCTION_PERSONALITY (callee->symbol.decl)
 284            && (DECL_FUNCTION_PERSONALITY (e->caller->symbol.decl)
 285                != DECL_FUNCTION_PERSONALITY (callee->symbol.decl)))
 286     {
 287       e->inline_failed = CIF_EH_PERSONALITY;
 288       inlinable = false;
 289     }
 290   /* TM pure functions should not be inlined into non-TM_pure
 291      functions.  */
 292   else if (is_tm_pure (callee->symbol.decl)
 293            && !is_tm_pure (e->caller->symbol.decl))
 294     {
 295       e->inline_failed = CIF_UNSPECIFIED;
 296       inlinable = false;
 297     }
 298   /* Don't inline if the callee can throw non-call exceptions but the
 299      caller cannot.
 300      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 301      Move the flag into cgraph node or mirror it in the inline summary.  */
 302   else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
 303            && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
 304     {
 305       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 306       inlinable = false;
 307     }
 308   /* Check compatibility of target optimization options.  */
 309   else if (!targetm.target_option.can_inline_p (e->caller->symbol.decl,
 310                                                 callee->symbol.decl))
 311     {
 312       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 313       inlinable = false;
 314     }
 315   /* Check if caller growth allows the inlining.  */
 316   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl)
 317            && !disregard_limits
 318            && !lookup_attribute ("flatten",
 319                                  DECL_ATTRIBUTES
 320                                    (e->caller->global.inlined_to
 321                                     ? e->caller->global.inlined_to->symbol.decl
 322                                     : e->caller->symbol.decl))
 323            && !caller_growth_limits (e))
 324     inlinable = false;
 325   /* Don't inline a function with a higher optimization level than the
 326      caller.  FIXME: this is really just tip of iceberg of handling
 327      optimization attribute.  */
 328   else if (caller_tree != callee_tree)
 329     {
 330       struct cl_optimization *caller_opt
 331         = TREE_OPTIMIZATION ((caller_tree)
 332                              ? caller_tree
 333                              : optimization_default_node);
 334
 335       struct cl_optimization *callee_opt
 336         = TREE_OPTIMIZATION ((callee_tree)
 337                              ? callee_tree
 338                              : optimization_default_node);
 339
 340       if (((caller_opt->x_optimize > callee_opt->x_optimize)
 341            || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 342           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 343           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->symbol.decl))
 344         {
 345           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 346           inlinable = false;
 347         }
 348     }
 349
 350   if (!inlinable && report)
 351     report_inline_failed_reason (e);
 352   return inlinable;
 353 }
 354
 355
 356 /* Return true if the edge E is inlinable during early inlining.  */
 357
 358 static bool
 359 can_early_inline_edge_p (struct cgraph_edge *e)
 360 {
 361   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
 362                                                               NULL);
 363   /* Early inliner might get called at WPA stage when IPA pass adds new
 364      function.  In this case we can not really do any of early inlining
 365      because function bodies are missing.  */
 366   if (!gimple_has_body_p (callee->symbol.decl))
 367     {
 368       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 369       return false;
 370     }
 371   /* In early inliner some of callees may not be in SSA form yet
 372      (i.e. the callgraph is cyclic and we did not process
 373      the callee by early inliner, yet).  We don't have CIF code for this
 374      case; later we will re-do the decision in the real inliner.  */
 375   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->symbol.decl))
 376       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl)))
 377     {
 378       if (dump_file)
 379         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 380       return false;
 381     }
 382   if (!can_inline_edge_p (e, true))
 383     return false;
 384   return true;
 385 }
 386
 387
 388 /* Return number of calls in N.  Ignore cheap builtins.  */
 389
 390 static int
 391 num_calls (struct cgraph_node *n)
 392 {
 393   struct cgraph_edge *e;
 394   int num = 0;
 395
 396   for (e = n->callees; e; e = e->next_callee)
 397     if (!is_inexpensive_builtin (e->callee->symbol.decl))
 398       num++;
 399   return num;
 400 }
 401
 402
 403 /* Return true if we are interested in inlining small function.  */
 404
 405 static bool
 406 want_early_inline_function_p (struct cgraph_edge *e)
 407 {
 408   bool want_inline = true;
 409   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 410
 411   if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
 412     ;
 413   else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 414            && !flag_inline_small_functions)
 415     {
 416       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 417       report_inline_failed_reason (e);
 418       want_inline = false;
 419     }
 420   else
 421     {
 422       int growth = estimate_edge_growth (e);
 423       int n;
 424
 425       if (growth <= 0)
 426         ;
 427       else if (!cgraph_maybe_hot_edge_p (e)
 428                && growth > 0)
 429         {
 430           if (dump_file)
 431             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 432                      "call is cold and code would grow by %i\n",
 433                      xstrdup (cgraph_node_name (e->caller)),
 434                      e->caller->symbol.order,
 435                      xstrdup (cgraph_node_name (callee)), callee->symbol.order,
 436                      growth);
 437           want_inline = false;
 438         }
 439       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 440         {
 441           if (dump_file)
 442             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 443                      "growth %i exceeds --param early-inlining-insns\n",
 444                      xstrdup (cgraph_node_name (e->caller)),
 445                      e->caller->symbol.order,
 446                      xstrdup (cgraph_node_name (callee)), callee->symbol.order,
 447                      growth);
 448           want_inline = false;
 449         }
 450       else if ((n = num_calls (callee)) != 0
 451                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 452         {
 453           if (dump_file)
 454             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 455                      "growth %i exceeds --param early-inlining-insns "
 456                      "divided by number of calls\n",
 457                      xstrdup (cgraph_node_name (e->caller)),
 458                      e->caller->symbol.order,
 459                      xstrdup (cgraph_node_name (callee)), callee->symbol.order,
 460                      growth);
 461           want_inline = false;
 462         }
 463     }
 464   return want_inline;
 465 }
 466
 467 /* Compute time of the edge->caller + edge->callee execution when inlining
 468    does not happen.  */
 469
 470 inline gcov_type
 471 compute_uninlined_call_time (struct inline_summary *callee_info,
 472                              struct cgraph_edge *edge)
 473 {
 474   gcov_type uninlined_call_time =
 475     RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
 476           CGRAPH_FREQ_BASE);
 477   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 478                                           ? edge->caller->global.inlined_to
 479                                           : edge->caller)->time;
 480   return uninlined_call_time + caller_time;
 481 }
 482
 483 /* Same as compute_uinlined_call_time but compute time when inlining
 484    does happen.  */
 485
 486 inline gcov_type
 487 compute_inlined_call_time (struct cgraph_edge *edge,
 488                            int edge_time)
 489 {
 490   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 491                                           ? edge->caller->global.inlined_to
 492                                           : edge->caller)->time;
 493   gcov_type time = (caller_time
 494                     + RDIV (((gcov_type) edge_time
 495                              - inline_edge_summary (edge)->call_stmt_time)
 496                     * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 497   /* Possible one roundoff error, but watch for overflows.  */
 498   gcc_checking_assert (time >= INT_MIN / 2);
 499   if (time < 0)
 500     time = 0;
 501   return time;
 502 }
 503
 504 /* Return true if the speedup for inlining E is bigger than
 505    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 506
 507 static bool
 508 big_speedup_p (struct cgraph_edge *e)
 509 {
 510   gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
 511                                                 e);
 512   gcov_type inlined_time = compute_inlined_call_time (e,
 513                                                       estimate_edge_time (e));
 514   if (time - inlined_time
 515       > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
 516     return true;
 517   return false;
 518 }
 519
 520 /* Return true if we are interested in inlining small function.
 521    When REPORT is true, report reason to dump file.  */
 522
 523 static bool
 524 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 525 {
 526   bool want_inline = true;
 527   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 528
 529   if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
 530     ;
 531   else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 532            && !flag_inline_small_functions)
 533     {
 534       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 535       want_inline = false;
 536     }
 537   else
 538     {
 539       int growth = estimate_edge_growth (e);
 540       inline_hints hints = estimate_edge_hints (e);
 541       bool big_speedup = big_speedup_p (e);
 542
 543       if (growth <= 0)
 544         ;
 545       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 546          hints suggests that inlining given function is very profitable.  */
 547       else if (DECL_DECLARED_INLINE_P (callee->symbol.decl)
 548                && growth >= MAX_INLINE_INSNS_SINGLE
 549                && !big_speedup
 550                && !(hints & (INLINE_HINT_indirect_call
 551                              | INLINE_HINT_loop_iterations
 552                              | INLINE_HINT_array_index
 553                              | INLINE_HINT_loop_stride)))
 554         {
 555           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 556           want_inline = false;
 557         }
 558       /* Before giving up based on fact that caller size will grow, allow
 559          functions that are called few times and eliminating the offline
 560          copy will lead to overall code size reduction.
 561          Not all of these will be handled by subsequent inlining of functions
 562          called once: in particular weak functions are not handled or funcitons
 563          that inline to multiple calls but a lot of bodies is optimized out.
 564          Finally we want to inline earlier to allow inlining of callbacks.
 565
 566          This is slightly wrong on aggressive side:  it is entirely possible
 567          that function is called many times with a context where inlining
 568          reduces code size and few times with a context where inlining increase
 569          code size.  Resoluting growth estimate will be negative even if it
 570          would make more sense to keep offline copy and do not inline into the
 571          call sites that makes the code size grow.
 572
 573          When badness orders the calls in a way that code reducing calls come
 574          first, this situation is not a problem at all: after inlining all
 575          "good" calls, we will realize that keeping the function around is
 576          better.  */
 577       else if (growth <= MAX_INLINE_INSNS_SINGLE
 578                /* Unlike for functions called once, we play unsafe with
 579                   COMDATs.  We can allow that since we know functions
 580                   in consideration are small (and thus risk is small) and
 581                   moreover grow estimates already accounts that COMDAT
 582                   functions may or may not disappear when eliminated from
 583                   current unit. With good probability making aggressive
 584                   choice in all units is going to make overall program
 585                   smaller.
 586
 587                   Consequently we ask cgraph_can_remove_if_no_direct_calls_p
 588                   instead of
 589                   cgraph_will_be_removed_from_program_if_no_direct_calls  */
 590                 && !DECL_EXTERNAL (callee->symbol.decl)
 591                 && cgraph_can_remove_if_no_direct_calls_p (callee)
 592                 && estimate_growth (callee) <= 0)
 593         ;
 594       else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 595                && !flag_inline_functions)
 596         {
 597           e->inline_failed = CIF_NOT_DECLARED_INLINED;
 598           want_inline = false;
 599         }
 600       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 601          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 602          inlining given function is very profitable.  */
 603       else if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
 604                && !big_speedup
 605                && growth >= ((hints & (INLINE_HINT_indirect_call
 606                                        | INLINE_HINT_loop_iterations
 607                                        | INLINE_HINT_array_index
 608                                        | INLINE_HINT_loop_stride))
 609                              ? MAX (MAX_INLINE_INSNS_AUTO,
 610                                     MAX_INLINE_INSNS_SINGLE)
 611                              : MAX_INLINE_INSNS_AUTO))
 612         {
 613           e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 614           want_inline = false;
 615         }
 616       /* If call is cold, do not inline when function body would grow. */
 617       else if (!cgraph_maybe_hot_edge_p (e))
 618         {
 619           e->inline_failed = CIF_UNLIKELY_CALL;
 620           want_inline = false;
 621         }
 622     }
 623   if (!want_inline && report)
 624     report_inline_failed_reason (e);
 625   return want_inline;
 626 }
 627
 628 /* EDGE is self recursive edge.
 629    We hand two cases - when function A is inlining into itself
 630    or when function A is being inlined into another inliner copy of function
 631    A within function B.
 632
 633    In first case OUTER_NODE points to the toplevel copy of A, while
 634    in the second case OUTER_NODE points to the outermost copy of A in B.
 635
 636    In both cases we want to be extra selective since
 637    inlining the call will just introduce new recursive calls to appear.  */
 638
 639 static bool
 640 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 641                                    struct cgraph_node *outer_node,
 642                                    bool peeling,
 643                                    int depth)
 644 {
 645   char const *reason = NULL;
 646   bool want_inline = true;
 647   int caller_freq = CGRAPH_FREQ_BASE;
 648   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 649
 650   if (DECL_DECLARED_INLINE_P (edge->caller->symbol.decl))
 651     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 652
 653   if (!cgraph_maybe_hot_edge_p (edge))
 654     {
 655       reason = "recursive call is cold";
 656       want_inline = false;
 657     }
 658   else if (max_count && !outer_node->count)
 659     {
 660       reason = "not executed in profile";
 661       want_inline = false;
 662     }
 663   else if (depth > max_depth)
 664     {
 665       reason = "--param max-inline-recursive-depth exceeded.";
 666       want_inline = false;
 667     }
 668
 669   if (outer_node->global.inlined_to)
 670     caller_freq = outer_node->callers->frequency;
 671
 672   if (!want_inline)
 673     ;
 674   /* Inlining of self recursive function into copy of itself within other function
 675      is transformation similar to loop peeling.
 676
 677      Peeling is profitable if we can inline enough copies to make probability
 678      of actual call to the self recursive function very small.  Be sure that
 679      the probability of recursion is small.
 680
 681      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 682      This way the expected number of recision is at most max_depth.  */
 683   else if (peeling)
 684     {
 685       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 686                                          / max_depth);
 687       int i;
 688       for (i = 1; i < depth; i++)
 689         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 690       if (max_count
 691           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 692               >= max_prob))
 693         {
 694           reason = "profile of recursive call is too large";
 695           want_inline = false;
 696         }
 697       if (!max_count
 698           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 699               >= max_prob))
 700         {
 701           reason = "frequency of recursive call is too large";
 702           want_inline = false;
 703         }
 704     }
 705   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 706      depth is large.  We reduce function call overhead and increase chances that
 707      things fit in hardware return predictor.
 708
 709      Recursive inlining might however increase cost of stack frame setup
 710      actually slowing down functions whose recursion tree is wide rather than
 711      deep.
 712
 713      Deciding reliably on when to do recursive inlining without profile feedback
 714      is tricky.  For now we disable recursive inlining when probability of self
 715      recursion is low.
 716
 717      Recursive inlining of self recursive call within loop also results in large loop
 718      depths that generally optimize badly.  We may want to throttle down inlining
 719      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 720      methods.  */
 721   else
 722     {
 723       if (max_count
 724           && (edge->count * 100 / outer_node->count
 725               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 726         {
 727           reason = "profile of recursive call is too small";
 728           want_inline = false;
 729         }
 730       else if (!max_count
 731                && (edge->frequency * 100 / caller_freq
 732                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 733         {
 734           reason = "frequency of recursive call is too small";
 735           want_inline = false;
 736         }
 737     }
 738   if (!want_inline && dump_file)
 739     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 740   return want_inline;
 741 }
 742
 743 /* Return true when NODE has caller other than EDGE.
 744    Worker for cgraph_for_node_and_aliases.  */
 745
 746 static bool
 747 check_caller_edge (struct cgraph_node *node, void *edge)
 748 {
 749   return (node->callers
 750           && node->callers != edge);
 751 }
 752
 753
 754 /* Decide if inlining NODE would reduce unit size by eliminating
 755    the offline copy of function.
 756    When COLD is true the cold calls are considered, too.  */
 757
 758 static bool
 759 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 760 {
 761    struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
 762    struct cgraph_edge *e;
 763    bool has_hot_call = false;
 764
 765    /* Does it have callers?  */
 766    if (!node->callers)
 767      return false;
 768    /* Already inlined?  */
 769    if (function->global.inlined_to)
 770      return false;
 771    if (cgraph_function_or_thunk_node (node, NULL) != node)
 772      return false;
 773    /* Inlining into all callers would increase size?  */
 774    if (estimate_growth (node) > 0)
 775      return false;
 776    /* Maybe other aliases has more direct calls.  */
 777    if (cgraph_for_node_and_aliases (node, check_caller_edge, node->callers, true))
 778      return false;
 779    /* All inlines must be possible.  */
 780    for (e = node->callers; e; e = e->next_caller)
 781      {
 782        if (!can_inline_edge_p (e, true))
 783          return false;
 784        if (!has_hot_call && cgraph_maybe_hot_edge_p (e))
 785          has_hot_call = 1;
 786      }
 787
 788    if (!cold && !has_hot_call)
 789      return false;
 790    return true;
 791 }
 792
 793 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 794
 795 /* Return relative time improvement for inlining EDGE in range
 796    1...RELATIVE_TIME_BENEFIT_RANGE  */
 797
 798 static inline int
 799 relative_time_benefit (struct inline_summary *callee_info,
 800                        struct cgraph_edge *edge,
 801                        int edge_time)
 802 {
 803   gcov_type relbenefit;
 804   gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 805   gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
 806
 807   /* Inlining into extern inline function is not a win.  */
 808   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 809                      ? edge->caller->global.inlined_to->symbol.decl
 810                      : edge->caller->symbol.decl))
 811     return 1;
 812
 813   /* Watch overflows.  */
 814   gcc_checking_assert (uninlined_call_time >= 0);
 815   gcc_checking_assert (inlined_call_time >= 0);
 816   gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 817
 818   /* Compute relative time benefit, i.e. how much the call becomes faster.
 819      ??? perhaps computing how much the caller+calle together become faster
 820      would lead to more realistic results.  */
 821   if (!uninlined_call_time)
 822     uninlined_call_time = 1;
 823   relbenefit =
 824     RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
 825           uninlined_call_time);
 826   relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
 827   gcc_checking_assert (relbenefit >= 0);
 828   relbenefit = MAX (relbenefit, 1);
 829   return relbenefit;
 830 }
 831
 832
 833 /* A cost model driving the inlining heuristics in a way so the edges with
 834    smallest badness are inlined first.  After each inlining is performed
 835    the costs of all caller edges of nodes affected are recomputed so the
 836    metrics may accurately depend on values such as number of inlinable callers
 837    of the function or function body size.  */
 838
 839 static int
 840 edge_badness (struct cgraph_edge *edge, bool dump)
 841 {
 842   gcov_type badness;
 843   int growth, edge_time;
 844   struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
 845                                                               NULL);
 846   struct inline_summary *callee_info = inline_summary (callee);
 847   inline_hints hints;
 848
 849   if (DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
 850     return INT_MIN;
 851
 852   growth = estimate_edge_growth (edge);
 853   edge_time = estimate_edge_time (edge);
 854   hints = estimate_edge_hints (edge);
 855   gcc_checking_assert (edge_time >= 0);
 856   gcc_checking_assert (edge_time <= callee_info->time);
 857   gcc_checking_assert (growth <= callee_info->size);
 858
 859   if (dump)
 860     {
 861       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 862                xstrdup (cgraph_node_name (edge->caller)),
 863                edge->caller->symbol.order,
 864                xstrdup (cgraph_node_name (callee)),
 865                edge->callee->symbol.order);
 866       fprintf (dump_file, "      size growth %i, time %i ",
 867                growth,
 868                edge_time);
 869       dump_inline_hints (dump_file, hints);
 870       if (big_speedup_p (edge))
 871         fprintf (dump_file, " big_speedup");
 872       fprintf (dump_file, "\n");
 873     }
 874
 875   /* Always prefer inlining saving code size.  */
 876   if (growth <= 0)
 877     {
 878       badness = INT_MIN / 2 + growth;
 879       if (dump)
 880         fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
 881                  growth);
 882     }
 883
 884   /* When profiling is available, compute badness as:
 885
 886                 relative_edge_count * relative_time_benefit
 887      goodness = -------------------------------------------
 888                 growth_f_caller
 889      badness = -goodness
 890
 891     The fraction is upside down, because on edge counts and time beneits
 892     the bounds are known. Edge growth is essentially unlimited.  */
 893
 894   else if (max_count)
 895     {
 896       sreal tmp, relbenefit_real, growth_real;
 897       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
 898
 899       sreal_init(&relbenefit_real, relbenefit, 0);
 900       sreal_init(&growth_real, growth, 0);
 901
 902       /* relative_edge_count.  */
 903       sreal_init (&tmp, edge->count, 0);
 904       sreal_div (&tmp, &tmp, &max_count_real);
 905
 906       /* relative_time_benefit.  */
 907       sreal_mul (&tmp, &tmp, &relbenefit_real);
 908       sreal_div (&tmp, &tmp, &max_relbenefit_real);
 909
 910       /* growth_f_caller.  */
 911       sreal_mul (&tmp, &tmp, &half_int_min_real);
 912       sreal_div (&tmp, &tmp, &growth_real);
 913
 914       badness = -1 * sreal_to_int (&tmp);
 915
 916       /* Be sure that insanity of the profile won't lead to increasing counts
 917          in the scalling and thus to overflow in the computation above.  */
 918       gcc_assert (max_count >= edge->count);
 919       if (dump)
 920         {
 921           fprintf (dump_file,
 922                    "      %i (relative %f): profile info. Relative count %f"
 923                    " * Relative benefit %f\n",
 924                    (int) badness, (double) badness / INT_MIN,
 925                    (double) edge->count / max_count,
 926                    relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 927         }
 928     }
 929
 930   /* When function local profile is available. Compute badness as:
 931
 932                  relative_time_benefit
 933      goodness =  ---------------------------------
 934                  growth_of_caller * overall_growth
 935
 936      badness = - goodness
 937
 938      compensated by the inline hints.
 939   */
 940   else if (flag_guess_branch_prob)
 941     {
 942       badness = (relative_time_benefit (callee_info, edge, edge_time)
 943                  * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
 944       badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
 945       gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
 946       if ((hints & (INLINE_HINT_indirect_call
 947                     | INLINE_HINT_loop_iterations
 948                     | INLINE_HINT_array_index
 949                     | INLINE_HINT_loop_stride))
 950           || callee_info->growth <= 0)
 951         badness *= 8;
 952       if (hints & (INLINE_HINT_same_scc))
 953         badness /= 16;
 954       else if (hints & (INLINE_HINT_in_scc))
 955         badness /= 8;
 956       else if (hints & (INLINE_HINT_cross_module))
 957         badness /= 2;
 958       gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
 959       if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
 960         badness *= 16;
 961       if (dump)
 962         {
 963           fprintf (dump_file,
 964                    "      %i: guessed profile. frequency %f,"
 965                    " benefit %f%%, time w/o inlining %i, time w inlining %i"
 966                    " overall growth %i (current) %i (original)\n",
 967                    (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
 968                    relative_time_benefit (callee_info, edge, edge_time) * 100.0
 969                    / RELATIVE_TIME_BENEFIT_RANGE,
 970                    (int)compute_uninlined_call_time (callee_info, edge),
 971                    (int)compute_inlined_call_time (edge, edge_time),
 972                    estimate_growth (callee),
 973                    callee_info->growth);
 974         }
 975     }
 976   /* When function local profile is not available or it does not give
 977      useful information (ie frequency is zero), base the cost on
 978      loop nest and overall size growth, so we optimize for overall number
 979      of functions fully inlined in program.  */
 980   else
 981     {
 982       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
 983       badness = growth * 256;
 984
 985       /* Decrease badness if call is nested.  */
 986       if (badness > 0)
 987         badness >>= nest;
 988       else
 989         {
 990           badness <<= nest;
 991         }
 992       if (dump)
 993         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
 994                  nest);
 995     }
 996
 997   /* Ensure that we did not overflow in all the fixed point math above.  */
 998   gcc_assert (badness >= INT_MIN);
 999   gcc_assert (badness <= INT_MAX - 1);
1000   /* Make recursive inlining happen always after other inlining is done.  */
1001   if (cgraph_edge_recursive_p (edge))
1002     return badness + 1;
1003   else
1004     return badness;
1005 }
1006
1007 /* Recompute badness of EDGE and update its key in HEAP if needed.  */
1008 static inline void
1009 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1010 {
1011   int badness = edge_badness (edge, false);
1012   if (edge->aux)
1013     {
1014       fibnode_t n = (fibnode_t) edge->aux;
1015       gcc_checking_assert (n->data == edge);
1016
1017       /* fibheap_replace_key only decrease the keys.
1018          When we increase the key we do not update heap
1019          and instead re-insert the element once it becomes
1020          a minimum of heap.  */
1021       if (badness < n->key)
1022         {
1023           if (dump_file && (dump_flags & TDF_DETAILS))
1024             {
1025               fprintf (dump_file,
1026                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
1027                        xstrdup (cgraph_node_name (edge->caller)),
1028                        edge->caller->symbol.order,
1029                        xstrdup (cgraph_node_name (edge->callee)),
1030                        edge->callee->symbol.order,
1031                        (int)n->key,
1032                        badness);
1033             }
1034           fibheap_replace_key (heap, n, badness);
1035           gcc_checking_assert (n->key == badness);
1036         }
1037     }
1038   else
1039     {
1040        if (dump_file && (dump_flags & TDF_DETAILS))
1041          {
1042            fprintf (dump_file,
1043                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
1044                     xstrdup (cgraph_node_name (edge->caller)),
1045                     edge->caller->symbol.order,
1046                     xstrdup (cgraph_node_name (edge->callee)),
1047                     edge->callee->symbol.order,
1048                     badness);
1049          }
1050       edge->aux = fibheap_insert (heap, badness, edge);
1051     }
1052 }
1053
1054
1055 /* NODE was inlined.
1056    All caller edges needs to be resetted because
1057    size estimates change. Similarly callees needs reset
1058    because better context may be known.  */
1059
1060 static void
1061 reset_edge_caches (struct cgraph_node *node)
1062 {
1063   struct cgraph_edge *edge;
1064   struct cgraph_edge *e = node->callees;
1065   struct cgraph_node *where = node;
1066   int i;
1067   struct ipa_ref *ref;
1068
1069   if (where->global.inlined_to)
1070     where = where->global.inlined_to;
1071
1072   /* WHERE body size has changed, the cached growth is invalid.  */
1073   reset_node_growth_cache (where);
1074
1075   for (edge = where->callers; edge; edge = edge->next_caller)
1076     if (edge->inline_failed)
1077       reset_edge_growth_cache (edge);
1078   for (i = 0; ipa_ref_list_referring_iterate (&where->symbol.ref_list,
1079                                               i, ref); i++)
1080     if (ref->use == IPA_REF_ALIAS)
1081       reset_edge_caches (ipa_ref_referring_node (ref));
1082
1083   if (!e)
1084     return;
1085
1086   while (true)
1087     if (!e->inline_failed && e->callee->callees)
1088       e = e->callee->callees;
1089     else
1090       {
1091         if (e->inline_failed)
1092           reset_edge_growth_cache (e);
1093         if (e->next_callee)
1094           e = e->next_callee;
1095         else
1096           {
1097             do
1098               {
1099                 if (e->caller == node)
1100                   return;
1101                 e = e->caller->callers;
1102               }
1103             while (!e->next_callee);
1104             e = e->next_callee;
1105           }
1106       }
1107 }
1108
1109 /* Recompute HEAP nodes for each of caller of NODE.
1110    UPDATED_NODES track nodes we already visited, to avoid redundant work.
1111    When CHECK_INLINABLITY_FOR is set, re-check for specified edge that
1112    it is inlinable. Otherwise check all edges.  */
1113
1114 static void
1115 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1116                     bitmap updated_nodes,
1117                     struct cgraph_edge *check_inlinablity_for)
1118 {
1119   struct cgraph_edge *edge;
1120   int i;
1121   struct ipa_ref *ref;
1122
1123   if ((!node->symbol.alias && !inline_summary (node)->inlinable)
1124       || node->global.inlined_to)
1125     return;
1126   if (!bitmap_set_bit (updated_nodes, node->uid))
1127     return;
1128
1129   for (i = 0; ipa_ref_list_referring_iterate (&node->symbol.ref_list,
1130                                               i, ref); i++)
1131     if (ref->use == IPA_REF_ALIAS)
1132       {
1133         struct cgraph_node *alias = ipa_ref_referring_node (ref);
1134         update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1135       }
1136
1137   for (edge = node->callers; edge; edge = edge->next_caller)
1138     if (edge->inline_failed)
1139       {
1140         if (!check_inlinablity_for
1141             || check_inlinablity_for == edge)
1142           {
1143             if (can_inline_edge_p (edge, false)
1144                 && want_inline_small_function_p (edge, false))
1145               update_edge_key (heap, edge);
1146             else if (edge->aux)
1147               {
1148                 report_inline_failed_reason (edge);
1149                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1150                 edge->aux = NULL;
1151               }
1152           }
1153         else if (edge->aux)
1154           update_edge_key (heap, edge);
1155       }
1156 }
1157
1158 /* Recompute HEAP nodes for each uninlined call in NODE.
1159    This is used when we know that edge badnesses are going only to increase
1160    (we introduced new call site) and thus all we need is to insert newly
1161    created edges into heap.  */
1162
1163 static void
1164 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1165                     bitmap updated_nodes)
1166 {
1167   struct cgraph_edge *e = node->callees;
1168
1169   if (!e)
1170     return;
1171   while (true)
1172     if (!e->inline_failed && e->callee->callees)
1173       e = e->callee->callees;
1174     else
1175       {
1176         enum availability avail;
1177         struct cgraph_node *callee;
1178         /* We do not reset callee growth cache here.  Since we added a new call,
1179            growth chould have just increased and consequentely badness metric
1180            don't need updating.  */
1181         if (e->inline_failed
1182             && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1183             && inline_summary (callee)->inlinable
1184             && avail >= AVAIL_AVAILABLE
1185             && !bitmap_bit_p (updated_nodes, callee->uid))
1186           {
1187             if (can_inline_edge_p (e, false)
1188                 && want_inline_small_function_p (e, false))
1189               update_edge_key (heap, e);
1190             else if (e->aux)
1191               {
1192                 report_inline_failed_reason (e);
1193                 fibheap_delete_node (heap, (fibnode_t) e->aux);
1194                 e->aux = NULL;
1195               }
1196           }
1197         if (e->next_callee)
1198           e = e->next_callee;
1199         else
1200           {
1201             do
1202               {
1203                 if (e->caller == node)
1204                   return;
1205                 e = e->caller->callers;
1206               }
1207             while (!e->next_callee);
1208             e = e->next_callee;
1209           }
1210       }
1211 }
1212
1213 /* Enqueue all recursive calls from NODE into priority queue depending on
1214    how likely we want to recursively inline the call.  */
1215
1216 static void
1217 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1218                         fibheap_t heap)
1219 {
1220   struct cgraph_edge *e;
1221   enum availability avail;
1222
1223   for (e = where->callees; e; e = e->next_callee)
1224     if (e->callee == node
1225         || (cgraph_function_or_thunk_node (e->callee, &avail) == node
1226             && avail > AVAIL_OVERWRITABLE))
1227       {
1228         /* When profile feedback is available, prioritize by expected number
1229            of calls.  */
1230         fibheap_insert (heap,
1231                         !max_count ? -e->frequency
1232                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1233                         e);
1234       }
1235   for (e = where->callees; e; e = e->next_callee)
1236     if (!e->inline_failed)
1237       lookup_recursive_calls (node, e->callee, heap);
1238 }
1239
1240 /* Decide on recursive inlining: in the case function has recursive calls,
1241    inline until body size reaches given argument.  If any new indirect edges
1242    are discovered in the process, add them to *NEW_EDGES, unless NEW_EDGES
1243    is NULL.  */
1244
1245 static bool
1246 recursive_inlining (struct cgraph_edge *edge,
1247                     vec<cgraph_edge_p> *new_edges)
1248 {
1249   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1250   fibheap_t heap;
1251   struct cgraph_node *node;
1252   struct cgraph_edge *e;
1253   struct cgraph_node *master_clone = NULL, *next;
1254   int depth = 0;
1255   int n = 0;
1256
1257   node = edge->caller;
1258   if (node->global.inlined_to)
1259     node = node->global.inlined_to;
1260
1261   if (DECL_DECLARED_INLINE_P (node->symbol.decl))
1262     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1263
1264   /* Make sure that function is small enough to be considered for inlining.  */
1265   if (estimate_size_after_inlining (node, edge)  >= limit)
1266     return false;
1267   heap = fibheap_new ();
1268   lookup_recursive_calls (node, node, heap);
1269   if (fibheap_empty (heap))
1270     {
1271       fibheap_delete (heap);
1272       return false;
1273     }
1274
1275   if (dump_file)
1276     fprintf (dump_file,
1277              "  Performing recursive inlining on %s\n",
1278              cgraph_node_name (node));
1279
1280   /* Do the inlining and update list of recursive call during process.  */
1281   while (!fibheap_empty (heap))
1282     {
1283       struct cgraph_edge *curr
1284         = (struct cgraph_edge *) fibheap_extract_min (heap);
1285       struct cgraph_node *cnode, *dest = curr->callee;
1286
1287       if (!can_inline_edge_p (curr, true))
1288         continue;
1289
1290       /* MASTER_CLONE is produced in the case we already started modified
1291          the function. Be sure to redirect edge to the original body before
1292          estimating growths otherwise we will be seeing growths after inlining
1293          the already modified body.  */
1294       if (master_clone)
1295         {
1296           cgraph_redirect_edge_callee (curr, master_clone);
1297           reset_edge_growth_cache (curr);
1298         }
1299
1300       if (estimate_size_after_inlining (node, curr) > limit)
1301         {
1302           cgraph_redirect_edge_callee (curr, dest);
1303           reset_edge_growth_cache (curr);
1304           break;
1305         }
1306
1307       depth = 1;
1308       for (cnode = curr->caller;
1309            cnode->global.inlined_to; cnode = cnode->callers->caller)
1310         if (node->symbol.decl
1311             == cgraph_function_or_thunk_node (curr->callee, NULL)->symbol.decl)
1312           depth++;
1313
1314       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1315         {
1316           cgraph_redirect_edge_callee (curr, dest);
1317           reset_edge_growth_cache (curr);
1318           continue;
1319         }
1320
1321       if (dump_file)
1322         {
1323           fprintf (dump_file,
1324                    "   Inlining call of depth %i", depth);
1325           if (node->count)
1326             {
1327               fprintf (dump_file, " called approx. %.2f times per call",
1328                        (double)curr->count / node->count);
1329             }
1330           fprintf (dump_file, "\n");
1331         }
1332       if (!master_clone)
1333         {
1334           /* We need original clone to copy around.  */
1335           master_clone = cgraph_clone_node (node, node->symbol.decl,
1336                                             node->count, CGRAPH_FREQ_BASE,
1337                                             false, vNULL, true, NULL);
1338           for (e = master_clone->callees; e; e = e->next_callee)
1339             if (!e->inline_failed)
1340               clone_inlined_nodes (e, true, false, NULL);
1341           cgraph_redirect_edge_callee (curr, master_clone);
1342           reset_edge_growth_cache (curr);
1343         }
1344
1345       inline_call (curr, false, new_edges, &overall_size, true);
1346       lookup_recursive_calls (node, curr->callee, heap);
1347       n++;
1348     }
1349
1350   if (!fibheap_empty (heap) && dump_file)
1351     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1352   fibheap_delete (heap);
1353
1354   if (!master_clone)
1355     return false;
1356
1357   if (dump_file)
1358     fprintf (dump_file,
1359              "\n   Inlined %i times, "
1360              "body grown from size %i to %i, time %i to %i\n", n,
1361              inline_summary (master_clone)->size, inline_summary (node)->size,
1362              inline_summary (master_clone)->time, inline_summary (node)->time);
1363
1364   /* Remove master clone we used for inlining.  We rely that clones inlined
1365      into master clone gets queued just before master clone so we don't
1366      need recursion.  */
1367   for (node = cgraph_first_function (); node != master_clone;
1368        node = next)
1369     {
1370       next = cgraph_next_function (node);
1371       if (node->global.inlined_to == master_clone)
1372         cgraph_remove_node (node);
1373     }
1374   cgraph_remove_node (master_clone);
1375   return true;
1376 }
1377
1378
1379 /* Given whole compilation unit estimate of INSNS, compute how large we can
1380    allow the unit to grow.  */
1381
1382 static int
1383 compute_max_insns (int insns)
1384 {
1385   int max_insns = insns;
1386   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1387     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1388
1389   return ((HOST_WIDEST_INT) max_insns
1390           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1391 }
1392
1393
1394 /* Compute badness of all edges in NEW_EDGES and add them to the HEAP.  */
1395
1396 static void
1397 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1398 {
1399   while (new_edges.length () > 0)
1400     {
1401       struct cgraph_edge *edge = new_edges.pop ();
1402
1403       gcc_assert (!edge->aux);
1404       if (edge->inline_failed
1405           && can_inline_edge_p (edge, true)
1406           && want_inline_small_function_p (edge, true))
1407         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1408     }
1409 }
1410
1411 /* Remove EDGE from the fibheap.  */
1412
1413 static void
1414 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1415 {
1416   if (e->callee)
1417     reset_node_growth_cache (e->callee);
1418   if (e->aux)
1419     {
1420       fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1421       e->aux = NULL;
1422     }
1423 }
1424
1425 /* Return true if speculation of edge E seems useful.
1426    If ANTICIPATE_INLINING is true, be conservative and hope that E
1427    may get inlined.  */
1428
1429 bool
1430 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1431 {
1432   enum availability avail;
1433   struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
1434   struct cgraph_edge *direct, *indirect;
1435   struct ipa_ref *ref;
1436
1437   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1438
1439   if (!cgraph_maybe_hot_edge_p (e))
1440     return false;
1441
1442   /* See if IP optimizations found something potentially useful about the
1443      function.  For now we look only for CONST/PURE flags.  Almost everything
1444      else we propagate is useless.  */
1445   if (avail >= AVAIL_AVAILABLE)
1446     {
1447       int ecf_flags = flags_from_decl_or_type (target->symbol.decl);
1448       if (ecf_flags & ECF_CONST)
1449         {
1450           cgraph_speculative_call_info (e, direct, indirect, ref);
1451           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))
1452             return true;
1453         }
1454       else if (ecf_flags & ECF_PURE)
1455         {
1456           cgraph_speculative_call_info (e, direct, indirect, ref);
1457           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))
1458             return true;
1459         }
1460     }
1461   /* If we did not managed to inline the function nor redirect
1462      to an ipa-cp clone (that are seen by having local flag set),
1463      it is probably pointless to inline it unless hardware is missing
1464      indirect call predictor.  */
1465   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1466     return false;
1467   /* For overwritable targets there is not much to do.  */
1468   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1469     return false;
1470   /* OK, speculation seems interesting.  */
1471   return true;
1472 }
1473
1474 /* We know that EDGE is not going to be inlined.
1475    See if we can remove speculation.  */
1476
1477 static void
1478 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1479 {
1480   if (edge->speculative && !speculation_useful_p (edge, false))
1481     {
1482       struct cgraph_node *node = edge->caller;
1483       struct cgraph_node *where = node->global.inlined_to
1484                                   ? node->global.inlined_to : node;
1485       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1486
1487       cgraph_resolve_speculation (edge, NULL);
1488       reset_edge_caches (where);
1489       inline_update_overall_summary (where);
1490       update_caller_keys (edge_heap, where,
1491                           updated_nodes, NULL);
1492       update_callee_keys (edge_heap, where,
1493                           updated_nodes);
1494       BITMAP_FREE (updated_nodes);
1495     }
1496 }
1497
1498 /* We use greedy algorithm for inlining of small functions:
1499    All inline candidates are put into prioritized heap ordered in
1500    increasing badness.
1501
1502    The inlining of small functions is bounded by unit growth parameters.  */
1503
1504 static void
1505 inline_small_functions (void)
1506 {
1507   struct cgraph_node *node;
1508   struct cgraph_edge *edge;
1509   fibheap_t edge_heap = fibheap_new ();
1510   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1511   int min_size, max_size;
1512   vec<cgraph_edge_p> new_indirect_edges = vNULL;
1513   int initial_size = 0;
1514   struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1515   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1516
1517   if (flag_indirect_inlining)
1518     new_indirect_edges.create (8);
1519
1520   edge_removal_hook_holder
1521     = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1522
1523   /* Compute overall unit size and other global parameters used by badness
1524      metrics.  */
1525
1526   max_count = 0;
1527   ipa_reduced_postorder (order, true, true, NULL);
1528   free (order);
1529
1530   FOR_EACH_DEFINED_FUNCTION (node)
1531     if (!node->global.inlined_to)
1532       {
1533         if (cgraph_function_with_gimple_body_p (node)
1534             || node->thunk.thunk_p)
1535           {
1536             struct inline_summary *info = inline_summary (node);
1537             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->symbol.aux;
1538
1539             if (!DECL_EXTERNAL (node->symbol.decl))
1540               initial_size += info->size;
1541             info->growth = estimate_growth (node);
1542             if (dfs && dfs->next_cycle)
1543               {
1544                 struct cgraph_node *n2;
1545                 int id = dfs->scc_no + 1;
1546                 for (n2 = node; n2;
1547                      n2 = ((struct ipa_dfs_info *) node->symbol.aux)->next_cycle)
1548                   {
1549                     struct inline_summary *info2 = inline_summary (n2);
1550                     if (info2->scc_no)
1551                       break;
1552                     info2->scc_no = id;
1553                   }
1554               }
1555           }
1556
1557         for (edge = node->callers; edge; edge = edge->next_caller)
1558           if (max_count < edge->count)
1559             max_count = edge->count;
1560       }
1561   sreal_init (&max_count_real, max_count, 0);
1562   sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1563   sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1564   ipa_free_postorder_info ();
1565   initialize_growth_caches ();
1566
1567   if (dump_file)
1568     fprintf (dump_file,
1569              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1570              initial_size);
1571
1572   overall_size = initial_size;
1573   max_size = compute_max_insns (overall_size);
1574   min_size = overall_size;
1575
1576   /* Populate the heap with all edges we might inline.  */
1577
1578   FOR_EACH_DEFINED_FUNCTION (node)
1579     {
1580       bool update = false;
1581       struct cgraph_edge *next;
1582
1583       if (dump_file)
1584         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1585                  cgraph_node_name (node), node->symbol.order);
1586
1587       for (edge = node->callees; edge; edge = next)
1588         {
1589           next = edge->next_callee;
1590           if (edge->inline_failed
1591               && !edge->aux
1592               && can_inline_edge_p (edge, true)
1593               && want_inline_small_function_p (edge, true)
1594               && edge->inline_failed)
1595             {
1596               gcc_assert (!edge->aux);
1597               update_edge_key (edge_heap, edge);
1598             }
1599           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1600             {
1601               cgraph_resolve_speculation (edge, NULL);
1602               update = true;
1603             }
1604         }
1605       if (update)
1606         {
1607           struct cgraph_node *where = node->global.inlined_to
1608                                       ? node->global.inlined_to : node;
1609           inline_update_overall_summary (where);
1610           reset_node_growth_cache (where);
1611           reset_edge_caches (where);
1612           update_caller_keys (edge_heap, where,
1613                               updated_nodes, NULL);
1614           bitmap_clear (updated_nodes);
1615         }
1616     }
1617
1618   gcc_assert (in_lto_p
1619               || !max_count
1620               || (profile_info && flag_branch_probabilities));
1621
1622   while (!fibheap_empty (edge_heap))
1623     {
1624       int old_size = overall_size;
1625       struct cgraph_node *where, *callee;
1626       int badness = fibheap_min_key (edge_heap);
1627       int current_badness;
1628       int cached_badness;
1629       int growth;
1630
1631       edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1632       gcc_assert (edge->aux);
1633       edge->aux = NULL;
1634       if (!edge->inline_failed)
1635         continue;
1636
1637       /* Be sure that caches are kept consistent.
1638          We cannot make this ENABLE_CHECKING only because it causes different
1639          updates of the fibheap queue.  */
1640       cached_badness = edge_badness (edge, false);
1641       reset_edge_growth_cache (edge);
1642       reset_node_growth_cache (edge->callee);
1643
1644       /* When updating the edge costs, we only decrease badness in the keys.
1645          Increases of badness are handled lazily; when we see a key with an
1646          out-of-date value on it, we re-insert it now.  */
1647       current_badness = edge_badness (edge, false);
1648       gcc_assert (cached_badness == current_badness);
1649       gcc_assert (current_badness >= badness);
1650       if (current_badness != badness)
1651         {
1652           edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1653           continue;
1654         }
1655
1656       if (!can_inline_edge_p (edge, true))
1657         {
1658           resolve_noninline_speculation (edge_heap, edge);
1659           continue;
1660         }
1661
1662       callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1663       growth = estimate_edge_growth (edge);
1664       if (dump_file)
1665         {
1666           fprintf (dump_file,
1667                    "\nConsidering %s/%i with %i size\n",
1668                    cgraph_node_name (callee), callee->symbol.order,
1669                    inline_summary (callee)->size);
1670           fprintf (dump_file,
1671                    " to be inlined into %s/%i in %s:%i\n"
1672                    " Estimated growth after inlined into all is %+i insns.\n"
1673                    " Estimated badness is %i, frequency %.2f.\n",
1674                    cgraph_node_name (edge->caller), edge->caller->symbol.order,
1675                    flag_wpa ? "unknown"
1676                    : gimple_filename ((const_gimple) edge->call_stmt),
1677                    flag_wpa ? -1
1678                    : gimple_lineno ((const_gimple) edge->call_stmt),
1679                    estimate_growth (callee),
1680                    badness,
1681                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1682           if (edge->count)
1683             fprintf (dump_file," Called "HOST_WIDEST_INT_PRINT_DEC"x\n",
1684                      edge->count);
1685           if (dump_flags & TDF_DETAILS)
1686             edge_badness (edge, true);
1687         }
1688
1689       if (overall_size + growth > max_size
1690           && !DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
1691         {
1692           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1693           report_inline_failed_reason (edge);
1694           resolve_noninline_speculation (edge_heap, edge);
1695           continue;
1696         }
1697
1698       if (!want_inline_small_function_p (edge, true))
1699         {
1700           resolve_noninline_speculation (edge_heap, edge);
1701           continue;
1702         }
1703
1704       /* Heuristics for inlining small functions work poorly for
1705          recursive calls, where the effect is similar to loop unrolling.
1706          When inlining such an edge seems profitable, leave the decision
1707          to the specialized recursive inliner.  */
1708       if (cgraph_edge_recursive_p (edge))
1709         {
1710           where = edge->caller;
1711           if (where->global.inlined_to)
1712             where = where->global.inlined_to;
1713           if (!recursive_inlining (edge,
1714                                    flag_indirect_inlining
1715                                    ? &new_indirect_edges : NULL))
1716             {
1717               edge->inline_failed = CIF_RECURSIVE_INLINING;
1718               resolve_noninline_speculation (edge_heap, edge);
1719               continue;
1720             }
1721           reset_edge_caches (where);
1722           /* Recursive inliner inlines all recursive calls of the function
1723              at once. Consequently we need to update all callee keys.  */
1724           if (flag_indirect_inlining)
1725             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1726           update_callee_keys (edge_heap, where, updated_nodes);
1727           bitmap_clear (updated_nodes);
1728         }
1729       else
1730         {
1731           struct cgraph_node *outer_node = NULL;
1732           int depth = 0;
1733
1734           /* Consider the case where self recursive function A is inlined into B.
1735              This is desired optimization in some cases, since it leads to effect
1736              similar of loop peeling and we might completely optimize out the
1737              recursive call.  However we must be extra selective.  */
1738
1739           where = edge->caller;
1740           while (where->global.inlined_to)
1741             {
1742               if (where->symbol.decl == callee->symbol.decl)
1743                 outer_node = where, depth++;
1744               where = where->callers->caller;
1745             }
1746           if (outer_node
1747               && !want_inline_self_recursive_call_p (edge, outer_node,
1748                                                      true, depth))
1749             {
1750               edge->inline_failed
1751                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->symbol.decl)
1752                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1753               resolve_noninline_speculation (edge_heap, edge);
1754               continue;
1755             }
1756           else if (depth && dump_file)
1757             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1758
1759           gcc_checking_assert (!callee->global.inlined_to);
1760           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1761           if (flag_indirect_inlining)
1762             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1763
1764           reset_edge_caches (edge->callee);
1765           reset_node_growth_cache (callee);
1766
1767           update_callee_keys (edge_heap, where, updated_nodes);
1768         }
1769       where = edge->caller;
1770       if (where->global.inlined_to)
1771         where = where->global.inlined_to;
1772
1773       /* Our profitability metric can depend on local properties
1774          such as number of inlinable calls and size of the function body.
1775          After inlining these properties might change for the function we
1776          inlined into (since it's body size changed) and for the functions
1777          called by function we inlined (since number of it inlinable callers
1778          might change).  */
1779       update_caller_keys (edge_heap, where, updated_nodes, NULL);
1780       bitmap_clear (updated_nodes);
1781
1782       if (dump_file)
1783         {
1784           fprintf (dump_file,
1785                    " Inlined into %s which now has time %i and size %i,"
1786                    "net change of %+i.\n",
1787                    cgraph_node_name (edge->caller),
1788                    inline_summary (edge->caller)->time,
1789                    inline_summary (edge->caller)->size,
1790                    overall_size - old_size);
1791         }
1792       if (min_size > overall_size)
1793         {
1794           min_size = overall_size;
1795           max_size = compute_max_insns (min_size);
1796
1797           if (dump_file)
1798             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1799         }
1800     }
1801
1802   free_growth_caches ();
1803   new_indirect_edges.release ();
1804   fibheap_delete (edge_heap);
1805   if (dump_file)
1806     fprintf (dump_file,
1807              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1808              initial_size, overall_size,
1809              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1810   BITMAP_FREE (updated_nodes);
1811   cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
1812 }
1813
1814 /* Flatten NODE.  Performed both during early inlining and
1815    at IPA inlining time.  */
1816
1817 static void
1818 flatten_function (struct cgraph_node *node, bool early)
1819 {
1820   struct cgraph_edge *e;
1821
1822   /* We shouldn't be called recursively when we are being processed.  */
1823   gcc_assert (node->symbol.aux == NULL);
1824
1825   node->symbol.aux = (void *) node;
1826
1827   for (e = node->callees; e; e = e->next_callee)
1828     {
1829       struct cgraph_node *orig_callee;
1830       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1831
1832       /* We've hit cycle?  It is time to give up.  */
1833       if (callee->symbol.aux)
1834         {
1835           if (dump_file)
1836             fprintf (dump_file,
1837                      "Not inlining %s into %s to avoid cycle.\n",
1838                      xstrdup (cgraph_node_name (callee)),
1839                      xstrdup (cgraph_node_name (e->caller)));
1840           e->inline_failed = CIF_RECURSIVE_INLINING;
1841           continue;
1842         }
1843
1844       /* When the edge is already inlined, we just need to recurse into
1845          it in order to fully flatten the leaves.  */
1846       if (!e->inline_failed)
1847         {
1848           flatten_function (callee, early);
1849           continue;
1850         }
1851
1852       /* Flatten attribute needs to be processed during late inlining. For
1853          extra code quality we however do flattening during early optimization,
1854          too.  */
1855       if (!early
1856           ? !can_inline_edge_p (e, true)
1857           : !can_early_inline_edge_p (e))
1858         continue;
1859
1860       if (cgraph_edge_recursive_p (e))
1861         {
1862           if (dump_file)
1863             fprintf (dump_file, "Not inlining: recursive call.\n");
1864           continue;
1865         }
1866
1867       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->symbol.decl))
1868           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->symbol.decl)))
1869         {
1870           if (dump_file)
1871             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1872           continue;
1873         }
1874
1875       /* Inline the edge and flatten the inline clone.  Avoid
1876          recursing through the original node if the node was cloned.  */
1877       if (dump_file)
1878         fprintf (dump_file, " Inlining %s into %s.\n",
1879                  xstrdup (cgraph_node_name (callee)),
1880                  xstrdup (cgraph_node_name (e->caller)));
1881       orig_callee = callee;
1882       inline_call (e, true, NULL, NULL, false);
1883       if (e->callee != orig_callee)
1884         orig_callee->symbol.aux = (void *) node;
1885       flatten_function (e->callee, early);
1886       if (e->callee != orig_callee)
1887         orig_callee->symbol.aux = NULL;
1888     }
1889
1890   node->symbol.aux = NULL;
1891   if (!node->global.inlined_to)
1892     inline_update_overall_summary (node);
1893 }
1894
1895 /* Decide on the inlining.  We do so in the topological order to avoid
1896    expenses on updating data structures.  */
1897
1898 static unsigned int
1899 ipa_inline (void)
1900 {
1901   struct cgraph_node *node;
1902   int nnodes;
1903   struct cgraph_node **order =
1904     XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1905   int i;
1906   int cold;
1907
1908   if (in_lto_p && optimize)
1909     ipa_update_after_lto_read ();
1910
1911   if (dump_file)
1912     dump_inline_summaries (dump_file);
1913
1914   nnodes = ipa_reverse_postorder (order);
1915
1916   FOR_EACH_FUNCTION (node)
1917     node->symbol.aux = 0;
1918
1919   if (dump_file)
1920     fprintf (dump_file, "\nFlattening functions:\n");
1921
1922   /* In the first pass handle functions to be flattened.  Do this with
1923      a priority so none of our later choices will make this impossible.  */
1924   for (i = nnodes - 1; i >= 0; i--)
1925     {
1926       node = order[i];
1927
1928       /* Handle nodes to be flattened.
1929          Ideally when processing callees we stop inlining at the
1930          entry of cycles, possibly cloning that entry point and
1931          try to flatten itself turning it into a self-recursive
1932          function.  */
1933       if (lookup_attribute ("flatten",
1934                             DECL_ATTRIBUTES (node->symbol.decl)) != NULL)
1935         {
1936           if (dump_file)
1937             fprintf (dump_file,
1938                      "Flattening %s\n", cgraph_node_name (node));
1939           flatten_function (node, false);
1940         }
1941     }
1942
1943   inline_small_functions ();
1944
1945   /* Do first after-inlining removal.  We want to remove all "stale" extern inline
1946      functions and virtual functions so we really know what is called once.  */
1947   symtab_remove_unreachable_nodes (false, dump_file);
1948   free (order);
1949
1950   /* Inline functions with a property that after inlining into all callers the
1951      code size will shrink because the out-of-line copy is eliminated.
1952      We do this regardless on the callee size as long as function growth limits
1953      are met.  */
1954   if (dump_file)
1955     fprintf (dump_file,
1956              "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
1957
1958   /* Inlining one function called once has good chance of preventing
1959      inlining other function into the same callee.  Ideally we should
1960      work in priority order, but probably inlining hot functions first
1961      is good cut without the extra pain of maintaining the queue.
1962
1963      ??? this is not really fitting the bill perfectly: inlining function
1964      into callee often leads to better optimization of callee due to
1965      increased context for optimization.
1966      For example if main() function calls a function that outputs help
1967      and then function that does the main optmization, we should inline
1968      the second with priority even if both calls are cold by themselves.
1969
1970      We probably want to implement new predicate replacing our use of
1971      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
1972      to be hot.  */
1973   for (cold = 0; cold <= 1; cold ++)
1974     {
1975       FOR_EACH_DEFINED_FUNCTION (node)
1976         {
1977           struct cgraph_edge *edge, *next;
1978           bool update=false;
1979
1980           for (edge = node->callees; edge; edge = next)
1981             {
1982               next = edge->next_callee;
1983               if (edge->speculative && !speculation_useful_p (edge, false))
1984                 {
1985                   cgraph_resolve_speculation (edge, NULL);
1986                   update = true;
1987                 }
1988             }
1989           if (update)
1990             {
1991               struct cgraph_node *where = node->global.inlined_to
1992                                           ? node->global.inlined_to : node;
1993               reset_node_growth_cache (where);
1994               reset_edge_caches (where);
1995               inline_update_overall_summary (where);
1996             }
1997           if (flag_inline_functions_called_once
1998               && want_inline_function_to_all_callers_p (node, cold))
1999             {
2000               int num_calls = 0;
2001               struct cgraph_edge *e;
2002               for (e = node->callers; e; e = e->next_caller)
2003                 num_calls++;
2004               while (node->callers && !node->global.inlined_to)
2005                 {
2006                   struct cgraph_node *caller = node->callers->caller;
2007
2008                   if (dump_file)
2009                     {
2010                       fprintf (dump_file,
2011                                "\nInlining %s size %i.\n",
2012                                cgraph_node_name (node),
2013                                inline_summary (node)->size);
2014                       fprintf (dump_file,
2015                                " Called once from %s %i insns.\n",
2016                                cgraph_node_name (node->callers->caller),
2017                                inline_summary (node->callers->caller)->size);
2018                     }
2019
2020                   inline_call (node->callers, true, NULL, NULL, true);
2021                   if (dump_file)
2022                     fprintf (dump_file,
2023                              " Inlined into %s which now has %i size\n",
2024                              cgraph_node_name (caller),
2025                              inline_summary (caller)->size);
2026                   if (!num_calls--)
2027                     {
2028                       if (dump_file)
2029                         fprintf (dump_file, "New calls found; giving up.\n");
2030                       break;
2031                     }
2032                 }
2033             }
2034         }
2035     }
2036
2037   /* Free ipa-prop structures if they are no longer needed.  */
2038   if (optimize)
2039     ipa_free_all_structures_after_iinln ();
2040
2041   if (dump_file)
2042     fprintf (dump_file,
2043              "\nInlined %i calls, eliminated %i functions\n\n",
2044              ncalls_inlined, nfunctions_inlined);
2045
2046   if (dump_file)
2047     dump_inline_summaries (dump_file);
2048   /* In WPA we use inline summaries for partitioning process.  */
2049   if (!flag_wpa)
2050     inline_free_summary ();
2051   return 0;
2052 }
2053
2054 /* Inline always-inline function calls in NODE.  */
2055
2056 static bool
2057 inline_always_inline_functions (struct cgraph_node *node)
2058 {
2059   struct cgraph_edge *e;
2060   bool inlined = false;
2061
2062   for (e = node->callees; e; e = e->next_callee)
2063     {
2064       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2065       if (!DECL_DISREGARD_INLINE_LIMITS (callee->symbol.decl))
2066         continue;
2067
2068       if (cgraph_edge_recursive_p (e))
2069         {
2070           if (dump_file)
2071             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2072                      cgraph_node_name (e->callee));
2073           e->inline_failed = CIF_RECURSIVE_INLINING;
2074           continue;
2075         }
2076
2077       if (!can_early_inline_edge_p (e))
2078         {
2079           /* Set inlined to true if the callee is marked "always_inline" but
2080              is not inlinable.  This will allow flagging an error later in
2081              expand_call_inline in tree-inline.c.  */
2082           if (lookup_attribute ("always_inline",
2083                                  DECL_ATTRIBUTES (callee->symbol.decl)) != NULL)
2084             inlined = true;
2085           continue;
2086         }
2087
2088       if (dump_file)
2089         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2090                  xstrdup (cgraph_node_name (e->callee)),
2091                  xstrdup (cgraph_node_name (e->caller)));
2092       inline_call (e, true, NULL, NULL, false);
2093       inlined = true;
2094     }
2095   if (inlined)
2096     inline_update_overall_summary (node);
2097
2098   return inlined;
2099 }
2100
2101 /* Decide on the inlining.  We do so in the topological order to avoid
2102    expenses on updating data structures.  */
2103
2104 static bool
2105 early_inline_small_functions (struct cgraph_node *node)
2106 {
2107   struct cgraph_edge *e;
2108   bool inlined = false;
2109
2110   for (e = node->callees; e; e = e->next_callee)
2111     {
2112       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2113       if (!inline_summary (callee)->inlinable
2114           || !e->inline_failed)
2115         continue;
2116
2117       /* Do not consider functions not declared inline.  */
2118       if (!DECL_DECLARED_INLINE_P (callee->symbol.decl)
2119           && !flag_inline_small_functions
2120           && !flag_inline_functions)
2121         continue;
2122
2123       if (dump_file)
2124         fprintf (dump_file, "Considering inline candidate %s.\n",
2125                  cgraph_node_name (callee));
2126
2127       if (!can_early_inline_edge_p (e))
2128         continue;
2129
2130       if (cgraph_edge_recursive_p (e))
2131         {
2132           if (dump_file)
2133             fprintf (dump_file, "  Not inlining: recursive call.\n");
2134           continue;
2135         }
2136
2137       if (!want_early_inline_function_p (e))
2138         continue;
2139
2140       if (dump_file)
2141         fprintf (dump_file, " Inlining %s into %s.\n",
2142                  xstrdup (cgraph_node_name (callee)),
2143                  xstrdup (cgraph_node_name (e->caller)));
2144       inline_call (e, true, NULL, NULL, true);
2145       inlined = true;
2146     }
2147
2148   return inlined;
2149 }
2150
2151 /* Do inlining of small functions.  Doing so early helps profiling and other
2152    passes to be somewhat more effective and avoids some code duplication in
2153    later real inlining pass for testcases with very many function calls.  */
2154 static unsigned int
2155 early_inliner (void)
2156 {
2157   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2158   struct cgraph_edge *edge;
2159   unsigned int todo = 0;
2160   int iterations = 0;
2161   bool inlined = false;
2162
2163   if (seen_error ())
2164     return 0;
2165
2166   /* Do nothing if datastructures for ipa-inliner are already computed.  This
2167      happens when some pass decides to construct new function and
2168      cgraph_add_new_function calls lowering passes and early optimization on
2169      it.  This may confuse ourself when early inliner decide to inline call to
2170      function clone, because function clones don't have parameter list in
2171      ipa-prop matching their signature.  */
2172   if (ipa_node_params_vector.exists ())
2173     return 0;
2174
2175 #ifdef ENABLE_CHECKING
2176   verify_cgraph_node (node);
2177 #endif
2178   ipa_remove_all_references (&node->symbol.ref_list);
2179
2180   /* Even when not optimizing or not inlining inline always-inline
2181      functions.  */
2182   inlined = inline_always_inline_functions (node);
2183
2184   if (!optimize
2185       || flag_no_inline
2186       || !flag_early_inlining
2187       /* Never inline regular functions into always-inline functions
2188          during incremental inlining.  This sucks as functions calling
2189          always inline functions will get less optimized, but at the
2190          same time inlining of functions calling always inline
2191          function into an always inline function might introduce
2192          cycles of edges to be always inlined in the callgraph.
2193
2194          We might want to be smarter and just avoid this type of inlining.  */
2195       || DECL_DISREGARD_INLINE_LIMITS (node->symbol.decl))
2196     ;
2197   else if (lookup_attribute ("flatten",
2198                              DECL_ATTRIBUTES (node->symbol.decl)) != NULL)
2199     {
2200       /* When the function is marked to be flattened, recursively inline
2201          all calls in it.  */
2202       if (dump_file)
2203         fprintf (dump_file,
2204                  "Flattening %s\n", cgraph_node_name (node));
2205       flatten_function (node, true);
2206       inlined = true;
2207     }
2208   else
2209     {
2210       /* We iterate incremental inlining to get trivial cases of indirect
2211          inlining.  */
2212       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2213              && early_inline_small_functions (node))
2214         {
2215           timevar_push (TV_INTEGRATION);
2216           todo |= optimize_inline_calls (current_function_decl);
2217
2218           /* Technically we ought to recompute inline parameters so the new
2219              iteration of early inliner works as expected.  We however have
2220              values approximately right and thus we only need to update edge
2221              info that might be cleared out for newly discovered edges.  */
2222           for (edge = node->callees; edge; edge = edge->next_callee)
2223             {
2224               struct inline_edge_summary *es = inline_edge_summary (edge);
2225               es->call_stmt_size
2226                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2227               es->call_stmt_time
2228                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2229               if (edge->callee->symbol.decl
2230                   && !gimple_check_call_matching_types (
2231                       edge->call_stmt, edge->callee->symbol.decl, false))
2232                 edge->call_stmt_cannot_inline_p = true;
2233             }
2234           timevar_pop (TV_INTEGRATION);
2235           iterations++;
2236           inlined = false;
2237         }
2238       if (dump_file)
2239         fprintf (dump_file, "Iterations: %i\n", iterations);
2240     }
2241
2242   if (inlined)
2243     {
2244       timevar_push (TV_INTEGRATION);
2245       todo |= optimize_inline_calls (current_function_decl);
2246       timevar_pop (TV_INTEGRATION);
2247     }
2248
2249   cfun->always_inline_functions_inlined = true;
2250
2251   return todo;
2252 }
2253
2254 namespace {
2255
2256 const pass_data pass_data_early_inline =
2257 {
2258   GIMPLE_PASS, /* type */
2259   "einline", /* name */
2260   OPTGROUP_INLINE, /* optinfo_flags */
2261   false, /* has_gate */
2262   true, /* has_execute */
2263   TV_EARLY_INLINING, /* tv_id */
2264   PROP_ssa, /* properties_required */
2265   0, /* properties_provided */
2266   0, /* properties_destroyed */
2267   0, /* todo_flags_start */
2268   0, /* todo_flags_finish */
2269 };
2270
2271 class pass_early_inline : public gimple_opt_pass
2272 {
2273 public:
2274   pass_early_inline(gcc::context *ctxt)
2275     : gimple_opt_pass(pass_data_early_inline, ctxt)
2276   {}
2277
2278   /* opt_pass methods: */
2279   unsigned int execute () { return early_inliner (); }
2280
2281 }; // class pass_early_inline
2282
2283 } // anon namespace
2284
2285 gimple_opt_pass *
2286 make_pass_early_inline (gcc::context *ctxt)
2287 {
2288   return new pass_early_inline (ctxt);
2289 }
2290
2291
2292 /* When to run IPA inlining.  Inlining of always-inline functions
2293    happens during early inlining.
2294
2295    Enable inlining unconditoinally at -flto.  We need size estimates to
2296    drive partitioning.  */
2297
2298 static bool
2299 gate_ipa_inline (void)
2300 {
2301   return optimize || flag_lto || flag_wpa;
2302 }
2303
2304 namespace {
2305
2306 const pass_data pass_data_ipa_inline =
2307 {
2308   IPA_PASS, /* type */
2309   "inline", /* name */
2310   OPTGROUP_INLINE, /* optinfo_flags */
2311   true, /* has_gate */
2312   true, /* has_execute */
2313   TV_IPA_INLINING, /* tv_id */
2314   0, /* properties_required */
2315   0, /* properties_provided */
2316   0, /* properties_destroyed */
2317   TODO_remove_functions, /* todo_flags_start */
2318   ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
2319 };
2320
2321 class pass_ipa_inline : public ipa_opt_pass_d
2322 {
2323 public:
2324   pass_ipa_inline(gcc::context *ctxt)
2325     : ipa_opt_pass_d(pass_data_ipa_inline, ctxt,
2326                      inline_generate_summary, /* generate_summary */
2327                      inline_write_summary, /* write_summary */
2328                      inline_read_summary, /* read_summary */
2329                      NULL, /* write_optimization_summary */
2330                      NULL, /* read_optimization_summary */
2331                      NULL, /* stmt_fixup */
2332                      0, /* function_transform_todo_flags_start */
2333                      inline_transform, /* function_transform */
2334                      NULL) /* variable_transform */
2335   {}
2336
2337   /* opt_pass methods: */
2338   bool gate () { return gate_ipa_inline (); }
2339   unsigned int execute () { return ipa_inline (); }
2340
2341 }; // class pass_ipa_inline
2342
2343 } // anon namespace
2344
2345 ipa_opt_pass_d *
2346 make_pass_ipa_inline (gcc::context *ctxt)
2347 {
2348   return new pass_ipa_inline (ctxt);
2349 }