gcc/ipa-inline.c

   1 /* Inlining decision heuristics.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3    Contributed by Jan Hubicka
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /*  Inlining decision heuristics
  22
  23     The implementation of inliner is organized as follows:
  24
  25     inlining heuristics limits
  26
  27       can_inline_edge_p checks whether a particular inlining is allowed
  28       by the limits specified by the user (allowed function growth, stack
  29       growth and so on).
  30
  31       Functions are inlined when it is obvious the result is profitable (such
  32       as functions called once or when inlining reduce code size).
  33       In addition to that we perform inlining of small functions and recursive
  34       inlining.
  35
  36     inlining heuristics
  37
  38        The inliner itself is split into two passes:
  39
  40        pass_early_inlining
  41
  42          Simple local inlining pass inlining callees into current function.
  43          This pass makes no use of whole unit analysis and thus it can do only
  44          very simple decisions based on local properties.
  45
  46          The strength of the pass is that it is run in topological order
  47          (reverse postorder) on the callgraph. Functions are converted into SSA
  48          form just before this pass and optimized subsequently. As a result, the
  49          callees of the function seen by the early inliner were already optimized
  50          and the results of early inlining add a lot of optimization opportunities
  51          for the local optimization.
  52
  53          The pass handles the obvious inlining decisions within the compilation
  54          unit - inlining auto inline functions, inlining for size and
  55          flattening.
  56
  57          The main strength of the pass is the ability to eliminate abstraction
  58          penalty in C++ code (via combination of inlining and early
  59          optimization) and thus improve quality of analysis done by real IPA
  60          optimizers.
  61
  62          Because of lack of whole unit knowledge, the pass can not really make
  63          good code size/performance tradeoffs.  It however does very simple
  64          speculative inlining allowing code size to grow by
  65          EARLY_INLINING_INSNS when callee is leaf function.  In this case the
  66          optimizations performed later are very likely to eliminate the cost.
  67
  68        pass_ipa_inline
  69
  70          This is the real inliner able to handle inlining with whole program
  71          knowledge. It performs following steps:
  72
  73          1) inlining of small functions.  This is implemented by greedy
  74          algorithm ordering all inlinable cgraph edges by their badness and
  75          inlining them in this order as long as inline limits allow doing so.
  76
  77          This heuristic is not very good at inlining recursive calls. Recursive
  78          calls can be inlined with results similar to loop unrolling. To do so,
  79          special purpose recursive inliner is executed on function when
  80          recursive edge is met as viable candidate.
  81
  82          2) Unreachable functions are removed from callgraph.  Inlining leads
  83          to devirtualization and other modification of callgraph so functions
  84          may become unreachable during the process. Also functions declared as
  85          extern inline or virtual functions are removed, since after inlining
  86          we no longer need the offline bodies.
  87
  88          3) Functions called once and not exported from the unit are inlined.
  89          This should almost always lead to reduction of code size by eliminating
  90          the need for offline copy of the function.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "tm.h"
  96 #include "tree.h"
  97 #include "trans-mem.h"
  98 #include "calls.h"
  99 #include "tree-inline.h"
 100 #include "langhooks.h"
 101 #include "flags.h"
 102 #include "diagnostic.h"
 103 #include "gimple-pretty-print.h"
 104 #include "params.h"
 105 #include "fibheap.h"
 106 #include "intl.h"
 107 #include "tree-pass.h"
 108 #include "coverage.h"
 109 #include "rtl.h"
 110 #include "bitmap.h"
 111 #include "basic-block.h"
 112 #include "tree-ssa-alias.h"
 113 #include "internal-fn.h"
 114 #include "gimple-expr.h"
 115 #include "is-a.h"
 116 #include "gimple.h"
 117 #include "gimple-ssa.h"
 118 #include "ipa-prop.h"
 119 #include "except.h"
 120 #include "target.h"
 121 #include "ipa-inline.h"
 122 #include "ipa-utils.h"
 123 #include "sreal.h"
 124 #include "cilk.h"
 125
 126 /* Statistics we collect about inlining algorithm.  */
 127 static int overall_size;
 128 static gcov_type max_count;
 129 static sreal max_count_real, max_relbenefit_real, half_int_min_real;
 130 static gcov_type spec_rem;
 131
 132 /* Return false when inlining edge E would lead to violating
 133    limits on function unit growth or stack usage growth.
 134
 135    The relative function body growth limit is present generally
 136    to avoid problems with non-linear behavior of the compiler.
 137    To allow inlining huge functions into tiny wrapper, the limit
 138    is always based on the bigger of the two functions considered.
 139
 140    For stack growth limits we always base the growth in stack usage
 141    of the callers.  We want to prevent applications from segfaulting
 142    on stack overflow when functions with huge stack frames gets
 143    inlined. */
 144
 145 static bool
 146 caller_growth_limits (struct cgraph_edge *e)
 147 {
 148   struct cgraph_node *to = e->caller;
 149   struct cgraph_node *what = cgraph_function_or_thunk_node (e->callee, NULL);
 150   int newsize;
 151   int limit = 0;
 152   HOST_WIDE_INT stack_size_limit = 0, inlined_stack;
 153   struct inline_summary *info, *what_info, *outer_info = inline_summary (to);
 154
 155   /* Look for function e->caller is inlined to.  While doing
 156      so work out the largest function body on the way.  As
 157      described above, we want to base our function growth
 158      limits based on that.  Not on the self size of the
 159      outer function, not on the self size of inline code
 160      we immediately inline to.  This is the most relaxed
 161      interpretation of the rule "do not grow large functions
 162      too much in order to prevent compiler from exploding".  */
 163   while (true)
 164     {
 165       info = inline_summary (to);
 166       if (limit < info->self_size)
 167         limit = info->self_size;
 168       if (stack_size_limit < info->estimated_self_stack_size)
 169         stack_size_limit = info->estimated_self_stack_size;
 170       if (to->global.inlined_to)
 171         to = to->callers->caller;
 172       else
 173         break;
 174     }
 175
 176   what_info = inline_summary (what);
 177
 178   if (limit < what_info->self_size)
 179     limit = what_info->self_size;
 180
 181   limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100;
 182
 183   /* Check the size after inlining against the function limits.  But allow
 184      the function to shrink if it went over the limits by forced inlining.  */
 185   newsize = estimate_size_after_inlining (to, e);
 186   if (newsize >= info->size
 187       && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS)
 188       && newsize > limit)
 189     {
 190       e->inline_failed = CIF_LARGE_FUNCTION_GROWTH_LIMIT;
 191       return false;
 192     }
 193
 194   if (!what_info->estimated_stack_size)
 195     return true;
 196
 197   /* FIXME: Stack size limit often prevents inlining in Fortran programs
 198      due to large i/o datastructures used by the Fortran front-end.
 199      We ought to ignore this limit when we know that the edge is executed
 200      on every invocation of the caller (i.e. its call statement dominates
 201      exit block).  We do not track this information, yet.  */
 202   stack_size_limit += ((gcov_type)stack_size_limit
 203                        * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100);
 204
 205   inlined_stack = (outer_info->stack_frame_offset
 206                    + outer_info->estimated_self_stack_size
 207                    + what_info->estimated_stack_size);
 208   /* Check new stack consumption with stack consumption at the place
 209      stack is used.  */
 210   if (inlined_stack > stack_size_limit
 211       /* If function already has large stack usage from sibling
 212          inline call, we can inline, too.
 213          This bit overoptimistically assume that we are good at stack
 214          packing.  */
 215       && inlined_stack > info->estimated_stack_size
 216       && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
 217     {
 218       e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT;
 219       return false;
 220     }
 221   return true;
 222 }
 223
 224 /* Dump info about why inlining has failed.  */
 225
 226 static void
 227 report_inline_failed_reason (struct cgraph_edge *e)
 228 {
 229   if (dump_file)
 230     {
 231       fprintf (dump_file, "  not inlinable: %s/%i -> %s/%i, %s\n",
 232                xstrdup (e->caller->name ()), e->caller->order,
 233                xstrdup (e->callee->name ()), e->callee->order,
 234                cgraph_inline_failed_string (e->inline_failed));
 235     }
 236 }
 237
 238  /* Decide whether sanitizer-related attributes allow inlining. */
 239
 240 static bool
 241 sanitize_attrs_match_for_inline_p (const_tree caller, const_tree callee)
 242 {
 243   /* Don't care if sanitizer is disabled */
 244   if (!(flag_sanitize & SANITIZE_ADDRESS))
 245     return true;
 246
 247   if (!caller || !callee)
 248     return true;
 249
 250   return !!lookup_attribute ("no_sanitize_address",
 251       DECL_ATTRIBUTES (caller)) ==
 252       !!lookup_attribute ("no_sanitize_address",
 253       DECL_ATTRIBUTES (callee));
 254 }
 255
 256  /* Decide if we can inline the edge and possibly update
 257    inline_failed reason.
 258    We check whether inlining is possible at all and whether
 259    caller growth limits allow doing so.
 260
 261    if REPORT is true, output reason to the dump file.
 262
 263    if DISREGARD_LIMITS is true, ignore size limits.*/
 264
 265 static bool
 266 can_inline_edge_p (struct cgraph_edge *e, bool report,
 267                    bool disregard_limits = false)
 268 {
 269   bool inlinable = true;
 270   enum availability avail;
 271   struct cgraph_node *callee
 272     = cgraph_function_or_thunk_node (e->callee, &avail);
 273   tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (e->caller->decl);
 274   tree callee_tree
 275     = callee ? DECL_FUNCTION_SPECIFIC_OPTIMIZATION (callee->decl) : NULL;
 276   struct function *caller_cfun = DECL_STRUCT_FUNCTION (e->caller->decl);
 277   struct function *callee_cfun
 278     = callee ? DECL_STRUCT_FUNCTION (callee->decl) : NULL;
 279
 280   if (!caller_cfun && e->caller->clone_of)
 281     caller_cfun = DECL_STRUCT_FUNCTION (e->caller->clone_of->decl);
 282
 283   if (!callee_cfun && callee && callee->clone_of)
 284     callee_cfun = DECL_STRUCT_FUNCTION (callee->clone_of->decl);
 285
 286   gcc_assert (e->inline_failed);
 287
 288   if (!callee || !callee->definition)
 289     {
 290       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 291       inlinable = false;
 292     }
 293   else if (callee->calls_comdat_local)
 294     {
 295       e->inline_failed = CIF_USES_COMDAT_LOCAL;
 296       inlinable = false;
 297     }
 298   else if (!inline_summary (callee)->inlinable
 299            || (caller_cfun && fn_contains_cilk_spawn_p (caller_cfun)))
 300     {
 301       e->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
 302       inlinable = false;
 303     }
 304   else if (avail <= AVAIL_OVERWRITABLE)
 305     {
 306       e->inline_failed = CIF_OVERWRITABLE;
 307       inlinable = false;
 308     }
 309   else if (e->call_stmt_cannot_inline_p)
 310     {
 311       if (e->inline_failed != CIF_FUNCTION_NOT_OPTIMIZED)
 312         e->inline_failed = CIF_MISMATCHED_ARGUMENTS;
 313       inlinable = false;
 314     }
 315   /* Don't inline if the functions have different EH personalities.  */
 316   else if (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 317            && DECL_FUNCTION_PERSONALITY (callee->decl)
 318            && (DECL_FUNCTION_PERSONALITY (e->caller->decl)
 319                != DECL_FUNCTION_PERSONALITY (callee->decl)))
 320     {
 321       e->inline_failed = CIF_EH_PERSONALITY;
 322       inlinable = false;
 323     }
 324   /* TM pure functions should not be inlined into non-TM_pure
 325      functions.  */
 326   else if (is_tm_pure (callee->decl)
 327            && !is_tm_pure (e->caller->decl))
 328     {
 329       e->inline_failed = CIF_UNSPECIFIED;
 330       inlinable = false;
 331     }
 332   /* Don't inline if the callee can throw non-call exceptions but the
 333      caller cannot.
 334      FIXME: this is obviously wrong for LTO where STRUCT_FUNCTION is missing.
 335      Move the flag into cgraph node or mirror it in the inline summary.  */
 336   else if (callee_cfun && callee_cfun->can_throw_non_call_exceptions
 337            && !(caller_cfun && caller_cfun->can_throw_non_call_exceptions))
 338     {
 339       e->inline_failed = CIF_NON_CALL_EXCEPTIONS;
 340       inlinable = false;
 341     }
 342   /* Check compatibility of target optimization options.  */
 343   else if (!targetm.target_option.can_inline_p (e->caller->decl,
 344                                                 callee->decl))
 345     {
 346       e->inline_failed = CIF_TARGET_OPTION_MISMATCH;
 347       inlinable = false;
 348     }
 349   /* Don't inline a function with mismatched sanitization attributes. */
 350   else if (!sanitize_attrs_match_for_inline_p (e->caller->decl, callee->decl))
 351     {
 352       e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
 353       inlinable = false;
 354     }
 355   /* Check if caller growth allows the inlining.  */
 356   else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
 357            && !disregard_limits
 358            && !lookup_attribute ("flatten",
 359                                  DECL_ATTRIBUTES
 360                                    (e->caller->global.inlined_to
 361                                     ? e->caller->global.inlined_to->decl
 362                                     : e->caller->decl))
 363            && !caller_growth_limits (e))
 364     inlinable = false;
 365   /* Don't inline a function with a higher optimization level than the
 366      caller.  FIXME: this is really just tip of iceberg of handling
 367      optimization attribute.  */
 368   else if (caller_tree != callee_tree)
 369     {
 370       struct cl_optimization *caller_opt
 371         = TREE_OPTIMIZATION ((caller_tree)
 372                              ? caller_tree
 373                              : optimization_default_node);
 374
 375       struct cl_optimization *callee_opt
 376         = TREE_OPTIMIZATION ((callee_tree)
 377                              ? callee_tree
 378                              : optimization_default_node);
 379
 380       if (((caller_opt->x_optimize > callee_opt->x_optimize)
 381            || (caller_opt->x_optimize_size != callee_opt->x_optimize_size))
 382           /* gcc.dg/pr43564.c.  Look at forced inline even in -O0.  */
 383           && !DECL_DISREGARD_INLINE_LIMITS (e->callee->decl))
 384         {
 385           e->inline_failed = CIF_OPTIMIZATION_MISMATCH;
 386           inlinable = false;
 387         }
 388     }
 389
 390   if (!inlinable && report)
 391     report_inline_failed_reason (e);
 392   return inlinable;
 393 }
 394
 395
 396 /* Return true if the edge E is inlinable during early inlining.  */
 397
 398 static bool
 399 can_early_inline_edge_p (struct cgraph_edge *e)
 400 {
 401   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee,
 402                                                               NULL);
 403   /* Early inliner might get called at WPA stage when IPA pass adds new
 404      function.  In this case we can not really do any of early inlining
 405      because function bodies are missing.  */
 406   if (!gimple_has_body_p (callee->decl))
 407     {
 408       e->inline_failed = CIF_BODY_NOT_AVAILABLE;
 409       return false;
 410     }
 411   /* In early inliner some of callees may not be in SSA form yet
 412      (i.e. the callgraph is cyclic and we did not process
 413      the callee by early inliner, yet).  We don't have CIF code for this
 414      case; later we will re-do the decision in the real inliner.  */
 415   if (!gimple_in_ssa_p (DECL_STRUCT_FUNCTION (e->caller->decl))
 416       || !gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
 417     {
 418       if (dump_file)
 419         fprintf (dump_file, "  edge not inlinable: not in SSA form\n");
 420       return false;
 421     }
 422   if (!can_inline_edge_p (e, true))
 423     return false;
 424   return true;
 425 }
 426
 427
 428 /* Return number of calls in N.  Ignore cheap builtins.  */
 429
 430 static int
 431 num_calls (struct cgraph_node *n)
 432 {
 433   struct cgraph_edge *e;
 434   int num = 0;
 435
 436   for (e = n->callees; e; e = e->next_callee)
 437     if (!is_inexpensive_builtin (e->callee->decl))
 438       num++;
 439   return num;
 440 }
 441
 442
 443 /* Return true if we are interested in inlining small function.  */
 444
 445 static bool
 446 want_early_inline_function_p (struct cgraph_edge *e)
 447 {
 448   bool want_inline = true;
 449   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 450
 451   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 452     ;
 453   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 454            && !flag_inline_small_functions)
 455     {
 456       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 457       report_inline_failed_reason (e);
 458       want_inline = false;
 459     }
 460   else
 461     {
 462       int growth = estimate_edge_growth (e);
 463       int n;
 464
 465       if (growth <= 0)
 466         ;
 467       else if (!cgraph_maybe_hot_edge_p (e)
 468                && growth > 0)
 469         {
 470           if (dump_file)
 471             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 472                      "call is cold and code would grow by %i\n",
 473                      xstrdup (e->caller->name ()),
 474                      e->caller->order,
 475                      xstrdup (callee->name ()), callee->order,
 476                      growth);
 477           want_inline = false;
 478         }
 479       else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 480         {
 481           if (dump_file)
 482             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 483                      "growth %i exceeds --param early-inlining-insns\n",
 484                      xstrdup (e->caller->name ()),
 485                      e->caller->order,
 486                      xstrdup (callee->name ()), callee->order,
 487                      growth);
 488           want_inline = false;
 489         }
 490       else if ((n = num_calls (callee)) != 0
 491                && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS))
 492         {
 493           if (dump_file)
 494             fprintf (dump_file, "  will not early inline: %s/%i->%s/%i, "
 495                      "growth %i exceeds --param early-inlining-insns "
 496                      "divided by number of calls\n",
 497                      xstrdup (e->caller->name ()),
 498                      e->caller->order,
 499                      xstrdup (callee->name ()), callee->order,
 500                      growth);
 501           want_inline = false;
 502         }
 503     }
 504   return want_inline;
 505 }
 506
 507 /* Compute time of the edge->caller + edge->callee execution when inlining
 508    does not happen.  */
 509
 510 inline gcov_type
 511 compute_uninlined_call_time (struct inline_summary *callee_info,
 512                              struct cgraph_edge *edge)
 513 {
 514   gcov_type uninlined_call_time =
 515     RDIV ((gcov_type)callee_info->time * MAX (edge->frequency, 1),
 516           CGRAPH_FREQ_BASE);
 517   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 518                                           ? edge->caller->global.inlined_to
 519                                           : edge->caller)->time;
 520   return uninlined_call_time + caller_time;
 521 }
 522
 523 /* Same as compute_uinlined_call_time but compute time when inlining
 524    does happen.  */
 525
 526 inline gcov_type
 527 compute_inlined_call_time (struct cgraph_edge *edge,
 528                            int edge_time)
 529 {
 530   gcov_type caller_time = inline_summary (edge->caller->global.inlined_to
 531                                           ? edge->caller->global.inlined_to
 532                                           : edge->caller)->time;
 533   gcov_type time = (caller_time
 534                     + RDIV (((gcov_type) edge_time
 535                              - inline_edge_summary (edge)->call_stmt_time)
 536                     * MAX (edge->frequency, 1), CGRAPH_FREQ_BASE));
 537   /* Possible one roundoff error, but watch for overflows.  */
 538   gcc_checking_assert (time >= INT_MIN / 2);
 539   if (time < 0)
 540     time = 0;
 541   return time;
 542 }
 543
 544 /* Return true if the speedup for inlining E is bigger than
 545    PARAM_MAX_INLINE_MIN_SPEEDUP.  */
 546
 547 static bool
 548 big_speedup_p (struct cgraph_edge *e)
 549 {
 550   gcov_type time = compute_uninlined_call_time (inline_summary (e->callee),
 551                                                 e);
 552   gcov_type inlined_time = compute_inlined_call_time (e,
 553                                                       estimate_edge_time (e));
 554   if (time - inlined_time
 555       > RDIV (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP), 100))
 556     return true;
 557   return false;
 558 }
 559
 560 /* Return true if we are interested in inlining small function.
 561    When REPORT is true, report reason to dump file.  */
 562
 563 static bool
 564 want_inline_small_function_p (struct cgraph_edge *e, bool report)
 565 {
 566   bool want_inline = true;
 567   struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
 568
 569   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 570     ;
 571   else if (!DECL_DECLARED_INLINE_P (callee->decl)
 572            && !flag_inline_small_functions)
 573     {
 574       e->inline_failed = CIF_FUNCTION_NOT_INLINE_CANDIDATE;
 575       want_inline = false;
 576     }
 577   /* Do fast and conservative check if the function can be good
 578      inline cnadidate.  At themoment we allow inline hints to
 579      promote non-inline function to inline and we increase
 580      MAX_INLINE_INSNS_SINGLE 16fold for inline functions.  */
 581   else if ((!DECL_DECLARED_INLINE_P (callee->decl)
 582            && (!e->count || !cgraph_maybe_hot_edge_p (e)))
 583            && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
 584               > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO))
 585     {
 586       e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 587       want_inline = false;
 588     }
 589   else if ((DECL_DECLARED_INLINE_P (callee->decl) || e->count)
 590            && inline_summary (callee)->min_size - inline_edge_summary (e)->call_stmt_size
 591               > 16 * MAX_INLINE_INSNS_SINGLE)
 592     {
 593       e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl)
 594                           ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT
 595                           : CIF_MAX_INLINE_INSNS_AUTO_LIMIT);
 596       want_inline = false;
 597     }
 598   else
 599     {
 600       int growth = estimate_edge_growth (e);
 601       inline_hints hints = estimate_edge_hints (e);
 602       bool big_speedup = big_speedup_p (e);
 603
 604       if (growth <= 0)
 605         ;
 606       /* Apply MAX_INLINE_INSNS_SINGLE limit.  Do not do so when
 607          hints suggests that inlining given function is very profitable.  */
 608       else if (DECL_DECLARED_INLINE_P (callee->decl)
 609                && growth >= MAX_INLINE_INSNS_SINGLE
 610                && ((!big_speedup
 611                     && !(hints & (INLINE_HINT_indirect_call
 612                                   | INLINE_HINT_known_hot
 613                                   | INLINE_HINT_loop_iterations
 614                                   | INLINE_HINT_array_index
 615                                   | INLINE_HINT_loop_stride)))
 616                    || growth >= MAX_INLINE_INSNS_SINGLE * 16))
 617         {
 618           e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT;
 619           want_inline = false;
 620         }
 621       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 622                && !flag_inline_functions)
 623         {
 624           /* growth_likely_positive is expensive, always test it last.  */
 625           if (growth >= MAX_INLINE_INSNS_SINGLE
 626               || growth_likely_positive (callee, growth))
 627             {
 628               e->inline_failed = CIF_NOT_DECLARED_INLINED;
 629               want_inline = false;
 630             }
 631         }
 632       /* Apply MAX_INLINE_INSNS_AUTO limit for functions not declared inline
 633          Upgrade it to MAX_INLINE_INSNS_SINGLE when hints suggests that
 634          inlining given function is very profitable.  */
 635       else if (!DECL_DECLARED_INLINE_P (callee->decl)
 636                && !big_speedup
 637                && !(hints & INLINE_HINT_known_hot)
 638                && growth >= ((hints & (INLINE_HINT_indirect_call
 639                                        | INLINE_HINT_loop_iterations
 640                                        | INLINE_HINT_array_index
 641                                        | INLINE_HINT_loop_stride))
 642                              ? MAX (MAX_INLINE_INSNS_AUTO,
 643                                     MAX_INLINE_INSNS_SINGLE)
 644                              : MAX_INLINE_INSNS_AUTO))
 645         {
 646           /* growth_likely_positive is expensive, always test it last.  */
 647           if (growth >= MAX_INLINE_INSNS_SINGLE
 648               || growth_likely_positive (callee, growth))
 649             {
 650               e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT;
 651               want_inline = false;
 652             }
 653         }
 654       /* If call is cold, do not inline when function body would grow. */
 655       else if (!cgraph_maybe_hot_edge_p (e)
 656                && (growth >= MAX_INLINE_INSNS_SINGLE
 657                    || growth_likely_positive (callee, growth)))
 658         {
 659           e->inline_failed = CIF_UNLIKELY_CALL;
 660           want_inline = false;
 661         }
 662     }
 663   if (!want_inline && report)
 664     report_inline_failed_reason (e);
 665   return want_inline;
 666 }
 667
 668 /* EDGE is self recursive edge.
 669    We hand two cases - when function A is inlining into itself
 670    or when function A is being inlined into another inliner copy of function
 671    A within function B.
 672
 673    In first case OUTER_NODE points to the toplevel copy of A, while
 674    in the second case OUTER_NODE points to the outermost copy of A in B.
 675
 676    In both cases we want to be extra selective since
 677    inlining the call will just introduce new recursive calls to appear.  */
 678
 679 static bool
 680 want_inline_self_recursive_call_p (struct cgraph_edge *edge,
 681                                    struct cgraph_node *outer_node,
 682                                    bool peeling,
 683                                    int depth)
 684 {
 685   char const *reason = NULL;
 686   bool want_inline = true;
 687   int caller_freq = CGRAPH_FREQ_BASE;
 688   int max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH_AUTO);
 689
 690   if (DECL_DECLARED_INLINE_P (edge->caller->decl))
 691     max_depth = PARAM_VALUE (PARAM_MAX_INLINE_RECURSIVE_DEPTH);
 692
 693   if (!cgraph_maybe_hot_edge_p (edge))
 694     {
 695       reason = "recursive call is cold";
 696       want_inline = false;
 697     }
 698   else if (max_count && !outer_node->count)
 699     {
 700       reason = "not executed in profile";
 701       want_inline = false;
 702     }
 703   else if (depth > max_depth)
 704     {
 705       reason = "--param max-inline-recursive-depth exceeded.";
 706       want_inline = false;
 707     }
 708
 709   if (outer_node->global.inlined_to)
 710     caller_freq = outer_node->callers->frequency;
 711
 712   if (!caller_freq)
 713     {
 714       reason = "function is inlined and unlikely";
 715       want_inline = false;
 716     }
 717
 718   if (!want_inline)
 719     ;
 720   /* Inlining of self recursive function into copy of itself within other function
 721      is transformation similar to loop peeling.
 722
 723      Peeling is profitable if we can inline enough copies to make probability
 724      of actual call to the self recursive function very small.  Be sure that
 725      the probability of recursion is small.
 726
 727      We ensure that the frequency of recursing is at most 1 - (1/max_depth).
 728      This way the expected number of recision is at most max_depth.  */
 729   else if (peeling)
 730     {
 731       int max_prob = CGRAPH_FREQ_BASE - ((CGRAPH_FREQ_BASE + max_depth - 1)
 732                                          / max_depth);
 733       int i;
 734       for (i = 1; i < depth; i++)
 735         max_prob = max_prob * max_prob / CGRAPH_FREQ_BASE;
 736       if (max_count
 737           && (edge->count * CGRAPH_FREQ_BASE / outer_node->count
 738               >= max_prob))
 739         {
 740           reason = "profile of recursive call is too large";
 741           want_inline = false;
 742         }
 743       if (!max_count
 744           && (edge->frequency * CGRAPH_FREQ_BASE / caller_freq
 745               >= max_prob))
 746         {
 747           reason = "frequency of recursive call is too large";
 748           want_inline = false;
 749         }
 750     }
 751   /* Recursive inlining, i.e. equivalent of unrolling, is profitable if recursion
 752      depth is large.  We reduce function call overhead and increase chances that
 753      things fit in hardware return predictor.
 754
 755      Recursive inlining might however increase cost of stack frame setup
 756      actually slowing down functions whose recursion tree is wide rather than
 757      deep.
 758
 759      Deciding reliably on when to do recursive inlining without profile feedback
 760      is tricky.  For now we disable recursive inlining when probability of self
 761      recursion is low.
 762
 763      Recursive inlining of self recursive call within loop also results in large loop
 764      depths that generally optimize badly.  We may want to throttle down inlining
 765      in those cases.  In particular this seems to happen in one of libstdc++ rb tree
 766      methods.  */
 767   else
 768     {
 769       if (max_count
 770           && (edge->count * 100 / outer_node->count
 771               <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 772         {
 773           reason = "profile of recursive call is too small";
 774           want_inline = false;
 775         }
 776       else if (!max_count
 777                && (edge->frequency * 100 / caller_freq
 778                    <= PARAM_VALUE (PARAM_MIN_INLINE_RECURSIVE_PROBABILITY)))
 779         {
 780           reason = "frequency of recursive call is too small";
 781           want_inline = false;
 782         }
 783     }
 784   if (!want_inline && dump_file)
 785     fprintf (dump_file, "   not inlining recursively: %s\n", reason);
 786   return want_inline;
 787 }
 788
 789 /* Worker for cgraph_for_node_and_aliases.  Return true as soon as a caller
 790    edge of NODE fails can_inline_edge_p; return false if all callers pass.
 791    HAS_HOT_CALL points to a bool that is set when a maybe-hot edge is seen.  */
 792
 793 static bool
 794 check_callers (struct cgraph_node *node, void *has_hot_call)
 795 {
 796   struct cgraph_edge *e;
 797    for (e = node->callers; e; e = e->next_caller)
 798      {
 799        if (!can_inline_edge_p (e, true))
 800          return true;  /* Remaining callers are not examined.  */
 801        if (!(*(bool *)has_hot_call) && cgraph_maybe_hot_edge_p (e))
 802          *(bool *)has_hot_call = true;
 803      }
 804   return false;
 805 }
 806
 807 /* Callback for cgraph_for_node_and_aliases: true if NODE has a caller.  DATA is unused.  */
 808
 809 static bool
 810 has_caller_p (struct cgraph_node *node, void *data ATTRIBUTE_UNUSED)
 811 {
 812   if (node->callers)
 813     return true;
 814   return false;
 815 }
 816
 817 /* Decide if inlining NODE into all of its callers would reduce unit size by
 818    eliminating the offline copy of the function.  When COLD is false, at least
 819    one caller edge must be maybe-hot; when COLD is true cold calls suffice.  */
 820
 821 static bool
 822 want_inline_function_to_all_callers_p (struct cgraph_node *node, bool cold)
 823 {
 824    struct cgraph_node *function = cgraph_function_or_thunk_node (node, NULL);
 825    bool has_hot_call = false;
 826
 827    /* Does it have callers?  */
 828    if (!cgraph_for_node_and_aliases (node, has_caller_p, NULL, true))
 829      return false;
 830    /* Already inlined?  */
 831    if (function->global.inlined_to)
 832      return false;
 833    if (cgraph_function_or_thunk_node (node, NULL) != node)  /* NODE must resolve to itself.  */
 834      return false;
 835    /* Inlining into all callers would increase size?  */
 836    if (estimate_growth (node) > 0)
 837      return false;
 838    /* All inlines must be possible.  */
 839    if (cgraph_for_node_and_aliases (node, check_callers, &has_hot_call, true))
 840      return false;
 841    if (!cold && !has_hot_call)  /* Hot call required unless COLD.  */
 842      return false;
 843    return true;
 844 }
 845
 846 #define RELATIVE_TIME_BENEFIT_RANGE (INT_MAX / 64)
 847
 848 /* Return relative time improvement for inlining EDGE, clamped to the range
 849    1...RELATIVE_TIME_BENEFIT_RANGE.  Returns 1 if the caller (or the function it is inlined to) is DECL_EXTERNAL.  */
 850
 851 static inline int
 852 relative_time_benefit (struct inline_summary *callee_info,
 853                        struct cgraph_edge *edge,
 854                        int edge_time)
 855 {
 856   gcov_type relbenefit;
 857   gcov_type uninlined_call_time = compute_uninlined_call_time (callee_info, edge);
 858   gcov_type inlined_call_time = compute_inlined_call_time (edge, edge_time);
 859
 860   /* Inlining into extern inline function is not a win.  */
 861   if (DECL_EXTERNAL (edge->caller->global.inlined_to
 862                      ? edge->caller->global.inlined_to->decl
 863                      : edge->caller->decl))
 864     return 1;
 865
 866   /* Watch overflows.  */
 867   gcc_checking_assert (uninlined_call_time >= 0);
 868   gcc_checking_assert (inlined_call_time >= 0);
 869   gcc_checking_assert (uninlined_call_time >= inlined_call_time);
 870
 871   /* Compute relative time benefit, i.e. how much the call becomes faster.
 872      ??? perhaps computing how much the caller+callee together become faster
 873      would lead to more realistic results.  */
 874   if (!uninlined_call_time)
 875     uninlined_call_time = 1;  /* Keep the divisor below nonzero.  */
 876   relbenefit =
 877     RDIV (((gcov_type)uninlined_call_time - inlined_call_time) * RELATIVE_TIME_BENEFIT_RANGE,
 878           uninlined_call_time);
 879   relbenefit = MIN (relbenefit, RELATIVE_TIME_BENEFIT_RANGE);
 880   gcc_checking_assert (relbenefit >= 0);
 881   relbenefit = MAX (relbenefit, 1);
 882   return relbenefit;
 883 }
 884
 885
 886 /* A cost model driving the inlining heuristics: return the badness of EDGE;
 887    edges with the smallest badness are inlined first.  After each inlining the
 888    costs of all caller edges of affected nodes are recomputed so the metrics
 889    may depend on values such as number of inlinable callers or function body
 890    size.  When DUMP is true, details of the computation go to dump_file.  */
 891
 892 static int
 893 edge_badness (struct cgraph_edge *edge, bool dump)
 894 {
 895   gcov_type badness;
 896   int growth, edge_time;
 897   struct cgraph_node *callee = cgraph_function_or_thunk_node (edge->callee,
 898                                                               NULL);
 899   struct inline_summary *callee_info = inline_summary (callee);
 900   inline_hints hints;
 901
 902   if (DECL_DISREGARD_INLINE_LIMITS (callee->decl))
 903     return INT_MIN;
 904
 905   growth = estimate_edge_growth (edge);
 906   edge_time = estimate_edge_time (edge);
 907   hints = estimate_edge_hints (edge);
 908   gcc_checking_assert (edge_time >= 0);
 909   gcc_checking_assert (edge_time <= callee_info->time);
 910   gcc_checking_assert (growth <= callee_info->size);
 911
 912   if (dump)
 913     {
 914       fprintf (dump_file, "    Badness calculation for %s/%i -> %s/%i\n",
 915                xstrdup (edge->caller->name ()),
 916                edge->caller->order,
 917                xstrdup (callee->name ()),
 918                edge->callee->order);
 919       fprintf (dump_file, "      size growth %i, time %i ",
 920                growth,
 921                edge_time);
 922       dump_inline_hints (dump_file, hints);
 923       if (big_speedup_p (edge))
 924         fprintf (dump_file, " big_speedup");
 925       fprintf (dump_file, "\n");
 926     }
 927
 928   /* Always prefer inlining saving code size.  */
 929   if (growth <= 0)
 930     {
 931       badness = INT_MIN / 2 + growth;
 932       if (dump)
 933         fprintf (dump_file, "      %i: Growth %i <= 0\n", (int) badness,
 934                  growth);
 935     }
 936
 937   /* When profiling is available, compute badness as:
 938
 939                 relative_edge_count * relative_time_benefit
 940      goodness = -------------------------------------------
 941                 growth_f_caller
 942      badness = -goodness
 943
 944     The fraction is upside down, because on edge counts and time benefits
 945     the bounds are known. Edge growth is essentially unlimited.  */
 946
 947   else if (max_count)
 948     {
 949       sreal tmp, relbenefit_real, growth_real;
 950       int relbenefit = relative_time_benefit (callee_info, edge, edge_time);
 951       /* Capping edge->count to max_count. edge->count can be larger than
 952          max_count if an inline adds new edges which increase max_count
 953          after max_count is computed.  */
 954       gcov_type edge_count = edge->count > max_count ? max_count : edge->count;
 955
 956       sreal_init (&relbenefit_real, relbenefit, 0);
 957       sreal_init (&growth_real, growth, 0);
 958
 959       /* relative_edge_count.  */
 960       sreal_init (&tmp, edge_count, 0);
 961       sreal_div (&tmp, &tmp, &max_count_real);
 962
 963       /* relative_time_benefit.  */
 964       sreal_mul (&tmp, &tmp, &relbenefit_real);
 965       sreal_div (&tmp, &tmp, &max_relbenefit_real);
 966
 967       /* growth_f_caller.  */
 968       sreal_mul (&tmp, &tmp, &half_int_min_real);
 969       sreal_div (&tmp, &tmp, &growth_real);
 970
 971       badness = -1 * sreal_to_int (&tmp);
 972
 973       if (dump)
 974         {
 975           fprintf (dump_file,
 976                    "      %i (relative %f): profile info. Relative count %f%s"
 977                    " * Relative benefit %f\n",
 978                    (int) badness, (double) badness / INT_MIN,
 979                    (double) edge_count / max_count,
 980                    edge->count > max_count ? " (capped to max_count)" : "",
 981                    relbenefit * 100.0 / RELATIVE_TIME_BENEFIT_RANGE);
 982         }
 983     }
 984
 985   /* When function local profile is available. Compute badness as:
 986
 987                  relative_time_benefit
 988      goodness =  ---------------------------------
 989                  growth_of_caller * overall_growth
 990
 991      badness = - goodness
 992
 993      compensated by the inline hints.
 994   */
 995   else if (flag_guess_branch_prob)
 996     {
 997       badness = (relative_time_benefit (callee_info, edge, edge_time)
 998                  * (INT_MIN / 16 / RELATIVE_TIME_BENEFIT_RANGE));
 999       badness /= (MIN (65536/2, growth) * MIN (65536/2, MAX (1, callee_info->growth)));
1000       gcc_checking_assert (badness <=0 && badness >= INT_MIN / 16);
1001       if ((hints & (INLINE_HINT_indirect_call
1002                     | INLINE_HINT_loop_iterations
1003                     | INLINE_HINT_array_index
1004                     | INLINE_HINT_loop_stride))
1005           || callee_info->growth <= 0)
1006         badness *= 8;  /* badness <= 0 here, so scaling up lowers the key.  */
1007       if (hints & (INLINE_HINT_same_scc))
1008         badness /= 16;  /* Dividing moves badness toward zero.  */
1009       else if (hints & (INLINE_HINT_in_scc))
1010         badness /= 8;
1011       else if (hints & (INLINE_HINT_cross_module))
1012         badness /= 2;
1013       gcc_checking_assert (badness <= 0 && badness >= INT_MIN / 2);
1014       if ((hints & INLINE_HINT_declared_inline) && badness >= INT_MIN / 32)
1015         badness *= 16;
1016       if (dump)
1017         {
1018           fprintf (dump_file,
1019                    "      %i: guessed profile. frequency %f,"
1020                    " benefit %f%%, time w/o inlining %i, time w inlining %i"
1021                    " overall growth %i (current) %i (original)\n",
1022                    (int) badness, (double)edge->frequency / CGRAPH_FREQ_BASE,
1023                    relative_time_benefit (callee_info, edge, edge_time) * 100.0
1024                    / RELATIVE_TIME_BENEFIT_RANGE,
1025                    (int)compute_uninlined_call_time (callee_info, edge),
1026                    (int)compute_inlined_call_time (edge, edge_time),
1027                    estimate_growth (callee),
1028                    callee_info->growth);
1029         }
1030     }
1031   /* When function local profile is not available or it does not give
1032      useful information (i.e. frequency is zero), base the cost on
1033      loop nest and overall size growth, so we optimize for overall number
1034      of functions fully inlined in program.  */
1035   else
1036     {
1037       int nest = MIN (inline_edge_summary (edge)->loop_depth, 8);
1038       badness = growth * 256;
1039
1040       /* Decrease badness if call is nested.  */
1041       if (badness > 0)
1042         badness >>= nest;
1043       else
1044         {
1045           badness <<= nest;
1046         }
1047       if (dump)
1048         fprintf (dump_file, "      %i: no profile. nest %i\n", (int) badness,
1049                  nest);
1050     }
1051
1052   /* Ensure that we did not overflow in all the fixed point math above.  */
1053   gcc_assert (badness >= INT_MIN);
1054   gcc_assert (badness <= INT_MAX - 1);
1055   /* Make recursive inlining happen always after other inlining is done.  */
1056   if (cgraph_edge_recursive_p (edge))
1057     return badness + 1;
1058   else
1059     return badness;
1060 }
1061
1062 /* Recompute badness of EDGE; enqueue it in HEAP (storing the heap node in edge->aux) or lower its existing key.  */
1063 static inline void
1064 update_edge_key (fibheap_t heap, struct cgraph_edge *edge)
1065 {
1066   int badness = edge_badness (edge, false);
1067   if (edge->aux)
1068     {
1069       fibnode_t n = (fibnode_t) edge->aux;
1070       gcc_checking_assert (n->data == edge);
1071
1072       /* fibheap_replace_key only decreases the keys.
1073          When we increase the key we do not update heap
1074          and instead re-insert the element once it becomes
1075          a minimum of heap.  */
1076       if (badness < n->key)
1077         {
1078           if (dump_file && (dump_flags & TDF_DETAILS))
1079             {
1080               fprintf (dump_file,
1081                        "  decreasing badness %s/%i -> %s/%i, %i to %i\n",
1082                        xstrdup (edge->caller->name ()),
1083                        edge->caller->order,
1084                        xstrdup (edge->callee->name ()),
1085                        edge->callee->order,
1086                        (int)n->key,
1087                        badness);
1088             }
1089           fibheap_replace_key (heap, n, badness);
1090           gcc_checking_assert (n->key == badness);
1091         }
1092     }
1093   else
1094     {
1095        if (dump_file && (dump_flags & TDF_DETAILS))
1096          {
1097            fprintf (dump_file,
1098                     "  enqueuing call %s/%i -> %s/%i, badness %i\n",
1099                     xstrdup (edge->caller->name ()),
1100                     edge->caller->order,
1101                     xstrdup (edge->callee->name ()),
1102                     edge->callee->order,
1103                     badness);
1104          }
1105       edge->aux = fibheap_insert (heap, badness, edge);
1106     }
1107 }
1108
1109
1110 /* NODE was inlined.
1111    All caller edges need to be reset because
1112    size estimates change.  Similarly callees need a reset
1113    because better context may be known.  */
1114
1115 static void
1116 reset_edge_caches (struct cgraph_node *node)
1117 {
1118   struct cgraph_edge *edge;
1119   struct cgraph_edge *e = node->callees;
1120   struct cgraph_node *where = node;
1121   int i;
1122   struct ipa_ref *ref;
1123
1124   if (where->global.inlined_to)
1125     where = where->global.inlined_to;
1126
1127   /* WHERE body size has changed, the cached growth is invalid.  */
1128   reset_node_growth_cache (where);
1129
1130   for (edge = where->callers; edge; edge = edge->next_caller)
1131     if (edge->inline_failed)
1132       reset_edge_growth_cache (edge);
1133   for (i = 0; ipa_ref_list_referring_iterate (&where->ref_list,
1134                                               i, ref); i++)
1135     if (ref->use == IPA_REF_ALIAS)
1136       reset_edge_caches (ipa_ref_referring_node (ref));  /* Recurse into aliases of WHERE.  */
1137
1138   if (!e)
1139     return;
1140
1141   while (true)  /* Walk NODE's callees, descending into inlined ones, without recursion.  */
1142     if (!e->inline_failed && e->callee->callees)
1143       e = e->callee->callees;
1144     else
1145       {
1146         if (e->inline_failed)
1147           reset_edge_growth_cache (e);
1148         if (e->next_callee)
1149           e = e->next_callee;
1150         else
1151           {
1152             do
1153               {
1154                 if (e->caller == node)  /* Back at the starting node: walk is complete.  */
1155                   return;
1156                 e = e->caller->callers;
1157               }
1158             while (!e->next_callee);
1159             e = e->next_callee;
1160           }
1161       }
1162 }
1163
1164 /* Recompute HEAP nodes for each caller of NODE and of its aliases.
1165    UPDATED_NODES tracks nodes we already visited, to avoid redundant work.
1166    When CHECK_INLINABLITY_FOR is set, re-check inlinability of that edge only
1167    and just refresh keys of other queued edges.  Otherwise re-check all edges.  */
1168
1169 static void
1170 update_caller_keys (fibheap_t heap, struct cgraph_node *node,
1171                     bitmap updated_nodes,
1172                     struct cgraph_edge *check_inlinablity_for)
1173 {
1174   struct cgraph_edge *edge;
1175   int i;
1176   struct ipa_ref *ref;
1177
1178   if ((!node->alias && !inline_summary (node)->inlinable)
1179       || node->global.inlined_to)
1180     return;
1181   if (!bitmap_set_bit (updated_nodes, node->uid))
1182     return;
1183
1184   for (i = 0; ipa_ref_list_referring_iterate (&node->ref_list,
1185                                               i, ref); i++)
1186     if (ref->use == IPA_REF_ALIAS)
1187       {
1188         struct cgraph_node *alias = ipa_ref_referring_node (ref);
1189         update_caller_keys (heap, alias, updated_nodes, check_inlinablity_for);
1190       }
1191
1192   for (edge = node->callers; edge; edge = edge->next_caller)
1193     if (edge->inline_failed)
1194       {
1195         if (!check_inlinablity_for
1196             || check_inlinablity_for == edge)
1197           {
1198             if (can_inline_edge_p (edge, false)
1199                 && want_inline_small_function_p (edge, false))
1200               update_edge_key (heap, edge);
1201             else if (edge->aux)  /* No longer a candidate: drop it from the heap.  */
1202               {
1203                 report_inline_failed_reason (edge);
1204                 fibheap_delete_node (heap, (fibnode_t) edge->aux);
1205                 edge->aux = NULL;
1206               }
1207           }
1208         else if (edge->aux)
1209           update_edge_key (heap, edge);
1210       }
1211 }
1212
1213 /* Recompute HEAP nodes for each uninlined call in NODE (including calls in
1214    bodies inlined into it).  This is used when we know that edge badnesses are
1215    only going to increase (we introduced a new call site), so all we need is to
1216    insert newly created edges.  Callees already in UPDATED_NODES are skipped.  */
1217
1218 static void
1219 update_callee_keys (fibheap_t heap, struct cgraph_node *node,
1220                     bitmap updated_nodes)
1221 {
1222   struct cgraph_edge *e = node->callees;
1223
1224   if (!e)
1225     return;
1226   while (true)  /* Non-recursive walk that descends into inlined callees.  */
1227     if (!e->inline_failed && e->callee->callees)
1228       e = e->callee->callees;
1229     else
1230       {
1231         enum availability avail;
1232         struct cgraph_node *callee;
1233         /* We do not reset callee growth cache here.  Since we added a new call,
1234            growth could have just increased and consequently the badness metric
1235            doesn't need updating.  */
1236         if (e->inline_failed
1237             && (callee = cgraph_function_or_thunk_node (e->callee, &avail))
1238             && inline_summary (callee)->inlinable
1239             && avail >= AVAIL_AVAILABLE
1240             && !bitmap_bit_p (updated_nodes, callee->uid))
1241           {
1242             if (can_inline_edge_p (e, false)
1243                 && want_inline_small_function_p (e, false))
1244               update_edge_key (heap, e);
1245             else if (e->aux)  /* No longer a candidate: drop it from the heap.  */
1246               {
1247                 report_inline_failed_reason (e);
1248                 fibheap_delete_node (heap, (fibnode_t) e->aux);
1249                 e->aux = NULL;
1250               }
1251           }
1252         if (e->next_callee)
1253           e = e->next_callee;
1254         else
1255           {
1256             do
1257               {
1258                 if (e->caller == node)  /* Back at the starting node: walk is complete.  */
1259                   return;
1260                 e = e->caller->callers;
1261               }
1262             while (!e->next_callee);
1263             e = e->next_callee;
1264           }
1265       }
1266 }
1267
1268 /* Enqueue into HEAP all calls to NODE found in WHERE and in the bodies inlined
1269    into WHERE, keyed by negated frequency (or negated scaled count when max_count is set).  */
1270
1271 static void
1272 lookup_recursive_calls (struct cgraph_node *node, struct cgraph_node *where,
1273                         fibheap_t heap)
1274 {
1275   struct cgraph_edge *e;
1276   enum availability avail;
1277
1278   for (e = where->callees; e; e = e->next_callee)
1279     if (e->callee == node
1280         || (cgraph_function_or_thunk_node (e->callee, &avail) == node
1281             && avail > AVAIL_OVERWRITABLE))
1282       {
1283         /* When profile feedback is available, prioritize by expected number
1284            of calls.  */
1285         fibheap_insert (heap,
1286                         !max_count ? -e->frequency
1287                         : -(e->count / ((max_count + (1<<24) - 1) / (1<<24))),
1288                         e);
1289       }
1290   for (e = where->callees; e; e = e->next_callee)
1291     if (!e->inline_failed)  /* Edge was inlined: scan the inlined body too.  */
1292       lookup_recursive_calls (node, e->callee, heap);
1293 }
1294
1295 /* Decide on recursive inlining: if the caller of EDGE has recursive calls,
1296    inline them until the body size reaches the PARAM-selected limit.  New
1297    indirect edges discovered in the process are added to *NEW_EDGES, unless
1298    NEW_EDGES is NULL.  Return true if any recursive call was inlined.  */
1299
1300 static bool
1301 recursive_inlining (struct cgraph_edge *edge,
1302                     vec<cgraph_edge_p> *new_edges)
1303 {
1304   int limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE_AUTO);
1305   fibheap_t heap;
1306   struct cgraph_node *node;
1307   struct cgraph_edge *e;
1308   struct cgraph_node *master_clone = NULL, *next;
1309   int depth = 0;
1310   int n = 0;
1311
1312   node = edge->caller;
1313   if (node->global.inlined_to)
1314     node = node->global.inlined_to;
1315
1316   if (DECL_DECLARED_INLINE_P (node->decl))
1317     limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE);
1318
1319   /* Make sure that function is small enough to be considered for inlining.  */
1320   if (estimate_size_after_inlining (node, edge)  >= limit)
1321     return false;
1322   heap = fibheap_new ();
1323   lookup_recursive_calls (node, node, heap);
1324   if (fibheap_empty (heap))
1325     {
1326       fibheap_delete (heap);
1327       return false;
1328     }
1329
1330   if (dump_file)
1331     fprintf (dump_file,
1332              "  Performing recursive inlining on %s\n",
1333              node->name ());
1334
1335   /* Do the inlining and update list of recursive call during process.  */
1336   while (!fibheap_empty (heap))
1337     {
1338       struct cgraph_edge *curr
1339         = (struct cgraph_edge *) fibheap_extract_min (heap);
1340       struct cgraph_node *cnode, *dest = curr->callee;
1341
1342       if (!can_inline_edge_p (curr, true))
1343         continue;
1344
1345       /* MASTER_CLONE is produced in the case we already started modifying
1346          the function. Be sure to redirect edge to the original body before
1347          estimating growths otherwise we will be seeing growths after inlining
1348          the already modified body.  */
1349       if (master_clone)
1350         {
1351           cgraph_redirect_edge_callee (curr, master_clone);
1352           reset_edge_growth_cache (curr);
1353         }
1354
1355       if (estimate_size_after_inlining (node, curr) > limit)
1356         {
1357           cgraph_redirect_edge_callee (curr, dest);  /* Undo the redirection before stopping.  */
1358           reset_edge_growth_cache (curr);
1359           break;
1360         }
1361
1362       depth = 1;
1363       for (cnode = curr->caller;
1364            cnode->global.inlined_to; cnode = cnode->callers->caller)
1365         if (node->decl
1366             == cgraph_function_or_thunk_node (curr->callee, NULL)->decl)
1367           depth++;  /* NOTE(review): the condition does not involve CNODE -- confirm intent.  */
1368
1369       if (!want_inline_self_recursive_call_p (curr, node, false, depth))
1370         {
1371           cgraph_redirect_edge_callee (curr, dest);
1372           reset_edge_growth_cache (curr);
1373           continue;
1374         }
1375
1376       if (dump_file)
1377         {
1378           fprintf (dump_file,
1379                    "   Inlining call of depth %i", depth);
1380           if (node->count)
1381             {
1382               fprintf (dump_file, " called approx. %.2f times per call",
1383                        (double)curr->count / node->count);
1384             }
1385           fprintf (dump_file, "\n");
1386         }
1387       if (!master_clone)
1388         {
1389           /* We need original clone to copy around.  */
1390           master_clone = cgraph_clone_node (node, node->decl,
1391                                             node->count, CGRAPH_FREQ_BASE,
1392                                             false, vNULL, true, NULL, NULL);
1393           for (e = master_clone->callees; e; e = e->next_callee)
1394             if (!e->inline_failed)
1395               clone_inlined_nodes (e, true, false, NULL, CGRAPH_FREQ_BASE);
1396           cgraph_redirect_edge_callee (curr, master_clone);
1397           reset_edge_growth_cache (curr);
1398         }
1399
1400       inline_call (curr, false, new_edges, &overall_size, true);
1401       lookup_recursive_calls (node, curr->callee, heap);  /* Queue recursive calls in the new copy.  */
1402       n++;
1403     }
1404
1405   if (!fibheap_empty (heap) && dump_file)
1406     fprintf (dump_file, "    Recursive inlining growth limit met.\n");
1407   fibheap_delete (heap);
1408
1409   if (!master_clone)  /* Nothing was inlined.  */
1410     return false;
1411
1412   if (dump_file)
1413     fprintf (dump_file,
1414              "\n   Inlined %i times, "
1415              "body grown from size %i to %i, time %i to %i\n", n,
1416              inline_summary (master_clone)->size, inline_summary (node)->size,
1417              inline_summary (master_clone)->time, inline_summary (node)->time);
1418
1419   /* Remove master clone we used for inlining.  We rely that clones inlined
1420      into master clone gets queued just before master clone so we don't
1421      need recursion.  */
1422   for (node = cgraph_first_function (); node != master_clone;
1423        node = next)
1424     {
1425       next = cgraph_next_function (node);
1426       if (node->global.inlined_to == master_clone)
1427         cgraph_remove_node (node);
1428     }
1429   cgraph_remove_node (master_clone);
1430   return true;
1431 }
1432
1433
1434 /* Given whole compilation unit estimate of INSNS (raised to at least
1435    PARAM_LARGE_UNIT_INSNS), allow growth by PARAM_INLINE_UNIT_GROWTH percent.  */
1436
1437 static int
1438 compute_max_insns (int insns)
1439 {
1440   int max_insns = insns;
1441   if (max_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
1442     max_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
1443
1444   return ((int64_t) max_insns  /* 64-bit intermediate for the multiplication.  */
1445           * (100 + PARAM_VALUE (PARAM_INLINE_UNIT_GROWTH)) / 100);
1446 }
1447
1448
1449 /* Pop every edge from NEW_EDGES and add those still worth inlining to HEAP, keyed by badness.  */
1450
1451 static void
1452 add_new_edges_to_heap (fibheap_t heap, vec<cgraph_edge_p> new_edges)
1453 {
1454   while (new_edges.length () > 0)
1455     {
1456       struct cgraph_edge *edge = new_edges.pop ();
1457
1458       gcc_assert (!edge->aux);  /* A new edge must not already have a heap node.  */
1459       if (edge->inline_failed
1460           && can_inline_edge_p (edge, true)
1461           && want_inline_small_function_p (edge, true))
1462         edge->aux = fibheap_insert (heap, edge_badness (edge, false), edge);
1463     }
1464 }
1465
1466 /* Edge removal hook: reset the growth cache of E's callee and remove E from the fibheap DATA.  */
1467
1468 static void
1469 heap_edge_removal_hook (struct cgraph_edge *e, void *data)
1470 {
1471   if (e->callee)
1472     reset_node_growth_cache (e->callee);
1473   if (e->aux)  /* E is queued; aux holds its heap node.  */
1474     {
1475       fibheap_delete_node ((fibheap_t)data, (fibnode_t)e->aux);
1476       e->aux = NULL;
1477     }
1478 }
1479
1480 /* Return true if speculation of edge E seems useful; E must be speculative
1481    with a known callee.  If ANTICIPATE_INLINING is true, be conservative and
1482    hope that E may get inlined.  */
1483
1484 bool
1485 speculation_useful_p (struct cgraph_edge *e, bool anticipate_inlining)
1486 {
1487   enum availability avail;
1488   struct cgraph_node *target = cgraph_function_or_thunk_node (e->callee, &avail);
1489   struct cgraph_edge *direct, *indirect;
1490   struct ipa_ref *ref;
1491
1492   gcc_assert (e->speculative && !e->indirect_unknown_callee);
1493
1494   if (!cgraph_maybe_hot_edge_p (e))
1495     return false;
1496
1497   /* See if IP optimizations found something potentially useful about the
1498      function.  For now we look only for CONST/PURE flags.  Almost everything
1499      else we propagate is useless.  */
1500   if (avail >= AVAIL_AVAILABLE)
1501     {
1502       int ecf_flags = flags_from_decl_or_type (target->decl);
1503       if (ecf_flags & ECF_CONST)
1504         {
1505           cgraph_speculative_call_info (e, direct, indirect, ref);
1506           if (!(indirect->indirect_info->ecf_flags & ECF_CONST))  /* Target is const, indirect call is not.  */
1507             return true;
1508         }
1509       else if (ecf_flags & ECF_PURE)
1510         {
1511           cgraph_speculative_call_info (e, direct, indirect, ref);
1512           if (!(indirect->indirect_info->ecf_flags & ECF_PURE))  /* Target is pure, indirect call is not.  */
1513             return true;
1514         }
1515     }
1516   /* If we did not manage to inline the function nor redirect
1517      to an ipa-cp clone (that are seen by having local flag set),
1518      it is probably pointless to inline it unless hardware is missing
1519      indirect call predictor.  */
1520   if (!anticipate_inlining && e->inline_failed && !target->local.local)
1521     return false;
1522   /* For overwritable targets there is not much to do.  */
1523   if (e->inline_failed && !can_inline_edge_p (e, false, true))
1524     return false;
1525   /* OK, speculation seems interesting.  */
1526   return true;
1527 }
1528
1529 /* We know that EDGE is not going to be inlined.  If it is speculative and the
1530    speculation is not useful, resolve it and update caches and EDGE_HEAP keys.  */
1531
1532 static void
1533 resolve_noninline_speculation (fibheap_t edge_heap, struct cgraph_edge *edge)
1534 {
1535   if (edge->speculative && !speculation_useful_p (edge, false))
1536     {
1537       struct cgraph_node *node = edge->caller;
1538       struct cgraph_node *where = node->global.inlined_to
1539                                   ? node->global.inlined_to : node;
1540       bitmap updated_nodes = BITMAP_ALLOC (NULL);
1541
1542       spec_rem += edge->count;  /* Accumulate the count of removed speculations.  */
1543       cgraph_resolve_speculation (edge, NULL);
1544       reset_edge_caches (where);
1545       inline_update_overall_summary (where);
1546       update_caller_keys (edge_heap, where,
1547                           updated_nodes, NULL);
1548       update_callee_keys (edge_heap, where,
1549                           updated_nodes);
1550       BITMAP_FREE (updated_nodes);
1551     }
1552 }
1553
1554 /* We use greedy algorithm for inlining of small functions:
1555    All inline candidates are put into prioritized heap ordered in
1556    increasing badness.
1557
1558    The inlining of small functions is bounded by unit growth parameters.  */
1559
1560 static void
1561 inline_small_functions (void)
1562 {
1563   struct cgraph_node *node;
1564   struct cgraph_edge *edge;
1565   fibheap_t edge_heap = fibheap_new ();
1566   bitmap updated_nodes = BITMAP_ALLOC (NULL);
1567   int min_size, max_size;
1568   auto_vec<cgraph_edge_p> new_indirect_edges;
1569   int initial_size = 0;
1570   struct cgraph_node **order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
1571   struct cgraph_edge_hook_list *edge_removal_hook_holder;
1572
1573   if (flag_indirect_inlining)
1574     new_indirect_edges.create (8);
1575
1576   edge_removal_hook_holder
1577     = cgraph_add_edge_removal_hook (&heap_edge_removal_hook, edge_heap);
1578
1579   /* Compute overall unit size and other global parameters used by badness
1580      metrics.  */
1581
1582   max_count = 0;
1583   ipa_reduced_postorder (order, true, true, NULL);
1584   free (order);
1585
1586   FOR_EACH_DEFINED_FUNCTION (node)
1587     if (!node->global.inlined_to)
1588       {
1589         if (cgraph_function_with_gimple_body_p (node)
1590             || node->thunk.thunk_p)
1591           {
1592             struct inline_summary *info = inline_summary (node);
1593             struct ipa_dfs_info *dfs = (struct ipa_dfs_info *) node->aux;
1594
1595             /* Do not account external functions, they will be optimized out
1596                if not inlined.  Also only count the non-cold portion of program.  */
1597             if (!DECL_EXTERNAL (node->decl)
1598                 && node->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED)
1599               initial_size += info->size;
1600             info->growth = estimate_growth (node);
1601             if (dfs && dfs->next_cycle)
1602               {
1603                 struct cgraph_node *n2;
1604                 int id = dfs->scc_no + 1;
1605                 for (n2 = node; n2;
1606                      n2 = ((struct ipa_dfs_info *) node->aux)->next_cycle)
1607                   {
1608                     struct inline_summary *info2 = inline_summary (n2);
1609                     if (info2->scc_no)
1610                       break;
1611                     info2->scc_no = id;
1612                   }
1613               }
1614           }
1615
1616         for (edge = node->callers; edge; edge = edge->next_caller)
1617           if (max_count < edge->count)
1618             max_count = edge->count;
1619       }
1620   sreal_init (&max_count_real, max_count, 0);
1621   sreal_init (&max_relbenefit_real, RELATIVE_TIME_BENEFIT_RANGE, 0);
1622   sreal_init (&half_int_min_real, INT_MAX / 2, 0);
1623   ipa_free_postorder_info ();
1624   initialize_growth_caches ();
1625
1626   if (dump_file)
1627     fprintf (dump_file,
1628              "\nDeciding on inlining of small functions.  Starting with size %i.\n",
1629              initial_size);
1630
1631   overall_size = initial_size;
1632   max_size = compute_max_insns (overall_size);
1633   min_size = overall_size;
1634
1635   /* Populate the heap with all edges we might inline.  */
1636
1637   FOR_EACH_DEFINED_FUNCTION (node)
1638     {
1639       bool update = false;
1640       struct cgraph_edge *next;
1641
1642       if (dump_file)
1643         fprintf (dump_file, "Enqueueing calls in %s/%i.\n",
1644                  node->name (), node->order);
1645
1646       for (edge = node->callees; edge; edge = next)
1647         {
1648           next = edge->next_callee;
1649           if (edge->inline_failed
1650               && !edge->aux
1651               && can_inline_edge_p (edge, true)
1652               && want_inline_small_function_p (edge, true)
1653               && edge->inline_failed)
1654             {
1655               gcc_assert (!edge->aux);
1656               update_edge_key (edge_heap, edge);
1657             }
1658           if (edge->speculative && !speculation_useful_p (edge, edge->aux != NULL))
1659             {
1660               cgraph_resolve_speculation (edge, NULL);
1661               update = true;
1662             }
1663         }
1664       if (update)
1665         {
1666           struct cgraph_node *where = node->global.inlined_to
1667                                       ? node->global.inlined_to : node;
1668           inline_update_overall_summary (where);
1669           reset_node_growth_cache (where);
1670           reset_edge_caches (where);
1671           update_caller_keys (edge_heap, where,
1672                               updated_nodes, NULL);
1673           bitmap_clear (updated_nodes);
1674         }
1675     }
1676
1677   gcc_assert (in_lto_p
1678               || !max_count
1679               || (profile_info && flag_branch_probabilities));
1680
1681   while (!fibheap_empty (edge_heap))
1682     {
1683       int old_size = overall_size;
1684       struct cgraph_node *where, *callee;
1685       int badness = fibheap_min_key (edge_heap);
1686       int current_badness;
1687       int cached_badness;
1688       int growth;
1689
1690       edge = (struct cgraph_edge *) fibheap_extract_min (edge_heap);
1691       gcc_assert (edge->aux);
1692       edge->aux = NULL;
1693       if (!edge->inline_failed || !edge->callee->analyzed)
1694         continue;
1695
1696       /* Be sure that caches are maintained consistent.
1697          We can not make this ENABLE_CHECKING only because it cause different
1698          updates of the fibheap queue.  */
1699       cached_badness = edge_badness (edge, false);
1700       reset_edge_growth_cache (edge);
1701       reset_node_growth_cache (edge->callee);
1702
1703       /* When updating the edge costs, we only decrease badness in the keys.
1704          Increases of badness are handled lazilly; when we see key with out
1705          of date value on it, we re-insert it now.  */
1706       current_badness = edge_badness (edge, false);
1707       gcc_assert (cached_badness == current_badness);
1708       gcc_assert (current_badness >= badness);
1709       if (current_badness != badness)
1710         {
1711           edge->aux = fibheap_insert (edge_heap, current_badness, edge);
1712           continue;
1713         }
1714
1715       if (!can_inline_edge_p (edge, true))
1716         {
1717           resolve_noninline_speculation (edge_heap, edge);
1718           continue;
1719         }
1720
1721       callee = cgraph_function_or_thunk_node (edge->callee, NULL);
1722       growth = estimate_edge_growth (edge);
1723       if (dump_file)
1724         {
1725           fprintf (dump_file,
1726                    "\nConsidering %s/%i with %i size\n",
1727                    callee->name (), callee->order,
1728                    inline_summary (callee)->size);
1729           fprintf (dump_file,
1730                    " to be inlined into %s/%i in %s:%i\n"
1731                    " Estimated badness is %i, frequency %.2f.\n",
1732                    edge->caller->name (), edge->caller->order,
1733                    flag_wpa ? "unknown"
1734                    : gimple_filename ((const_gimple) edge->call_stmt),
1735                    flag_wpa ? -1
1736                    : gimple_lineno ((const_gimple) edge->call_stmt),
1737                    badness,
1738                    edge->frequency / (double)CGRAPH_FREQ_BASE);
1739           if (edge->count)
1740             fprintf (dump_file," Called %"PRId64"x\n",
1741                      edge->count);
1742           if (dump_flags & TDF_DETAILS)
1743             edge_badness (edge, true);
1744         }
1745
1746       if (overall_size + growth > max_size
1747           && !DECL_DISREGARD_INLINE_LIMITS (callee->decl))
1748         {
1749           edge->inline_failed = CIF_INLINE_UNIT_GROWTH_LIMIT;
1750           report_inline_failed_reason (edge);
1751           resolve_noninline_speculation (edge_heap, edge);
1752           continue;
1753         }
1754
1755       if (!want_inline_small_function_p (edge, true))
1756         {
1757           resolve_noninline_speculation (edge_heap, edge);
1758           continue;
1759         }
1760
1761       /* Heuristics for inlining small functions work poorly for
1762          recursive calls where we do effects similar to loop unrolling.
1763          When inlining such edge seems profitable, leave decision on
1764          specific inliner.  */
1765       if (cgraph_edge_recursive_p (edge))
1766         {
1767           where = edge->caller;
1768           if (where->global.inlined_to)
1769             where = where->global.inlined_to;
1770           if (!recursive_inlining (edge,
1771                                    flag_indirect_inlining
1772                                    ? &new_indirect_edges : NULL))
1773             {
1774               edge->inline_failed = CIF_RECURSIVE_INLINING;
1775               resolve_noninline_speculation (edge_heap, edge);
1776               continue;
1777             }
1778           reset_edge_caches (where);
1779           /* Recursive inliner inlines all recursive calls of the function
1780              at once. Consequently we need to update all callee keys.  */
1781           if (flag_indirect_inlining)
1782             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1783           update_callee_keys (edge_heap, where, updated_nodes);
1784           bitmap_clear (updated_nodes);
1785         }
1786       else
1787         {
1788           struct cgraph_node *outer_node = NULL;
1789           int depth = 0;
1790
1791           /* Consider the case where self recursive function A is inlined
1792              into B.  This is desired optimization in some cases, since it
1793              leads to effect similar of loop peeling and we might completely
1794              optimize out the recursive call.  However we must be extra
1795              selective.  */
1796
1797           where = edge->caller;
1798           while (where->global.inlined_to)
1799             {
1800               if (where->decl == callee->decl)
1801                 outer_node = where, depth++;
1802               where = where->callers->caller;
1803             }
1804           if (outer_node
1805               && !want_inline_self_recursive_call_p (edge, outer_node,
1806                                                      true, depth))
1807             {
1808               edge->inline_failed
1809                 = (DECL_DISREGARD_INLINE_LIMITS (edge->callee->decl)
1810                    ? CIF_RECURSIVE_INLINING : CIF_UNSPECIFIED);
1811               resolve_noninline_speculation (edge_heap, edge);
1812               continue;
1813             }
1814           else if (depth && dump_file)
1815             fprintf (dump_file, " Peeling recursion with depth %i\n", depth);
1816
1817           gcc_checking_assert (!callee->global.inlined_to);
1818           inline_call (edge, true, &new_indirect_edges, &overall_size, true);
1819           if (flag_indirect_inlining)
1820             add_new_edges_to_heap (edge_heap, new_indirect_edges);
1821
1822           reset_edge_caches (edge->callee);
1823           reset_node_growth_cache (callee);
1824
1825           update_callee_keys (edge_heap, where, updated_nodes);
1826         }
1827       where = edge->caller;
1828       if (where->global.inlined_to)
1829         where = where->global.inlined_to;
1830
1831       /* Our profitability metric can depend on local properties
1832          such as number of inlinable calls and size of the function body.
1833          After inlining these properties might change for the function we
1834          inlined into (since it's body size changed) and for the functions
1835          called by function we inlined (since number of it inlinable callers
1836          might change).  */
1837       update_caller_keys (edge_heap, where, updated_nodes, NULL);
1838       bitmap_clear (updated_nodes);
1839
1840       if (dump_file)
1841         {
1842           fprintf (dump_file,
1843                    " Inlined into %s which now has time %i and size %i,"
1844                    "net change of %+i.\n",
1845                    edge->caller->name (),
1846                    inline_summary (edge->caller)->time,
1847                    inline_summary (edge->caller)->size,
1848                    overall_size - old_size);
1849         }
1850       if (min_size > overall_size)
1851         {
1852           min_size = overall_size;
1853           max_size = compute_max_insns (min_size);
1854
1855           if (dump_file)
1856             fprintf (dump_file, "New minimal size reached: %i\n", min_size);
1857         }
1858     }
1859
1860   free_growth_caches ();
1861   fibheap_delete (edge_heap);
1862   if (dump_file)
1863     fprintf (dump_file,
1864              "Unit growth for small function inlining: %i->%i (%i%%)\n",
1865              initial_size, overall_size,
1866              initial_size ? overall_size * 100 / (initial_size) - 100: 0);
1867   BITMAP_FREE (updated_nodes);
1868   cgraph_remove_edge_removal_hook (edge_removal_hook_holder);
1869 }
1870
1871 /* Flatten NODE.  Performed both during early inlining and
1872    at IPA inlining time.  */
1873
1874 static void
1875 flatten_function (struct cgraph_node *node, bool early)
1876 {
1877   struct cgraph_edge *e;
1878
1879   /* We shouldn't be called recursively when we are being processed.  */
1880   gcc_assert (node->aux == NULL);
1881
1882   node->aux = (void *) node;
1883
1884   for (e = node->callees; e; e = e->next_callee)
1885     {
1886       struct cgraph_node *orig_callee;
1887       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
1888
1889       /* We've hit cycle?  It is time to give up.  */
1890       if (callee->aux)
1891         {
1892           if (dump_file)
1893             fprintf (dump_file,
1894                      "Not inlining %s into %s to avoid cycle.\n",
1895                      xstrdup (callee->name ()),
1896                      xstrdup (e->caller->name ()));
1897           e->inline_failed = CIF_RECURSIVE_INLINING;
1898           continue;
1899         }
1900
1901       /* When the edge is already inlined, we just need to recurse into
1902          it in order to fully flatten the leaves.  */
1903       if (!e->inline_failed)
1904         {
1905           flatten_function (callee, early);
1906           continue;
1907         }
1908
1909       /* Flatten attribute needs to be processed during late inlining. For
1910          extra code quality we however do flattening during early optimization,
1911          too.  */
1912       if (!early
1913           ? !can_inline_edge_p (e, true)
1914           : !can_early_inline_edge_p (e))
1915         continue;
1916
1917       if (cgraph_edge_recursive_p (e))
1918         {
1919           if (dump_file)
1920             fprintf (dump_file, "Not inlining: recursive call.\n");
1921           continue;
1922         }
1923
1924       if (gimple_in_ssa_p (DECL_STRUCT_FUNCTION (node->decl))
1925           != gimple_in_ssa_p (DECL_STRUCT_FUNCTION (callee->decl)))
1926         {
1927           if (dump_file)
1928             fprintf (dump_file, "Not inlining: SSA form does not match.\n");
1929           continue;
1930         }
1931
1932       /* Inline the edge and flatten the inline clone.  Avoid
1933          recursing through the original node if the node was cloned.  */
1934       if (dump_file)
1935         fprintf (dump_file, " Inlining %s into %s.\n",
1936                  xstrdup (callee->name ()),
1937                  xstrdup (e->caller->name ()));
1938       orig_callee = callee;
1939       inline_call (e, true, NULL, NULL, false);
1940       if (e->callee != orig_callee)
1941         orig_callee->aux = (void *) node;
1942       flatten_function (e->callee, early);
1943       if (e->callee != orig_callee)
1944         orig_callee->aux = NULL;
1945     }
1946
1947   node->aux = NULL;
1948   if (!node->global.inlined_to)
1949     inline_update_overall_summary (node);
1950 }
1951
1952 /* Count number of callers of NODE and store it into DATA (that
1953    points to int.  Worker for cgraph_for_node_and_aliases.  */
1954
1955 static bool
1956 sum_callers (struct cgraph_node *node, void *data)
1957 {
1958   struct cgraph_edge *e;
1959   int *num_calls = (int *)data;
1960
1961   for (e = node->callers; e; e = e->next_caller)
1962     (*num_calls)++;
1963   return false;
1964 }
1965
1966 /* Inline NODE to all callers.  Worker for cgraph_for_node_and_aliases.
1967    DATA points to number of calls originally found so we avoid infinite
1968    recursion.  */
1969
1970 static bool
1971 inline_to_all_callers (struct cgraph_node *node, void *data)
1972 {
1973   int *num_calls = (int *)data;
1974   bool callee_removed = false;
1975
1976   while (node->callers && !node->global.inlined_to)
1977     {
1978       struct cgraph_node *caller = node->callers->caller;
1979
1980       if (dump_file)
1981         {
1982           fprintf (dump_file,
1983                    "\nInlining %s size %i.\n",
1984                    node->name (),
1985                    inline_summary (node)->size);
1986           fprintf (dump_file,
1987                    " Called once from %s %i insns.\n",
1988                    node->callers->caller->name (),
1989                    inline_summary (node->callers->caller)->size);
1990         }
1991
1992       inline_call (node->callers, true, NULL, NULL, true, &callee_removed);
1993       if (dump_file)
1994         fprintf (dump_file,
1995                  " Inlined into %s which now has %i size\n",
1996                  caller->name (),
1997                  inline_summary (caller)->size);
1998       if (!(*num_calls)--)
1999         {
2000           if (dump_file)
2001             fprintf (dump_file, "New calls found; giving up.\n");
2002           return callee_removed;
2003         }
2004       if (callee_removed)
2005         return true;
2006     }
2007   return false;
2008 }
2009
2010 /* Output overall time estimate.  */
2011 static void
2012 dump_overall_stats (void)
2013 {
2014   int64_t sum_weighted = 0, sum = 0;
2015   struct cgraph_node *node;
2016
2017   FOR_EACH_DEFINED_FUNCTION (node)
2018     if (!node->global.inlined_to
2019         && !node->alias)
2020       {
2021         int time = inline_summary (node)->time;
2022         sum += time;
2023         sum_weighted += time * node->count;
2024       }
2025   fprintf (dump_file, "Overall time estimate: "
2026            "%"PRId64" weighted by profile: "
2027            "%"PRId64"\n", sum, sum_weighted);
2028 }
2029
2030 /* Output some useful stats about inlining.  */
2031
2032 static void
2033 dump_inline_stats (void)
2034 {
2035   int64_t inlined_cnt = 0, inlined_indir_cnt = 0;
2036   int64_t inlined_virt_cnt = 0, inlined_virt_indir_cnt = 0;
2037   int64_t noninlined_cnt = 0, noninlined_indir_cnt = 0;
2038   int64_t noninlined_virt_cnt = 0, noninlined_virt_indir_cnt = 0;
2039   int64_t  inlined_speculative = 0, inlined_speculative_ply = 0;
2040   int64_t indirect_poly_cnt = 0, indirect_cnt = 0;
2041   int64_t reason[CIF_N_REASONS][3];
2042   int i;
2043   struct cgraph_node *node;
2044
2045   memset (reason, 0, sizeof (reason));
2046   FOR_EACH_DEFINED_FUNCTION (node)
2047   {
2048     struct cgraph_edge *e;
2049     for (e = node->callees; e; e = e->next_callee)
2050       {
2051         if (e->inline_failed)
2052           {
2053             reason[(int) e->inline_failed][0] += e->count;
2054             reason[(int) e->inline_failed][1] += e->frequency;
2055             reason[(int) e->inline_failed][2] ++;
2056             if (DECL_VIRTUAL_P (e->callee->decl))
2057               {
2058                 if (e->indirect_inlining_edge)
2059                   noninlined_virt_indir_cnt += e->count;
2060                 else
2061                   noninlined_virt_cnt += e->count;
2062               }
2063             else
2064               {
2065                 if (e->indirect_inlining_edge)
2066                   noninlined_indir_cnt += e->count;
2067                 else
2068                   noninlined_cnt += e->count;
2069               }
2070           }
2071         else
2072           {
2073             if (e->speculative)
2074               {
2075                 if (DECL_VIRTUAL_P (e->callee->decl))
2076                   inlined_speculative_ply += e->count;
2077                 else
2078                   inlined_speculative += e->count;
2079               }
2080             else if (DECL_VIRTUAL_P (e->callee->decl))
2081               {
2082                 if (e->indirect_inlining_edge)
2083                   inlined_virt_indir_cnt += e->count;
2084                 else
2085                   inlined_virt_cnt += e->count;
2086               }
2087             else
2088               {
2089                 if (e->indirect_inlining_edge)
2090                   inlined_indir_cnt += e->count;
2091                 else
2092                   inlined_cnt += e->count;
2093               }
2094           }
2095       }
2096     for (e = node->indirect_calls; e; e = e->next_callee)
2097       if (e->indirect_info->polymorphic)
2098         indirect_poly_cnt += e->count;
2099       else
2100         indirect_cnt += e->count;
2101   }
2102   if (max_count)
2103     {
2104       fprintf (dump_file,
2105                "Inlined %"PRId64 " + speculative "
2106                "%"PRId64 " + speculative polymorphic "
2107                "%"PRId64 " + previously indirect "
2108                "%"PRId64 " + virtual "
2109                "%"PRId64 " + virtual and previously indirect "
2110                "%"PRId64 "\n" "Not inlined "
2111                "%"PRId64 " + previously indirect "
2112                "%"PRId64 " + virtual "
2113                "%"PRId64 " + virtual and previously indirect "
2114                "%"PRId64 " + stil indirect "
2115                "%"PRId64 " + still indirect polymorphic "
2116                "%"PRId64 "\n", inlined_cnt,
2117                inlined_speculative, inlined_speculative_ply,
2118                inlined_indir_cnt, inlined_virt_cnt, inlined_virt_indir_cnt,
2119                noninlined_cnt, noninlined_indir_cnt, noninlined_virt_cnt,
2120                noninlined_virt_indir_cnt, indirect_cnt, indirect_poly_cnt);
2121       fprintf (dump_file,
2122                "Removed speculations %"PRId64 "\n",
2123                spec_rem);
2124     }
2125   dump_overall_stats ();
2126   fprintf (dump_file, "\nWhy inlining failed?\n");
2127   for (i = 0; i < CIF_N_REASONS; i++)
2128     if (reason[i][2])
2129       fprintf (dump_file, "%-50s: %8i calls, %8i freq, %"PRId64" count\n",
2130                cgraph_inline_failed_string ((cgraph_inline_failed_t) i),
2131                (int) reason[i][2], (int) reason[i][1], reason[i][0]);
2132 }
2133
2134 /* Decide on the inlining.  We do so in the topological order to avoid
2135    expenses on updating data structures.  */
2136
2137 static unsigned int
2138 ipa_inline (void)
2139 {
2140   struct cgraph_node *node;
2141   int nnodes;
2142   struct cgraph_node **order;
2143   int i;
2144   int cold;
2145   bool remove_functions = false;
2146
2147   if (!optimize)
2148     return 0;
2149
2150   order = XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
2151
2152   if (in_lto_p && optimize)
2153     ipa_update_after_lto_read ();
2154
2155   if (dump_file)
2156     dump_inline_summaries (dump_file);
2157
2158   nnodes = ipa_reverse_postorder (order);
2159
2160   FOR_EACH_FUNCTION (node)
2161     node->aux = 0;
2162
2163   if (dump_file)
2164     fprintf (dump_file, "\nFlattening functions:\n");
2165
2166   /* In the first pass handle functions to be flattened.  Do this with
2167      a priority so none of our later choices will make this impossible.  */
2168   for (i = nnodes - 1; i >= 0; i--)
2169     {
2170       node = order[i];
2171
2172       /* Handle nodes to be flattened.
2173          Ideally when processing callees we stop inlining at the
2174          entry of cycles, possibly cloning that entry point and
2175          try to flatten itself turning it into a self-recursive
2176          function.  */
2177       if (lookup_attribute ("flatten",
2178                             DECL_ATTRIBUTES (node->decl)) != NULL)
2179         {
2180           if (dump_file)
2181             fprintf (dump_file,
2182                      "Flattening %s\n", node->name ());
2183           flatten_function (node, false);
2184         }
2185     }
2186   if (dump_file)
2187     dump_overall_stats ();
2188
2189   inline_small_functions ();
2190
2191   /* Do first after-inlining removal.  We want to remove all "stale" extern inline
2192      functions and virtual functions so we really know what is called once.  */
2193   symtab_remove_unreachable_nodes (false, dump_file);
2194   free (order);
2195
2196   /* Inline functions with a property that after inlining into all callers the
2197      code size will shrink because the out-of-line copy is eliminated.
2198      We do this regardless on the callee size as long as function growth limits
2199      are met.  */
2200   if (dump_file)
2201     fprintf (dump_file,
2202              "\nDeciding on functions to be inlined into all callers and removing useless speculations:\n");
2203
2204   /* Inlining one function called once has good chance of preventing
2205      inlining other function into the same callee.  Ideally we should
2206      work in priority order, but probably inlining hot functions first
2207      is good cut without the extra pain of maintaining the queue.
2208
2209      ??? this is not really fitting the bill perfectly: inlining function
2210      into callee often leads to better optimization of callee due to
2211      increased context for optimization.
2212      For example if main() function calls a function that outputs help
2213      and then function that does the main optmization, we should inline
2214      the second with priority even if both calls are cold by themselves.
2215
2216      We probably want to implement new predicate replacing our use of
2217      maybe_hot_edge interpreted as maybe_hot_edge || callee is known
2218      to be hot.  */
2219   for (cold = 0; cold <= 1; cold ++)
2220     {
2221       FOR_EACH_DEFINED_FUNCTION (node)
2222         {
2223           struct cgraph_edge *edge, *next;
2224           bool update=false;
2225
2226           for (edge = node->callees; edge; edge = next)
2227             {
2228               next = edge->next_callee;
2229               if (edge->speculative && !speculation_useful_p (edge, false))
2230                 {
2231                   cgraph_resolve_speculation (edge, NULL);
2232                   spec_rem += edge->count;
2233                   update = true;
2234                   remove_functions = true;
2235                 }
2236             }
2237           if (update)
2238             {
2239               struct cgraph_node *where = node->global.inlined_to
2240                                           ? node->global.inlined_to : node;
2241               reset_node_growth_cache (where);
2242               reset_edge_caches (where);
2243               inline_update_overall_summary (where);
2244             }
2245           if (flag_inline_functions_called_once
2246               && want_inline_function_to_all_callers_p (node, cold))
2247             {
2248               int num_calls = 0;
2249               cgraph_for_node_and_aliases (node, sum_callers,
2250                                            &num_calls, true);
2251               while (cgraph_for_node_and_aliases (node, inline_to_all_callers,
2252                                                   &num_calls, true))
2253                 ;
2254               remove_functions = true;
2255             }
2256         }
2257     }
2258
2259   /* Free ipa-prop structures if they are no longer needed.  */
2260   if (optimize)
2261     ipa_free_all_structures_after_iinln ();
2262
2263   if (dump_file)
2264     {
2265       fprintf (dump_file,
2266                "\nInlined %i calls, eliminated %i functions\n\n",
2267                ncalls_inlined, nfunctions_inlined);
2268       dump_inline_stats ();
2269     }
2270
2271   if (dump_file)
2272     dump_inline_summaries (dump_file);
2273   /* In WPA we use inline summaries for partitioning process.  */
2274   if (!flag_wpa)
2275     inline_free_summary ();
2276   return remove_functions ? TODO_remove_functions : 0;
2277 }
2278
2279 /* Inline always-inline function calls in NODE.  */
2280
2281 static bool
2282 inline_always_inline_functions (struct cgraph_node *node)
2283 {
2284   struct cgraph_edge *e;
2285   bool inlined = false;
2286
2287   for (e = node->callees; e; e = e->next_callee)
2288     {
2289       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2290       if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl))
2291         continue;
2292
2293       if (cgraph_edge_recursive_p (e))
2294         {
2295           if (dump_file)
2296             fprintf (dump_file, "  Not inlining recursive call to %s.\n",
2297                      e->callee->name ());
2298           e->inline_failed = CIF_RECURSIVE_INLINING;
2299           continue;
2300         }
2301
2302       if (!can_early_inline_edge_p (e))
2303         {
2304           /* Set inlined to true if the callee is marked "always_inline" but
2305              is not inlinable.  This will allow flagging an error later in
2306              expand_call_inline in tree-inline.c.  */
2307           if (lookup_attribute ("always_inline",
2308                                  DECL_ATTRIBUTES (callee->decl)) != NULL)
2309             inlined = true;
2310           continue;
2311         }
2312
2313       if (dump_file)
2314         fprintf (dump_file, "  Inlining %s into %s (always_inline).\n",
2315                  xstrdup (e->callee->name ()),
2316                  xstrdup (e->caller->name ()));
2317       inline_call (e, true, NULL, NULL, false);
2318       inlined = true;
2319     }
2320   if (inlined)
2321     inline_update_overall_summary (node);
2322
2323   return inlined;
2324 }
2325
2326 /* Decide on the inlining.  We do so in the topological order to avoid
2327    expenses on updating data structures.  */
2328
2329 static bool
2330 early_inline_small_functions (struct cgraph_node *node)
2331 {
2332   struct cgraph_edge *e;
2333   bool inlined = false;
2334
2335   for (e = node->callees; e; e = e->next_callee)
2336     {
2337       struct cgraph_node *callee = cgraph_function_or_thunk_node (e->callee, NULL);
2338       if (!inline_summary (callee)->inlinable
2339           || !e->inline_failed)
2340         continue;
2341
2342       /* Do not consider functions not declared inline.  */
2343       if (!DECL_DECLARED_INLINE_P (callee->decl)
2344           && !flag_inline_small_functions
2345           && !flag_inline_functions)
2346         continue;
2347
2348       if (dump_file)
2349         fprintf (dump_file, "Considering inline candidate %s.\n",
2350                  callee->name ());
2351
2352       if (!can_early_inline_edge_p (e))
2353         continue;
2354
2355       if (cgraph_edge_recursive_p (e))
2356         {
2357           if (dump_file)
2358             fprintf (dump_file, "  Not inlining: recursive call.\n");
2359           continue;
2360         }
2361
2362       if (!want_early_inline_function_p (e))
2363         continue;
2364
2365       if (dump_file)
2366         fprintf (dump_file, " Inlining %s into %s.\n",
2367                  xstrdup (callee->name ()),
2368                  xstrdup (e->caller->name ()));
2369       inline_call (e, true, NULL, NULL, true);
2370       inlined = true;
2371     }
2372
2373   return inlined;
2374 }
2375
2376 /* Do inlining of small functions.  Doing so early helps profiling and other
2377    passes to be somewhat more effective and avoids some code duplication in
2378    later real inlining pass for testcases with very many function calls.  */
2379
2380 namespace {
2381
2382 const pass_data pass_data_early_inline =
2383 {
2384   GIMPLE_PASS, /* type */
2385   "einline", /* name */
2386   OPTGROUP_INLINE, /* optinfo_flags */
2387   true, /* has_execute */
2388   TV_EARLY_INLINING, /* tv_id */
2389   PROP_ssa, /* properties_required */
2390   0, /* properties_provided */
2391   0, /* properties_destroyed */
2392   0, /* todo_flags_start */
2393   0, /* todo_flags_finish */
2394 };
2395
2396 class pass_early_inline : public gimple_opt_pass
2397 {
2398 public:
2399   pass_early_inline (gcc::context *ctxt)
2400     : gimple_opt_pass (pass_data_early_inline, ctxt)
2401   {}
2402
2403   /* opt_pass methods: */
2404   virtual unsigned int execute (function *);
2405
2406 }; // class pass_early_inline
2407
/* Run the early inliner on the function currently being compiled.

   The call-graph node is looked up from current_function_decl; FUN is only
   used at the end to record that always-inline functions have been handled.
   Nothing is done (and 0 is returned) if an error has already been seen or
   if ipa_node_params_vector already exists.  Otherwise
   ipa_remove_all_references is called on the node's ref_list,
   inline_always_inline_functions is run, and then exactly one of three
   things happens:
     - nothing more, when not optimizing, when inlining or early inlining
       is disabled, or when this function disregards inline limits;
     - flatten_function is called, when the function carries the "flatten"
       attribute;
     - early_inline_small_functions is iterated, for at most
       PARAM_EARLY_INLINER_MAX_ITERATIONS rounds.

   Returns the TODO flags accumulated from optimize_inline_calls.  */
2408 unsigned int
2409 pass_early_inline::execute (function *fun)
2410 {
2411   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2412   struct cgraph_edge *edge;
2413   unsigned int todo = 0;
2414   int iterations = 0;
2415   bool inlined = false;
2416
2417   if (seen_error ())
2418     return 0;
2419
2420   /* Do nothing if data structures for ipa-inliner are already computed.  This
2421      happens when some pass decides to construct a new function and
2422      cgraph_add_new_function calls lowering passes and early optimization on
2423      it.  This may confuse us when the early inliner decides to inline a call
2424      to a function clone, because function clones don't have a parameter list
2425      in ipa-prop matching their signature.  */
2426   if (ipa_node_params_vector.exists ())
2427     return 0;
2428
2429 #ifdef ENABLE_CHECKING
2430   verify_cgraph_node (node);
2431 #endif
2432   ipa_remove_all_references (&node->ref_list);
2433
2434   /* Even when not optimizing or not inlining, inline always-inline
2435      functions.  */
2436   inlined = inline_always_inline_functions (node);
2437
2438   if (!optimize
2439       || flag_no_inline
2440       || !flag_early_inlining
2441       /* Never inline regular functions into always-inline functions
2442          during incremental inlining.  This sucks as functions calling
2443          always inline functions will get less optimized, but at the
2444          same time inlining of functions calling always inline
2445          function into an always inline function might introduce
2446          cycles of edges to be always inlined in the callgraph.
2447
2448          We might want to be smarter and just avoid this type of inlining.  */
2449       || DECL_DISREGARD_INLINE_LIMITS (node->decl))
       /* Deliberately empty: in these cases only the always-inline step
          above applies.  */
2450     ;
2451   else if (lookup_attribute ("flatten",
2452                              DECL_ATTRIBUTES (node->decl)) != NULL)
2453     {
2454       /* When the function is marked to be flattened, recursively inline
2455          all calls in it.  */
2456       if (dump_file)
2457         fprintf (dump_file,
2458                  "Flattening %s\n", node->name ());
2459       flatten_function (node, true);
2460       inlined = true;
2461     }
2462   else
2463     {
2464       /* We iterate incremental inlining to get trivial cases of indirect
2465          inlining.  */
2466       while (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS)
2467              && early_inline_small_functions (node))
2468         {
2469           timevar_push (TV_INTEGRATION);
2470           todo |= optimize_inline_calls (current_function_decl);
2471
2472           /* Technically we ought to recompute inline parameters so the new
2473              iteration of early inliner works as expected.  We however have
2474              values approximately right and thus we only need to update edge
2475              info that might be cleared out for newly discovered edges.  */
2476           for (edge = node->callees; edge; edge = edge->next_callee)
2477             {
2478               struct inline_edge_summary *es = inline_edge_summary (edge);
2479               es->call_stmt_size
2480                 = estimate_num_insns (edge->call_stmt, &eni_size_weights);
2481               es->call_stmt_time
2482                 = estimate_num_insns (edge->call_stmt, &eni_time_weights);
2483               if (edge->callee->decl
2484                   && !gimple_check_call_matching_types (
2485                       edge->call_stmt, edge->callee->decl, false))
2486                 edge->call_stmt_cannot_inline_p = true;
2487             }
               /* When this is the last round the loop condition permits, no
                  further iteration can follow, so the overall summary update
                  is skipped.  */
2488           if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1)
2489             inline_update_overall_summary (node);
2490           timevar_pop (TV_INTEGRATION);
2491           iterations++;
               /* optimize_inline_calls has already been called in this round;
                  clearing INLINED skips the extra call after the loop.  */
2492           inlined = false;
2493         }
2494       if (dump_file)
2495         fprintf (dump_file, "Iterations: %i\n", iterations);
2496     }
2497
       /* INLINED is still set here only if inline_always_inline_functions
          returned true or the function was flattened, and no loop round
          above has cleared it.  */
2498   if (inlined)
2499     {
2500       timevar_push (TV_INTEGRATION);
2501       todo |= optimize_inline_calls (current_function_decl);
2502       timevar_pop (TV_INTEGRATION);
2503     }
2504
       /* Recorded on every path that reaches this point; the two early
          returns above leave the flag untouched.  */
2505   fun->always_inline_functions_inlined = true;
2506
2507   return todo;
2508 }
2509
2510 } // anon namespace
2511
/* Return a newly allocated pass_early_inline instance constructed for
   the compiler context CTXT.  */
2512 gimple_opt_pass *
2513 make_pass_early_inline (gcc::context *ctxt)
2514 {
2515   return new pass_early_inline (ctxt);
2516 }
2517
2518 namespace {
2519
/* Descriptor for the IPA inliner pass: an IPA_PASS named "inline" in the
   OPTGROUP_INLINE group, timed under TV_IPA_INLINING.  It requires,
   provides and destroys no properties; TODO_remove_functions is its start
   TODO flag and TODO_dump_symtab its finish TODO flag.  The fields are
   initialized positionally, so their order must not be changed.  */
2520 const pass_data pass_data_ipa_inline =
2521 {
2522   IPA_PASS, /* type */
2523   "inline", /* name */
2524   OPTGROUP_INLINE, /* optinfo_flags */
2525   true, /* has_execute */
2526   TV_IPA_INLINING, /* tv_id */
2527   0, /* properties_required */
2528   0, /* properties_provided */
2529   0, /* properties_destroyed */
2530   TODO_remove_functions, /* todo_flags_start */
2531   ( TODO_dump_symtab ), /* todo_flags_finish */
2532 };
2533
/* Pass object for the IPA inliner.  It is an ipa_opt_pass_d constructed
   from pass_data_ipa_inline.  The constructor supplies the summary hooks
   inline_generate_summary, inline_write_summary and inline_read_summary
   and uses inline_transform as the per-function transform; the
   optimization-summary, stmt_fixup and variable_transform hooks are left
   NULL.  */
2534 class pass_ipa_inline : public ipa_opt_pass_d
2535 {
2536 public:
2537   pass_ipa_inline (gcc::context *ctxt)
2538     : ipa_opt_pass_d (pass_data_ipa_inline, ctxt,
2539                       inline_generate_summary, /* generate_summary */
2540                       inline_write_summary, /* write_summary */
2541                       inline_read_summary, /* read_summary */
2542                       NULL, /* write_optimization_summary */
2543                       NULL, /* read_optimization_summary */
2544                       NULL, /* stmt_fixup */
2545                       0, /* function_transform_todo_flags_start */
2546                       inline_transform, /* function_transform */
2547                       NULL) /* variable_transform */
2548   {}
2549
2550   /* opt_pass methods: */
       /* The function argument is unused; the result of ipa_inline is
          returned directly.  */
2551   virtual unsigned int execute (function *) { return ipa_inline (); }
2552
2553 }; // class pass_ipa_inline
2554
2555 } // anon namespace
2556
/* Return a newly allocated pass_ipa_inline instance constructed for the
   compiler context CTXT.  */
2557 ipa_opt_pass_d *
2558 make_pass_ipa_inline (gcc::context *ctxt)
2559 {
2560   return new pass_ipa_inline (ctxt);
2561 }