gcc/ipa-utils.cc

   1 /* Utilities for ipa analysis.
   2    Copyright (C) 2005-2023 Free Software Foundation, Inc.
   3    Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "tree.h"
  26 #include "gimple.h"
  27 #include "predict.h"
  28 #include "alloc-pool.h"
  29 #include "cgraph.h"
  30 #include "lto-streamer.h"
  31 #include "dumpfile.h"
  32 #include "splay-tree.h"
  33 #include "ipa-utils.h"
  34 #include "symbol-summary.h"
  35 #include "tree-vrp.h"
  36 #include "ipa-prop.h"
  37 #include "ipa-fnsummary.h"
  38 #include "tree-eh.h"
  39 #include "gimple-iterator.h"
  40 #include "ipa-modref-tree.h"
  41 #include "ipa-modref.h"
  42 #include "tree-ssa-loop-niter.h"
  43 #include "calls.h"
  44 #include "cfgloop.h"
  45 #include "cfganal.h"
  46
  47 /* Debugging function for postorder and inorder code. NOTE is a string
  48    that is printed before the nodes are printed.  ORDER is an array of
  49    cgraph_nodes that has COUNT useful nodes in it.  */
  50
  51 void
  52 ipa_print_order (FILE* out,
  53                  const char * note,
  54                  struct cgraph_node** order,
  55                  int count)
  56 {
  57   int i;
  58   fprintf (out, "\n\n ordered call graph: %s\n", note);
  59
  60   for (i = count - 1; i >= 0; i--)
  61     order[i]->dump (out);
  62   fprintf (out, "\n");
  63   fflush (out);
  64 }
  65
  66
  67 struct searchc_env {
  68   struct cgraph_node **stack;
  69   struct cgraph_node **result;
  70   int stack_size;
  71   int order_pos;
  72   splay_tree nodes_marked_new;
  73   bool reduce;
  74   int count;
  75 };
  76
  77 /* This is an implementation of Tarjan's strongly connected region
  78    finder as reprinted in Aho Hopcraft and Ullman's The Design and
  79    Analysis of Computer Programs (1975) pages 192-193.  This version
  80    has been customized for cgraph_nodes.  The env parameter is because
  81    it is recursive and there are no nested functions here.  This
  82    function should only be called from itself or
  83    ipa_reduced_postorder.  ENV is a stack env and would be
  84    unnecessary if C had nested functions.  V is the node to start
  85    searching from.  */
  86
  87 static void
  88 searchc (struct searchc_env* env, struct cgraph_node *v,
  89          bool (*ignore_edge) (struct cgraph_edge *))
  90 {
  91   struct cgraph_edge *edge;
  92   struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;
  93
  94   /* mark node as old */
  95   v_info->new_node = false;
  96   splay_tree_remove (env->nodes_marked_new, v->get_uid ());
  97
  98   v_info->dfn_number = env->count;
  99   v_info->low_link = env->count;
 100   env->count++;
 101   env->stack[(env->stack_size)++] = v;
 102   v_info->on_stack = true;
 103
 104   for (edge = v->callees; edge; edge = edge->next_callee)
 105     {
 106       struct ipa_dfs_info * w_info;
 107       enum availability avail;
 108       struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);
 109
 110       if (!w || (ignore_edge && ignore_edge (edge)))
 111         continue;
 112
 113       if (w->aux
 114           && (avail >= AVAIL_INTERPOSABLE))
 115         {
 116           w_info = (struct ipa_dfs_info *) w->aux;
 117           if (w_info->new_node)
 118             {
 119               searchc (env, w, ignore_edge);
 120               v_info->low_link =
 121                 (v_info->low_link < w_info->low_link) ?
 122                 v_info->low_link : w_info->low_link;
 123             }
 124           else
 125             if ((w_info->dfn_number < v_info->dfn_number)
 126                 && (w_info->on_stack))
 127               v_info->low_link =
 128                 (w_info->dfn_number < v_info->low_link) ?
 129                 w_info->dfn_number : v_info->low_link;
 130         }
 131     }
 132
 133
 134   if (v_info->low_link == v_info->dfn_number)
 135     {
 136       struct cgraph_node *last = NULL;
 137       struct cgraph_node *x;
 138       struct ipa_dfs_info *x_info;
 139       do {
 140         x = env->stack[--(env->stack_size)];
 141         x_info = (struct ipa_dfs_info *) x->aux;
 142         x_info->on_stack = false;
 143         x_info->scc_no = v_info->dfn_number;
 144
 145         if (env->reduce)
 146           {
 147             x_info->next_cycle = last;
 148             last = x;
 149           }
 150         else
 151           env->result[env->order_pos++] = x;
 152       }
 153       while (v != x);
 154       if (env->reduce)
 155         env->result[env->order_pos++] = v;
 156     }
 157 }
 158
 159 /* Topsort the call graph by caller relation.  Put the result in ORDER.
 160
 161    The REDUCE flag is true if you want the cycles reduced to single nodes.
 162    You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
 163    call graph nodes in a reduced node.
 164
 165    Set ALLOW_OVERWRITABLE if nodes with such availability should be included.
 166    IGNORE_EDGE, if non-NULL is a hook that may make some edges insignificant
 167    for the topological sort.   */
 168
 169 int
 170 ipa_reduced_postorder (struct cgraph_node **order,
 171                        bool reduce,
 172                        bool (*ignore_edge) (struct cgraph_edge *))
 173 {
 174   struct cgraph_node *node;
 175   struct searchc_env env;
 176   splay_tree_node result;
 177   env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 178   env.stack_size = 0;
 179   env.result = order;
 180   env.order_pos = 0;
 181   env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
 182   env.count = 1;
 183   env.reduce = reduce;
 184
 185   FOR_EACH_DEFINED_FUNCTION (node)
 186     {
 187       enum availability avail = node->get_availability ();
 188
 189       if (avail > AVAIL_INTERPOSABLE
 190           || avail == AVAIL_INTERPOSABLE)
 191         {
 192           /* Reuse the info if it is already there.  */
 193           struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
 194           if (!info)
 195             info = XCNEW (struct ipa_dfs_info);
 196           info->new_node = true;
 197           info->on_stack = false;
 198           info->next_cycle = NULL;
 199           node->aux = info;
 200
 201           splay_tree_insert (env.nodes_marked_new,
 202                              (splay_tree_key)node->get_uid (),
 203                              (splay_tree_value)node);
 204         }
 205       else
 206         node->aux = NULL;
 207     }
 208   result = splay_tree_min (env.nodes_marked_new);
 209   while (result)
 210     {
 211       node = (struct cgraph_node *)result->value;
 212       searchc (&env, node, ignore_edge);
 213       result = splay_tree_min (env.nodes_marked_new);
 214     }
 215   splay_tree_delete (env.nodes_marked_new);
 216   free (env.stack);
 217
 218   return env.order_pos;
 219 }
 220
 221 /* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
 222    graph nodes.  */
 223
 224 void
 225 ipa_free_postorder_info (void)
 226 {
 227   struct cgraph_node *node;
 228   FOR_EACH_DEFINED_FUNCTION (node)
 229     {
 230       /* Get rid of the aux information.  */
 231       if (node->aux)
 232         {
 233           free (node->aux);
 234           node->aux = NULL;
 235         }
 236     }
 237 }
 238
 239 /* Get the set of nodes for the cycle in the reduced call graph starting
 240    from NODE.  */
 241
 242 vec<cgraph_node *>
 243 ipa_get_nodes_in_cycle (struct cgraph_node *node)
 244 {
 245   vec<cgraph_node *> v = vNULL;
 246   struct ipa_dfs_info *node_dfs_info;
 247   while (node)
 248     {
 249       v.safe_push (node);
 250       node_dfs_info = (struct ipa_dfs_info *) node->aux;
 251       node = node_dfs_info->next_cycle;
 252     }
 253   return v;
 254 }
 255
 256 /* Return true iff the CS is an edge within a strongly connected component as
 257    computed by ipa_reduced_postorder.  */
 258
 259 bool
 260 ipa_edge_within_scc (struct cgraph_edge *cs)
 261 {
 262   struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
 263   struct ipa_dfs_info *callee_dfs;
 264   struct cgraph_node *callee = cs->callee->function_symbol ();
 265
 266   callee_dfs = (struct ipa_dfs_info *) callee->aux;
 267   return (caller_dfs
 268           && callee_dfs
 269           && caller_dfs->scc_no == callee_dfs->scc_no);
 270 }
 271
 272 struct postorder_stack
 273 {
 274   struct cgraph_node *node;
 275   struct cgraph_edge *edge;
 276   int ref;
 277 };
 278
 279 /* Fill array order with all nodes with output flag set in the reverse
 280    topological order.  Return the number of elements in the array.
 281    FIXME: While walking, consider aliases, too.  */
 282
 283 int
 284 ipa_reverse_postorder (struct cgraph_node **order)
 285 {
 286   struct cgraph_node *node, *node2;
 287   int stack_size = 0;
 288   int order_pos = 0;
 289   struct cgraph_edge *edge;
 290   int pass;
 291   struct ipa_ref *ref = NULL;
 292
 293   struct postorder_stack *stack =
 294     XCNEWVEC (struct postorder_stack, symtab->cgraph_count);
 295
 296   /* We have to deal with cycles nicely, so use a depth first traversal
 297      output algorithm.  Ignore the fact that some functions won't need
 298      to be output and put them into order as well, so we get dependencies
 299      right through inline functions.  */
 300   FOR_EACH_FUNCTION (node)
 301     node->aux = NULL;
 302   for (pass = 0; pass < 2; pass++)
 303     FOR_EACH_FUNCTION (node)
 304       if (!node->aux
 305           && (pass
 306               || (!node->address_taken
 307                   && !node->inlined_to
 308                   && !node->alias && !node->thunk
 309                   && !node->only_called_directly_p ())))
 310         {
 311           stack_size = 0;
 312           stack[stack_size].node = node;
 313           stack[stack_size].edge = node->callers;
 314           stack[stack_size].ref = 0;
 315           node->aux = (void *)(size_t)1;
 316           while (stack_size >= 0)
 317             {
 318               while (true)
 319                 {
 320                   node2 = NULL;
 321                   while (stack[stack_size].edge && !node2)
 322                     {
 323                       edge = stack[stack_size].edge;
 324                       node2 = edge->caller;
 325                       stack[stack_size].edge = edge->next_caller;
 326                       /* Break possible cycles involving always-inline
 327                          functions by ignoring edges from always-inline
 328                          functions to non-always-inline functions.  */
 329                       if (DECL_DISREGARD_INLINE_LIMITS (edge->caller->decl)
 330                           && !DECL_DISREGARD_INLINE_LIMITS
 331                             (edge->callee->function_symbol ()->decl))
 332                         node2 = NULL;
 333                     }
 334                   for (; stack[stack_size].node->iterate_referring (
 335                                                        stack[stack_size].ref,
 336                                                        ref) && !node2;
 337                        stack[stack_size].ref++)
 338                     {
 339                       if (ref->use == IPA_REF_ALIAS)
 340                         node2 = dyn_cast <cgraph_node *> (ref->referring);
 341                     }
 342                   if (!node2)
 343                     break;
 344                   if (!node2->aux)
 345                     {
 346                       stack[++stack_size].node = node2;
 347                       stack[stack_size].edge = node2->callers;
 348                       stack[stack_size].ref = 0;
 349                       node2->aux = (void *)(size_t)1;
 350                     }
 351                 }
 352               order[order_pos++] = stack[stack_size--].node;
 353             }
 354         }
 355   free (stack);
 356   FOR_EACH_FUNCTION (node)
 357     node->aux = NULL;
 358   return order_pos;
 359 }
 360
 361
 362
 363 /* Given a memory reference T, will return the variable at the bottom
 364    of the access.  Unlike get_base_address, this will recurse through
 365    INDIRECT_REFS.  */
 366
 367 tree
 368 get_base_var (tree t)
 369 {
 370   while (!SSA_VAR_P (t)
 371          && (!CONSTANT_CLASS_P (t))
 372          && TREE_CODE (t) != LABEL_DECL
 373          && TREE_CODE (t) != FUNCTION_DECL
 374          && TREE_CODE (t) != CONST_DECL
 375          && TREE_CODE (t) != CONSTRUCTOR)
 376     {
 377       t = TREE_OPERAND (t, 0);
 378     }
 379   return t;
 380 }
 381
 382 /* Scale function of calls in NODE by ratio ORIG_COUNT/NODE->count.  */
 383
 384 void
 385 scale_ipa_profile_for_fn (struct cgraph_node *node, profile_count orig_count)
 386 {
 387   profile_count to = node->count;
 388   profile_count::adjust_for_ipa_scaling (&to, &orig_count);
 389   struct cgraph_edge *e;
 390
 391   for (e = node->callees; e; e = e->next_callee)
 392     e->count = e->count.apply_scale (to, orig_count);
 393   for (e = node->indirect_calls; e; e = e->next_callee)
 394     e->count = e->count.apply_scale (to, orig_count);
 395 }
 396
 397 /* SRC and DST are going to be merged.  Take SRC's profile and merge it into
 398    DST so it is not going to be lost.  Possibly destroy SRC's body on the way
 399    unless PRESERVE_BODY is set.  */
 400
 401 void
 402 ipa_merge_profiles (struct cgraph_node *dst,
 403                     struct cgraph_node *src,
 404                     bool preserve_body)
 405 {
 406   tree oldsrcdecl = src->decl;
 407   struct function *srccfun, *dstcfun;
 408   bool match = true;
 409   bool copy_counts = false;
 410
 411   if (!src->definition
 412       || !dst->definition)
 413     return;
 414
 415   if (src->frequency < dst->frequency)
 416     src->frequency = dst->frequency;
 417
 418   /* Time profiles are merged.  */
 419   if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
 420     dst->tp_first_run = src->tp_first_run;
 421
 422   if (src->profile_id && !dst->profile_id)
 423     dst->profile_id = src->profile_id;
 424
 425   /* Merging zero profile to dst is no-op.  */
 426   if (src->count.ipa () == profile_count::zero ())
 427     return;
 428
 429   /* FIXME when we merge in unknown profile, we ought to set counts as
 430      unsafe.  */
 431   if (!src->count.initialized_p ()
 432       || !(src->count.ipa () == src->count))
 433     return;
 434   profile_count orig_count = dst->count;
 435
 436   /* Either sum the profiles if both are IPA and not global0, or
 437      pick more informative one (that is nonzero IPA if other is
 438      uninitialized, guessed or global0).   */
 439
 440   if ((dst->count.ipa ().nonzero_p ()
 441        || src->count.ipa ().nonzero_p ())
 442       && dst->count.ipa ().initialized_p ()
 443       && src->count.ipa ().initialized_p ())
 444     dst->count = dst->count.ipa () + src->count.ipa ();
 445   else if (dst->count.ipa ().initialized_p ())
 446     ;
 447   else if (src->count.ipa ().initialized_p ())
 448     {
 449       copy_counts = true;
 450       dst->count = src->count.ipa ();
 451     }
 452
 453   /* If no updating needed return early.  */
 454   if (dst->count == orig_count)
 455     return;
 456
 457   if (symtab->dump_file)
 458     {
 459       fprintf (symtab->dump_file, "Merging profiles of %s count:",
 460                src->dump_name ());
 461       src->count.dump (symtab->dump_file);
 462       fprintf (symtab->dump_file, " to %s count:",
 463                dst->dump_name ());
 464       orig_count.dump (symtab->dump_file);
 465       fprintf (symtab->dump_file, " resulting count:");
 466       dst->count.dump (symtab->dump_file);
 467       fprintf (symtab->dump_file, "\n");
 468     }
 469
 470   /* First handle functions with no gimple body.  */
 471   if (dst->thunk || dst->alias
 472       || src->thunk || src->alias)
 473     {
 474       scale_ipa_profile_for_fn (dst, orig_count);
 475       return;
 476     }
 477
 478   /* This is ugly.  We need to get both function bodies into memory.
 479      If declaration is merged, we need to duplicate it to be able
 480      to load body that is being replaced.  This makes symbol table
 481      temporarily inconsistent.  */
 482   if (src->decl == dst->decl)
 483     {
 484       struct lto_in_decl_state temp;
 485       struct lto_in_decl_state *state;
 486
 487       /* We are going to move the decl, we want to remove its file decl data.
 488          and link these with the new decl. */
 489       temp.fn_decl = src->decl;
 490       lto_in_decl_state **slot
 491         = src->lto_file_data->function_decl_states->find_slot (&temp,
 492                                                                NO_INSERT);
 493       state = *slot;
 494       src->lto_file_data->function_decl_states->clear_slot (slot);
 495       gcc_assert (state);
 496
 497       /* Duplicate the decl and be sure it does not link into body of DST.  */
 498       src->decl = copy_node (src->decl);
 499       DECL_STRUCT_FUNCTION (src->decl) = NULL;
 500       DECL_ARGUMENTS (src->decl) = NULL;
 501       DECL_INITIAL (src->decl) = NULL;
 502       DECL_RESULT (src->decl) = NULL;
 503
 504       /* Associate the decl state with new declaration, so LTO streamer
 505          can look it up.  */
 506       state->fn_decl = src->decl;
 507       slot
 508         = src->lto_file_data->function_decl_states->find_slot (state, INSERT);
 509       gcc_assert (!*slot);
 510       *slot = state;
 511     }
 512   src->get_untransformed_body ();
 513   dst->get_untransformed_body ();
 514   srccfun = DECL_STRUCT_FUNCTION (src->decl);
 515   dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
 516   if (n_basic_blocks_for_fn (srccfun)
 517       != n_basic_blocks_for_fn (dstcfun))
 518     {
 519       if (symtab->dump_file)
 520         fprintf (symtab->dump_file,
 521                  "Giving up; number of basic block mismatch.\n");
 522       match = false;
 523     }
 524   else if (last_basic_block_for_fn (srccfun)
 525            != last_basic_block_for_fn (dstcfun))
 526     {
 527       if (symtab->dump_file)
 528         fprintf (symtab->dump_file,
 529                  "Giving up; last block mismatch.\n");
 530       match = false;
 531     }
 532   else
 533     {
 534       basic_block srcbb, dstbb;
 535       struct cgraph_edge *e, *e2;
 536
 537       for (e = dst->callees, e2 = src->callees; e && e2 && match;
 538            e2 = e2->next_callee, e = e->next_callee)
 539         {
 540           if (gimple_bb (e->call_stmt)->index
 541               != gimple_bb (e2->call_stmt)->index)
 542             {
 543               if (symtab->dump_file)
 544                 fprintf (symtab->dump_file,
 545                          "Giving up; call stmt mismatch.\n");
 546               match = false;
 547             }
 548         }
 549       if (e || e2)
 550         {
 551           if (symtab->dump_file)
 552             fprintf (symtab->dump_file,
 553                      "Giving up; number of calls differs.\n");
 554           match = false;
 555         }
 556       for (e = dst->indirect_calls, e2 = src->indirect_calls; e && e2 && match;
 557            e2 = e2->next_callee, e = e->next_callee)
 558         {
 559           if (gimple_bb (e->call_stmt)->index
 560               != gimple_bb (e2->call_stmt)->index)
 561             {
 562               if (symtab->dump_file)
 563                 fprintf (symtab->dump_file,
 564                          "Giving up; indirect call stmt mismatch.\n");
 565               match = false;
 566             }
 567         }
 568       if (e || e2)
 569         {
 570           if (symtab->dump_file)
 571             fprintf (symtab->dump_file,
 572                      "Giving up; number of indirect calls differs.\n");
 573           match=false;
 574         }
 575
 576       if (match)
 577         FOR_ALL_BB_FN (srcbb, srccfun)
 578           {
 579             unsigned int i;
 580
 581             dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 582             if (dstbb == NULL)
 583               {
 584                 if (symtab->dump_file)
 585                   fprintf (symtab->dump_file,
 586                            "No matching block for bb %i.\n",
 587                            srcbb->index);
 588                 match = false;
 589                 break;
 590               }
 591             if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
 592               {
 593                 if (symtab->dump_file)
 594                   fprintf (symtab->dump_file,
 595                            "Edge count mismatch for bb %i.\n",
 596                            srcbb->index);
 597                 match = false;
 598                 break;
 599               }
 600             for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 601               {
 602                 edge srce = EDGE_SUCC (srcbb, i);
 603                 edge dste = EDGE_SUCC (dstbb, i);
 604                 if (srce->dest->index != dste->dest->index)
 605                   {
 606                     if (symtab->dump_file)
 607                       fprintf (symtab->dump_file,
 608                                "Succ edge mismatch for bb %i.\n",
 609                                srce->dest->index);
 610                     match = false;
 611                     break;
 612                   }
 613               }
 614           }
 615     }
 616   if (match)
 617     {
 618       struct cgraph_edge *e, *e2;
 619       basic_block srcbb, dstbb;
 620
 621       /* Function and global profile may be out of sync.  First scale it same
 622          way as fixup_cfg would.  */
 623       profile_count srcnum = src->count;
 624       profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
 625       bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
 626       profile_count dstnum = orig_count;
 627       profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
 628       bool dstscale = !copy_counts
 629                       && dstnum.initialized_p () && !(dstnum == dstden);
 630
 631       /* TODO: merge also statement histograms.  */
 632       FOR_ALL_BB_FN (srcbb, srccfun)
 633         {
 634           unsigned int i;
 635
 636           dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 637
 638           profile_count srccount = srcbb->count;
 639           if (srcscale)
 640             srccount = srccount.apply_scale (srcnum, srcden);
 641           if (dstscale)
 642             dstbb->count = dstbb->count.apply_scale (dstnum, dstden);
 643
 644           if (copy_counts)
 645             {
 646               dstbb->count = srccount;
 647               for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 648                 {
 649                   edge srce = EDGE_SUCC (srcbb, i);
 650                   edge dste = EDGE_SUCC (dstbb, i);
 651                   if (srce->probability.initialized_p ())
 652                     dste->probability = srce->probability;
 653                 }
 654             }
 655           else
 656             {
 657               for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 658                 {
 659                   edge srce = EDGE_SUCC (srcbb, i);
 660                   edge dste = EDGE_SUCC (dstbb, i);
 661                   dste->probability =
 662                     dste->probability * dstbb->count.ipa ().probability_in
 663                                                  (dstbb->count.ipa ()
 664                                                   + srccount.ipa ())
 665                     + srce->probability * srcbb->count.ipa ().probability_in
 666                                                  (dstbb->count.ipa ()
 667                                                   + srccount.ipa ());
 668                 }
 669               dstbb->count = dstbb->count.ipa () + srccount.ipa ();
 670             }
 671         }
 672       push_cfun (dstcfun);
 673       update_max_bb_count ();
 674       compute_function_frequency ();
 675       pop_cfun ();
 676       for (e = dst->callees; e; e = e->next_callee)
 677         {
 678           if (e->speculative)
 679             continue;
 680           e->count = gimple_bb (e->call_stmt)->count;
 681         }
 682       for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
 683            e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
 684         {
 685           if (!e->speculative && !e2->speculative)
 686             {
 687               /* FIXME: we need to also merge ipa-profile histograms
 688                  because with LTO merging happens from lto-symtab before
 689                  these are converted to indirect edges.  */
 690               e->count = gimple_bb (e->call_stmt)->count;
 691               continue;
 692             }
 693
 694           /* When copying just remove all speuclations on dst and then copy
 695              one from src.  */
 696           if (copy_counts)
 697             {
 698               while (e->speculative)
 699                 cgraph_edge::resolve_speculation (e, NULL);
 700               e->count = gimple_bb (e->call_stmt)->count;
 701               if (e2->speculative)
 702                 {
 703                   for (cgraph_edge *e3 = e2->first_speculative_call_target ();
 704                        e3;
 705                        e3 = e3->next_speculative_call_target ())
 706                     {
 707                       cgraph_edge *ns;
 708                       ns = e->make_speculative
 709                          (dyn_cast <cgraph_node *>
 710                             (e3->speculative_call_target_ref ()->referred),
 711                              e3->count, e3->speculative_id);
 712                       /* Target may differ from ref (for example it may be
 713                          redirected to local alias.  */
 714                       ns->redirect_callee (e3->callee);
 715                     }
 716                 }
 717               continue;
 718             }
 719
 720           /* Iterate all speculations in SRC, see if corresponding ones exist
 721              int DST and if so, sum the counts.  Otherwise create new
 722              speculation.  */
 723           int max_spec = 0;
 724           for (cgraph_edge *e3 = e->first_speculative_call_target ();
 725                e3;
 726                e3 = e3->next_speculative_call_target ())
 727             if (e3->speculative_id > max_spec)
 728               max_spec = e3->speculative_id;
 729           for (cgraph_edge *e3 = e2->first_speculative_call_target ();
 730                e3;
 731                e3 = e3->next_speculative_call_target ())
 732             {
 733               cgraph_edge *te
 734                  = e->speculative_call_for_target
 735                          (dyn_cast <cgraph_node *>
 736                             (e3->speculative_call_target_ref ()->referred));
 737               if (te)
 738                 te->count = te->count + e3->count;
 739               else
 740                 {
 741                   e->count = e->count + e3->count;
 742                   cgraph_edge *ns;
 743                   ns = e->make_speculative
 744                          (dyn_cast <cgraph_node *>
 745                             (e3->speculative_call_target_ref ()
 746                              ->referred),
 747                           e3->count,
 748                           e3->speculative_id + max_spec + 1);
 749                   /* Target may differ from ref (for example it may be
 750                      redirected to local alias.  */
 751                   ns->redirect_callee (e3->callee);
 752                 }
 753             }
 754         }
 755       if (!preserve_body)
 756         src->release_body ();
 757       /* Update summary.  */
 758       compute_fn_summary (dst, 0);
 759     }
 760   /* We can't update CFG profile, but we can scale IPA profile. CFG
 761      will be scaled according to dst->count after IPA passes.  */
 762   else
 763     scale_ipa_profile_for_fn (dst, orig_count);
 764   src->decl = oldsrcdecl;
 765 }
 766
 767 /* Return true if call to DEST is known to be self-recusive
 768    call withing FUNC.  */
 769
 770 bool
 771 recursive_call_p (tree func, tree dest)
 772 {
 773   struct cgraph_node *dest_node = cgraph_node::get_create (dest);
 774   struct cgraph_node *cnode = cgraph_node::get_create (func);
 775   ipa_ref *alias;
 776   enum availability avail;
 777
 778   gcc_assert (!cnode->alias);
 779   if (cnode != dest_node->ultimate_alias_target (&avail))
 780     return false;
 781   if (avail >= AVAIL_AVAILABLE)
 782     return true;
 783   if (!dest_node->semantically_equivalent_p (cnode))
 784     return false;
 785   /* If there is only one way to call the fuction or we know all of them
 786      are semantically equivalent, we still can consider call recursive.  */
 787   FOR_EACH_ALIAS (cnode, alias)
 788     if (!dest_node->semantically_equivalent_p (alias->referring))
 789       return false;
 790   return true;
 791 }
 792
 793 /* Return true if stmt may terminate execution of function.
 794    If assume_return_or_eh we can further assume that the function ends
 795    either by retrn statement or EH (no trapping or infinite loops).  */
 796
 797 bool
 798 stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh)
 799 {
 800   if (stmt_can_throw_external (fun, stmt))
 801     return true;
 802   if (assume_return_or_eh)
 803     return false;
 804   gasm *astmt = dyn_cast <gasm *> (stmt);
 805   if (astmt && gimple_asm_volatile_p (astmt))
 806     return true;
 807   if (gimple_could_trap_p (stmt))
 808     return true;
 809   if (gcall *call = dyn_cast <gcall *> (stmt))
 810     {
 811       int flags = gimple_call_flags (call);
 812       if (flags & (ECF_PURE | ECF_CONST) && ! (flags & ECF_LOOPING_CONST_OR_PURE))
 813         return false;
 814       modref_summary *s = get_modref_function_summary (call, NULL);
 815       if (s && !s->side_effects)
 816         return false;
 817       return true;
 818     }
 819   return false;
 820 }
 821
 822 /* Return bitmap of all basic blocks whose first statements are known to
 823    execute on every invocation of the function.
 824
 825    If assume_return_or_eh we can further assume that the function ends
 826    either by retrn statement or EH (no trapping or infinite loops).
 827    This is useful when sumarizing function in passes like ipa-modref.
 828
 829    Seeing assume_return_or_eh to false is used to prove that given
 830    statmeent will be executed even if the function gets into infinite
 831    loop or trap.  */
 832 bitmap
 833 find_always_executed_bbs (function *fun, bool assume_return_or_eh)
 834 {
 835   auto_vec<basic_block, 20> stack;
 836   auto_vec<basic_block, 20> terminating_bbs;
 837   hash_set<basic_block> visited;
 838   hash_set<basic_block> terminating_bbs_set;
 839   edge e;
 840   edge_iterator ei;
 841   int flags = flags_from_decl_or_type (fun->decl);
 842   /* PUre and const functions always return.  */
 843   assume_return_or_eh |= (flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE);
 844   if (!assume_return_or_eh)
 845     mark_dfs_back_edges (fun);
 846
 847   /* First walk all BBs reachable from entry stopping on statements that may
 848      terminate execution.  Everything past this statement is not going to be executed
 849      each invocation.  */
 850   stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
 851   while (!stack.is_empty ())
 852     {
 853       basic_block bb = stack.pop ();
 854       bool found = false, found_exit = false;
 855       if (bb->index == EXIT_BLOCK)
 856         continue;
 857       FOR_EACH_EDGE (e, ei, bb->succs)
 858         {
 859           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
 860             {
 861               found_exit = true;
 862               break;
 863             }
 864           /* Watch for infinite loops.  */
 865           if (!found
 866               && !assume_return_or_eh && (e->flags & EDGE_DFS_BACK))
 867             {
 868               if (!dom_info_available_p (CDI_DOMINATORS))
 869                 calculate_dominance_info (CDI_DOMINATORS);
 870               /* If this is not a loop latch edge it is an irreducible region.
 871                  Assume that it is infinite.
 872                  TODO: with C++ forced progression we can still walk the
 873                  irreducible region and see if it contains any side effects.
 874                  Similarly for loops.  -ffinite-loops does not really imply
 875                  this since we allow inlining across -ffinite-loops bondary
 876                  and thus it can be used only as a loop flag.  */
 877               if (e->dest->loop_father->header != e->dest
 878                   || !dominated_by_p (CDI_DOMINATORS, bb, e->dest))
 879                 found = true;
 880               else if (!finite_loop_p (e->dest->loop_father))
 881                 found = true;
 882             }
 883         }
 884       if (!assume_return_or_eh
 885           && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
 886         found = true;
 887       for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
 888            !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
 889         if (stmt_may_terminate_function_p (fun, gsi_stmt (si), assume_return_or_eh))
 890           {
 891             found = true;
 892             break;
 893           }
 894       if (found)
 895         {
 896           visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
 897           if (!found_exit)
 898             {
 899               terminating_bbs.safe_push (bb);
 900               terminating_bbs_set.add (bb);
 901             }
 902         }
 903       else
 904         FOR_EACH_EDGE (e, ei, bb->succs)
 905           if (!visited.add (e->dest))
 906             stack.safe_push (e->dest);
 907     }
 908
 909   /* Next walk from exit block and find all articulations in the CFG.
 910      Add all terminating basic blocks as "fake" predecessors of the
 911      exit block.  */
 912
 913   bitmap ret = BITMAP_ALLOC (NULL);
 914   /* A degenerated case when there is no path to exit.  */
 915   if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun)))
 916     {
 917       bitmap_set_bit (ret,
 918                       single_succ_edge
 919                         (ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
 920       return ret;
 921     }
 922
 923   struct astate
 924   {
 925     unsigned int dfs_preorder;
 926     unsigned int dfs_postorder;
 927
 928     unsigned int low, high;
 929   };
 930
 931   struct worklist
 932   {
 933     basic_block bb;
 934     astate *cstate;
 935   };
 936
 937   struct obstack state_obstack;
 938   gcc_obstack_init (&state_obstack);
 939   hash_map<basic_block, astate *> state;
 940   auto_vec<worklist, 32> worklist_vec;
 941   unsigned int next_dfs_num = 1;
 942
 943   /* Always executed blocks are blocks that are on every path from entry to exit.
 944      We proceed in two steps.  First we do backward DFS walk (so we know that entry
 945      is always reached) and record preorder and postorder visiting times.
 946
 947      In second step we proceed in postorder and for every block A we compute
 948      minimal preorder (A.low) and maximal postorder (A.high) of block reachable
 949      from the BBs in DFS subtree of A.  If A is always executed there are no
 950      edges out of this subtree.  This can be tested by checking that A.low == A.preorder
 951      and B.high == A.postorder.
 952
 953      This is first step. Do backward DFS walk and record preorder, postorder
 954      and predecessor info.  Initialize stack in postorder.  */
 955   worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
 956   worklist_vec.safe_push (we);
 957   while (!worklist_vec.is_empty ())
 958     {
 959       worklist &w = worklist_vec.last ();
 960       basic_block bb = w.bb;
 961       astate *cstate = w.cstate;
 962
 963       if (!cstate)
 964         {
 965           astate **slot = &state.get_or_insert (bb);
 966
 967           cstate = *slot;
 968           /* Already processed by DFS?  */
 969           if (cstate)
 970             {
 971               worklist_vec.pop ();
 972               continue;
 973             }
 974           /* DFS is visiting BB for first time.  */
 975           *slot = cstate = XOBNEW (&state_obstack, struct astate);
 976           cstate->low = cstate->high = cstate->dfs_preorder = next_dfs_num++;
 977           w.cstate = cstate;
 978           /* Exit block is special; process all fake edges we identified.  */
 979           if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
 980             for (basic_block bb2 : terminating_bbs)
 981               {
 982                 worklist we = {bb2, NULL};
 983                 worklist_vec.safe_push (we);
 984               }
 985           FOR_EACH_EDGE (e, ei, bb->preds)
 986             if (visited.contains (e->src))
 987               {
 988                 worklist we = {e->src, NULL};
 989                 worklist_vec.safe_push (we);
 990               }
 991           /* Keep BB on worklist so we process it last time.  */
 992           continue;
 993         }
 994       /* We are finished with processing reachable BBs, see if we have articulation.  */
 995       worklist_vec.pop ();
 996       cstate->high = cstate->dfs_postorder = next_dfs_num++;
 997       stack.safe_push (bb);
 998     }
 999   /* This is the final postorder walk.  Determine low and high values and mark
1000      always executed blocks.  */
1001   for (basic_block bb : stack)
1002     {
1003       astate *cstate = *state.get (bb);
1004       FOR_EACH_EDGE (e, ei, bb->preds)
1005         {
1006           astate **cstate2 = state.get (e->src);
1007           /* We skip walking part of CFG reached only after first edge to exit.
1008              No BB reachable from the skipped part is always executed */
1009           if (!cstate2)
1010             {
1011               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
1012                 cstate->low = 0;
1013               continue;
1014             }
1015           cstate->low = MIN (cstate->low, (*cstate2)->low);
1016           cstate->high = MAX (cstate->high, (*cstate2)->high);
1017         }
1018       if (dump_file && (dump_flags & TDF_DETAILS) && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1019         fprintf (dump_file, "BB %i %s preorder %i posorder %i low %i high %i\n",
1020                  bb->index, terminating_bbs_set.contains (bb) ? "(terminating)": "",
1021                  cstate->dfs_preorder, cstate->dfs_postorder, cstate->low, cstate->high);
1022       if (cstate->low == cstate->dfs_preorder && cstate->high == cstate->dfs_postorder
1023           && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1024         bitmap_set_bit (ret, bb->index);
1025       if (terminating_bbs_set.contains (bb))
1026         cstate->low = 0;
1027       else
1028         FOR_EACH_EDGE (e, ei, bb->succs)
1029           {
1030             astate **cstate2 = state.get (e->dest);
1031             if (!cstate2)
1032               continue;
1033             cstate->low = MIN (cstate->low, (*cstate2)->low);
1034             cstate->high = MAX (cstate->high, (*cstate2)->high);
1035           }
1036       }
1037   obstack_free (&state_obstack, NULL);
1038   if (dump_file)
1039     {
1040       fprintf (dump_file, "Always executed bbbs %s: ",
1041                assume_return_or_eh ? "(assuming return or EH)": "");
1042       bitmap_print (dump_file, ret, "", "\n");
1043     }
1044
1045   return ret;
1046 }