gcc/ipa-utils.cc

   1 /* Utilities for ipa analysis.
   2    Copyright (C) 2005-2024 Free Software Foundation, Inc.
   3    Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "tree.h"
  26 #include "gimple.h"
  27 #include "predict.h"
  28 #include "alloc-pool.h"
  29 #include "cgraph.h"
  30 #include "lto-streamer.h"
  31 #include "dumpfile.h"
  32 #include "splay-tree.h"
  33 #include "ipa-utils.h"
  34 #include "symbol-summary.h"
  35 #include "tree-vrp.h"
  36 #include "ipa-prop.h"
  37 #include "ipa-fnsummary.h"
  38 #include "tree-eh.h"
  39 #include "gimple-iterator.h"
  40 #include "ipa-modref-tree.h"
  41 #include "ipa-modref.h"
  42 #include "tree-ssa-loop-niter.h"
  43 #include "calls.h"
  44 #include "cfgloop.h"
  45 #include "cfganal.h"
  46
  47 /* Debugging function for postorder and inorder code. NOTE is a string
  48    that is printed before the nodes are printed.  ORDER is an array of
  49    cgraph_nodes that has COUNT useful nodes in it.  */
  50
  51 void
  52 ipa_print_order (FILE* out,
  53                  const char * note,
  54                  struct cgraph_node** order,
  55                  int count)
  56 {
  57   int i;
  58   fprintf (out, "\n\n ordered call graph: %s\n", note);
  59
  60   for (i = count - 1; i >= 0; i--)
  61     order[i]->dump (out);
  62   fprintf (out, "\n");
  63   fflush (out);
  64 }
  65
  66
  67 struct searchc_env {
  68   struct cgraph_node **stack;
  69   struct cgraph_node **result;
  70   int stack_size;
  71   int order_pos;
  72   splay_tree nodes_marked_new;
  73   bool reduce;
  74   int count;
  75 };
  76
  77 /* This is an implementation of Tarjan's strongly connected region
  78    finder as reprinted in Aho Hopcraft and Ullman's The Design and
  79    Analysis of Computer Programs (1975) pages 192-193.  This version
  80    has been customized for cgraph_nodes.  The env parameter is because
  81    it is recursive and there are no nested functions here.  This
  82    function should only be called from itself or
  83    ipa_reduced_postorder.  ENV is a stack env and would be
  84    unnecessary if C had nested functions.  V is the node to start
  85    searching from.  */
  86
  87 static void
  88 searchc (struct searchc_env* env, struct cgraph_node *v,
  89          bool (*ignore_edge) (struct cgraph_edge *))
  90 {
  91   struct cgraph_edge *edge;
  92   struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;
  93
  94   /* mark node as old */
  95   v_info->new_node = false;
  96   splay_tree_remove (env->nodes_marked_new, v->get_uid ());
  97
  98   v_info->dfn_number = env->count;
  99   v_info->low_link = env->count;
 100   env->count++;
 101   env->stack[(env->stack_size)++] = v;
 102   v_info->on_stack = true;
 103
 104   for (edge = v->callees; edge; edge = edge->next_callee)
 105     {
 106       struct ipa_dfs_info * w_info;
 107       enum availability avail;
 108       struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);
 109
 110       if (!w || (ignore_edge && ignore_edge (edge)))
 111         continue;
 112
 113       if (w->aux
 114           && (avail >= AVAIL_INTERPOSABLE))
 115         {
 116           w_info = (struct ipa_dfs_info *) w->aux;
 117           if (w_info->new_node)
 118             {
 119               searchc (env, w, ignore_edge);
 120               v_info->low_link =
 121                 (v_info->low_link < w_info->low_link) ?
 122                 v_info->low_link : w_info->low_link;
 123             }
 124           else
 125             if ((w_info->dfn_number < v_info->dfn_number)
 126                 && (w_info->on_stack))
 127               v_info->low_link =
 128                 (w_info->dfn_number < v_info->low_link) ?
 129                 w_info->dfn_number : v_info->low_link;
 130         }
 131     }
 132
 133
 134   if (v_info->low_link == v_info->dfn_number)
 135     {
 136       struct cgraph_node *last = NULL;
 137       struct cgraph_node *x;
 138       struct ipa_dfs_info *x_info;
 139       do {
 140         x = env->stack[--(env->stack_size)];
 141         x_info = (struct ipa_dfs_info *) x->aux;
 142         x_info->on_stack = false;
 143         x_info->scc_no = v_info->dfn_number;
 144
 145         if (env->reduce)
 146           {
 147             x_info->next_cycle = last;
 148             last = x;
 149           }
 150         else
 151           env->result[env->order_pos++] = x;
 152       }
 153       while (v != x);
 154       if (env->reduce)
 155         env->result[env->order_pos++] = v;
 156     }
 157 }
 158
 159 /* Topsort the call graph by caller relation.  Put the result in ORDER.
 160
 161    The REDUCE flag is true if you want the cycles reduced to single nodes.
 162    You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
 163    call graph nodes in a reduced node.
 164
 165    Set ALLOW_OVERWRITABLE if nodes with such availability should be included.
 166    IGNORE_EDGE, if non-NULL is a hook that may make some edges insignificant
 167    for the topological sort.   */
 168
 169 int
 170 ipa_reduced_postorder (struct cgraph_node **order,
 171                        bool reduce,
 172                        bool (*ignore_edge) (struct cgraph_edge *))
 173 {
 174   struct cgraph_node *node;
 175   struct searchc_env env;
 176   splay_tree_node result;
 177   env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 178   env.stack_size = 0;
 179   env.result = order;
 180   env.order_pos = 0;
 181   env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
 182   env.count = 1;
 183   env.reduce = reduce;
 184
 185   FOR_EACH_DEFINED_FUNCTION (node)
 186     {
 187       enum availability avail = node->get_availability ();
 188
 189       if (avail > AVAIL_INTERPOSABLE
 190           || avail == AVAIL_INTERPOSABLE)
 191         {
 192           /* Reuse the info if it is already there.  */
 193           struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
 194           if (!info)
 195             info = XCNEW (struct ipa_dfs_info);
 196           info->new_node = true;
 197           info->on_stack = false;
 198           info->next_cycle = NULL;
 199           node->aux = info;
 200
 201           splay_tree_insert (env.nodes_marked_new,
 202                              (splay_tree_key)node->get_uid (),
 203                              (splay_tree_value)node);
 204         }
 205       else
 206         node->aux = NULL;
 207     }
 208   result = splay_tree_min (env.nodes_marked_new);
 209   while (result)
 210     {
 211       node = (struct cgraph_node *)result->value;
 212       searchc (&env, node, ignore_edge);
 213       result = splay_tree_min (env.nodes_marked_new);
 214     }
 215   splay_tree_delete (env.nodes_marked_new);
 216   free (env.stack);
 217
 218   return env.order_pos;
 219 }
 220
 221 /* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
 222    graph nodes.  */
 223
 224 void
 225 ipa_free_postorder_info (void)
 226 {
 227   struct cgraph_node *node;
 228   FOR_EACH_DEFINED_FUNCTION (node)
 229     {
 230       /* Get rid of the aux information.  */
 231       if (node->aux)
 232         {
 233           free (node->aux);
 234           node->aux = NULL;
 235         }
 236     }
 237 }
 238
 239 /* Get the set of nodes for the cycle in the reduced call graph starting
 240    from NODE.  */
 241
 242 vec<cgraph_node *>
 243 ipa_get_nodes_in_cycle (struct cgraph_node *node)
 244 {
 245   vec<cgraph_node *> v = vNULL;
 246   struct ipa_dfs_info *node_dfs_info;
 247   while (node)
 248     {
 249       v.safe_push (node);
 250       node_dfs_info = (struct ipa_dfs_info *) node->aux;
 251       node = node_dfs_info->next_cycle;
 252     }
 253   return v;
 254 }
 255
 256 /* Return true iff the CS is an edge within a strongly connected component as
 257    computed by ipa_reduced_postorder.  */
 258
 259 bool
 260 ipa_edge_within_scc (struct cgraph_edge *cs)
 261 {
 262   struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
 263   struct ipa_dfs_info *callee_dfs;
 264   struct cgraph_node *callee = cs->callee->function_symbol ();
 265
 266   callee_dfs = (struct ipa_dfs_info *) callee->aux;
 267   return (caller_dfs
 268           && callee_dfs
 269           && caller_dfs->scc_no == callee_dfs->scc_no);
 270 }
 271
 272 struct postorder_stack
 273 {
 274   struct cgraph_node *node;
 275   struct cgraph_edge *edge;
 276   int ref;
 277 };
 278
 279 /* Fill array order with all nodes with output flag set in the reverse
 280    topological order.  Return the number of elements in the array.
 281    FIXME: While walking, consider aliases, too.  */
 282
 283 int
 284 ipa_reverse_postorder (struct cgraph_node **order)
 285 {
 286   struct cgraph_node *node, *node2;
 287   int stack_size = 0;
 288   int order_pos = 0;
 289   struct cgraph_edge *edge;
 290   int pass;
 291   struct ipa_ref *ref = NULL;
 292
 293   struct postorder_stack *stack =
 294     XCNEWVEC (struct postorder_stack, symtab->cgraph_count);
 295
 296   /* We have to deal with cycles nicely, so use a depth first traversal
 297      output algorithm.  Ignore the fact that some functions won't need
 298      to be output and put them into order as well, so we get dependencies
 299      right through inline functions.  */
 300   FOR_EACH_FUNCTION (node)
 301     node->aux = NULL;
 302   for (pass = 0; pass < 2; pass++)
 303     FOR_EACH_FUNCTION (node)
 304       if (!node->aux
 305           && (pass
 306               || (!node->address_taken
 307                   && !node->inlined_to
 308                   && !node->alias && !node->thunk
 309                   && !node->only_called_directly_p ())))
 310         {
 311           stack_size = 0;
 312           stack[stack_size].node = node;
 313           stack[stack_size].edge = node->callers;
 314           stack[stack_size].ref = 0;
 315           node->aux = (void *)(size_t)1;
 316           while (stack_size >= 0)
 317             {
 318               while (true)
 319                 {
 320                   node2 = NULL;
 321                   while (stack[stack_size].edge && !node2)
 322                     {
 323                       edge = stack[stack_size].edge;
 324                       node2 = edge->caller;
 325                       stack[stack_size].edge = edge->next_caller;
 326                     }
 327                   for (; stack[stack_size].node->iterate_referring (
 328                                                        stack[stack_size].ref,
 329                                                        ref) && !node2;
 330                        stack[stack_size].ref++)
 331                     {
 332                       if (ref->use == IPA_REF_ALIAS)
 333                         node2 = dyn_cast <cgraph_node *> (ref->referring);
 334                     }
 335                   if (!node2)
 336                     break;
 337                   if (!node2->aux)
 338                     {
 339                       stack[++stack_size].node = node2;
 340                       stack[stack_size].edge = node2->callers;
 341                       stack[stack_size].ref = 0;
 342                       node2->aux = (void *)(size_t)1;
 343                     }
 344                 }
 345               order[order_pos++] = stack[stack_size--].node;
 346             }
 347         }
 348   free (stack);
 349   FOR_EACH_FUNCTION (node)
 350     node->aux = NULL;
 351   return order_pos;
 352 }
 353
 354
 355
 356 /* Given a memory reference T, will return the variable at the bottom
 357    of the access.  Unlike get_base_address, this will recurse through
 358    INDIRECT_REFS.  */
 359
 360 tree
 361 get_base_var (tree t)
 362 {
 363   while (!SSA_VAR_P (t)
 364          && (!CONSTANT_CLASS_P (t))
 365          && TREE_CODE (t) != LABEL_DECL
 366          && TREE_CODE (t) != FUNCTION_DECL
 367          && TREE_CODE (t) != CONST_DECL
 368          && TREE_CODE (t) != CONSTRUCTOR)
 369     {
 370       t = TREE_OPERAND (t, 0);
 371     }
 372   return t;
 373 }
 374
 375 /* Scale function of calls in NODE by ratio ORIG_COUNT/NODE->count.  */
 376
 377 void
 378 scale_ipa_profile_for_fn (struct cgraph_node *node, profile_count orig_count)
 379 {
 380   profile_count to = node->count;
 381   profile_count::adjust_for_ipa_scaling (&to, &orig_count);
 382   struct cgraph_edge *e;
 383
 384   for (e = node->callees; e; e = e->next_callee)
 385     e->count = e->count.apply_scale (to, orig_count);
 386   for (e = node->indirect_calls; e; e = e->next_callee)
 387     e->count = e->count.apply_scale (to, orig_count);
 388 }
 389
 390 /* SRC and DST are going to be merged.  Take SRC's profile and merge it into
 391    DST so it is not going to be lost.  Possibly destroy SRC's body on the way
 392    unless PRESERVE_BODY is set.  */
 393
 394 void
 395 ipa_merge_profiles (struct cgraph_node *dst,
 396                     struct cgraph_node *src,
 397                     bool preserve_body)
 398 {
 399   tree oldsrcdecl = src->decl;
 400   struct function *srccfun, *dstcfun;
 401   bool match = true;
 402   bool copy_counts = false;
 403
 404   if (!src->definition
 405       || !dst->definition)
 406     return;
 407
 408   if (src->frequency < dst->frequency)
 409     src->frequency = dst->frequency;
 410
 411   /* Time profiles are merged.  */
 412   if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
 413     dst->tp_first_run = src->tp_first_run;
 414
 415   if (src->profile_id && !dst->profile_id)
 416     dst->profile_id = src->profile_id;
 417
 418   /* Merging zero profile to dst is no-op.  */
 419   if (src->count.ipa () == profile_count::zero ())
 420     return;
 421
 422   /* FIXME when we merge in unknown profile, we ought to set counts as
 423      unsafe.  */
 424   if (!src->count.initialized_p ()
 425       || !(src->count.ipa () == src->count))
 426     return;
 427   profile_count orig_count = dst->count;
 428
 429   /* Either sum the profiles if both are IPA and not global0, or
 430      pick more informative one (that is nonzero IPA if other is
 431      uninitialized, guessed or global0).   */
 432
 433   if ((dst->count.ipa ().nonzero_p ()
 434        || src->count.ipa ().nonzero_p ())
 435       && dst->count.ipa ().initialized_p ()
 436       && src->count.ipa ().initialized_p ())
 437     dst->count = dst->count.ipa () + src->count.ipa ();
 438   else if (dst->count.ipa ().initialized_p ())
 439     ;
 440   else if (src->count.ipa ().initialized_p ())
 441     {
 442       copy_counts = true;
 443       dst->count = src->count.ipa ();
 444     }
 445
 446   /* If no updating needed return early.  */
 447   if (dst->count == orig_count)
 448     return;
 449
 450   if (symtab->dump_file)
 451     {
 452       fprintf (symtab->dump_file, "Merging profiles of %s count:",
 453                src->dump_name ());
 454       src->count.dump (symtab->dump_file);
 455       fprintf (symtab->dump_file, " to %s count:",
 456                dst->dump_name ());
 457       orig_count.dump (symtab->dump_file);
 458       fprintf (symtab->dump_file, " resulting count:");
 459       dst->count.dump (symtab->dump_file);
 460       fprintf (symtab->dump_file, "\n");
 461     }
 462
 463   /* First handle functions with no gimple body.  */
 464   if (dst->thunk || dst->alias
 465       || src->thunk || src->alias)
 466     {
 467       scale_ipa_profile_for_fn (dst, orig_count);
 468       return;
 469     }
 470
 471   /* This is ugly.  We need to get both function bodies into memory.
 472      If declaration is merged, we need to duplicate it to be able
 473      to load body that is being replaced.  This makes symbol table
 474      temporarily inconsistent.  */
 475   if (src->decl == dst->decl)
 476     {
 477       struct lto_in_decl_state temp;
 478       struct lto_in_decl_state *state;
 479
 480       /* We are going to move the decl, we want to remove its file decl data.
 481          and link these with the new decl. */
 482       temp.fn_decl = src->decl;
 483       lto_in_decl_state **slot
 484         = src->lto_file_data->function_decl_states->find_slot (&temp,
 485                                                                NO_INSERT);
 486       state = *slot;
 487       src->lto_file_data->function_decl_states->clear_slot (slot);
 488       gcc_assert (state);
 489
 490       /* Duplicate the decl and be sure it does not link into body of DST.  */
 491       src->decl = copy_node (src->decl);
 492       DECL_STRUCT_FUNCTION (src->decl) = NULL;
 493       DECL_ARGUMENTS (src->decl) = NULL;
 494       DECL_INITIAL (src->decl) = NULL;
 495       DECL_RESULT (src->decl) = NULL;
 496
 497       /* Associate the decl state with new declaration, so LTO streamer
 498          can look it up.  */
 499       state->fn_decl = src->decl;
 500       slot
 501         = src->lto_file_data->function_decl_states->find_slot (state, INSERT);
 502       gcc_assert (!*slot);
 503       *slot = state;
 504     }
 505   src->get_untransformed_body ();
 506   dst->get_untransformed_body ();
 507   srccfun = DECL_STRUCT_FUNCTION (src->decl);
 508   dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
 509   if (n_basic_blocks_for_fn (srccfun)
 510       != n_basic_blocks_for_fn (dstcfun))
 511     {
 512       if (symtab->dump_file)
 513         fprintf (symtab->dump_file,
 514                  "Giving up; number of basic block mismatch.\n");
 515       match = false;
 516     }
 517   else if (last_basic_block_for_fn (srccfun)
 518            != last_basic_block_for_fn (dstcfun))
 519     {
 520       if (symtab->dump_file)
 521         fprintf (symtab->dump_file,
 522                  "Giving up; last block mismatch.\n");
 523       match = false;
 524     }
 525   else
 526     {
 527       basic_block srcbb, dstbb;
 528       struct cgraph_edge *e, *e2;
 529
 530       for (e = dst->callees, e2 = src->callees; e && e2 && match;
 531            e2 = e2->next_callee, e = e->next_callee)
 532         {
 533           if (gimple_bb (e->call_stmt)->index
 534               != gimple_bb (e2->call_stmt)->index)
 535             {
 536               if (symtab->dump_file)
 537                 fprintf (symtab->dump_file,
 538                          "Giving up; call stmt mismatch.\n");
 539               match = false;
 540             }
 541         }
 542       if (e || e2)
 543         {
 544           if (symtab->dump_file)
 545             fprintf (symtab->dump_file,
 546                      "Giving up; number of calls differs.\n");
 547           match = false;
 548         }
 549       for (e = dst->indirect_calls, e2 = src->indirect_calls; e && e2 && match;
 550            e2 = e2->next_callee, e = e->next_callee)
 551         {
 552           if (gimple_bb (e->call_stmt)->index
 553               != gimple_bb (e2->call_stmt)->index)
 554             {
 555               if (symtab->dump_file)
 556                 fprintf (symtab->dump_file,
 557                          "Giving up; indirect call stmt mismatch.\n");
 558               match = false;
 559             }
 560         }
 561       if (e || e2)
 562         {
 563           if (symtab->dump_file)
 564             fprintf (symtab->dump_file,
 565                      "Giving up; number of indirect calls differs.\n");
 566           match=false;
 567         }
 568
 569       if (match)
 570         FOR_ALL_BB_FN (srcbb, srccfun)
 571           {
 572             unsigned int i;
 573
 574             dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 575             if (dstbb == NULL)
 576               {
 577                 if (symtab->dump_file)
 578                   fprintf (symtab->dump_file,
 579                            "No matching block for bb %i.\n",
 580                            srcbb->index);
 581                 match = false;
 582                 break;
 583               }
 584             if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
 585               {
 586                 if (symtab->dump_file)
 587                   fprintf (symtab->dump_file,
 588                            "Edge count mismatch for bb %i.\n",
 589                            srcbb->index);
 590                 match = false;
 591                 break;
 592               }
 593             for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 594               {
 595                 edge srce = EDGE_SUCC (srcbb, i);
 596                 edge dste = EDGE_SUCC (dstbb, i);
 597                 if (srce->dest->index != dste->dest->index)
 598                   {
 599                     if (symtab->dump_file)
 600                       fprintf (symtab->dump_file,
 601                                "Succ edge mismatch for bb %i.\n",
 602                                srce->dest->index);
 603                     match = false;
 604                     break;
 605                   }
 606               }
 607           }
 608     }
 609   if (match)
 610     {
 611       struct cgraph_edge *e, *e2;
 612       basic_block srcbb, dstbb;
 613
 614       /* Function and global profile may be out of sync.  First scale it same
 615          way as fixup_cfg would.  */
 616       profile_count srcnum = src->count;
 617       profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
 618       bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
 619       profile_count dstnum = orig_count;
 620       profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
 621       bool dstscale = !copy_counts
 622                       && dstnum.initialized_p () && !(dstnum == dstden);
 623
 624       /* TODO: merge also statement histograms.  */
 625       FOR_ALL_BB_FN (srcbb, srccfun)
 626         {
 627           unsigned int i;
 628
 629           dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 630
 631           profile_count srccount = srcbb->count;
 632           if (srcscale)
 633             srccount = srccount.apply_scale (srcnum, srcden);
 634           if (dstscale)
 635             dstbb->count = dstbb->count.apply_scale (dstnum, dstden);
 636
 637           if (copy_counts)
 638             {
 639               dstbb->count = srccount;
 640               for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 641                 {
 642                   edge srce = EDGE_SUCC (srcbb, i);
 643                   edge dste = EDGE_SUCC (dstbb, i);
 644                   if (srce->probability.initialized_p ())
 645                     dste->probability = srce->probability;
 646                 }
 647             }
 648           else
 649             {
 650               for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 651                 {
 652                   edge srce = EDGE_SUCC (srcbb, i);
 653                   edge dste = EDGE_SUCC (dstbb, i);
 654                   profile_count sum =
 655                     dstbb->count.ipa () + srccount.ipa ();
 656                   if (sum.nonzero_p ())
 657                     dste->probability =
 658                       dste->probability * dstbb->count.ipa ().probability_in
 659                                                    (sum)
 660                       + srce->probability * srcbb->count.ipa ().probability_in
 661                                                    (sum);
 662                 }
 663               dstbb->count = dstbb->count.ipa () + srccount.ipa ();
 664             }
 665         }
 666       push_cfun (dstcfun);
 667       update_max_bb_count ();
 668       compute_function_frequency ();
 669       pop_cfun ();
 670       for (e = dst->callees; e; e = e->next_callee)
 671         {
 672           if (e->speculative)
 673             continue;
 674           e->count = gimple_bb (e->call_stmt)->count;
 675         }
 676       for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
 677            e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
 678         {
 679           if (!e->speculative && !e2->speculative)
 680             {
 681               /* FIXME: we need to also merge ipa-profile histograms
 682                  because with LTO merging happens from lto-symtab before
 683                  these are converted to indirect edges.  */
 684               e->count = gimple_bb (e->call_stmt)->count;
 685               continue;
 686             }
 687
 688           /* When copying just remove all speuclations on dst and then copy
 689              one from src.  */
 690           if (copy_counts)
 691             {
 692               while (e->speculative)
 693                 cgraph_edge::resolve_speculation (e, NULL);
 694               e->count = gimple_bb (e->call_stmt)->count;
 695               if (e2->speculative)
 696                 {
 697                   for (cgraph_edge *e3 = e2->first_speculative_call_target ();
 698                        e3;
 699                        e3 = e3->next_speculative_call_target ())
 700                     {
 701                       cgraph_edge *ns;
 702                       ns = e->make_speculative
 703                          (dyn_cast <cgraph_node *>
 704                             (e3->speculative_call_target_ref ()->referred),
 705                              e3->count, e3->speculative_id);
 706                       /* Target may differ from ref (for example it may be
 707                          redirected to local alias.  */
 708                       ns->redirect_callee (e3->callee);
 709                     }
 710                 }
 711               continue;
 712             }
 713
 714           /* Iterate all speculations in SRC, see if corresponding ones exist
 715              int DST and if so, sum the counts.  Otherwise create new
 716              speculation.  */
 717           int max_spec = 0;
 718           for (cgraph_edge *e3 = e->first_speculative_call_target ();
 719                e3;
 720                e3 = e3->next_speculative_call_target ())
 721             if (e3->speculative_id > max_spec)
 722               max_spec = e3->speculative_id;
 723           for (cgraph_edge *e3 = e2->first_speculative_call_target ();
 724                e3;
 725                e3 = e3->next_speculative_call_target ())
 726             {
 727               cgraph_edge *te
 728                  = e->speculative_call_for_target
 729                          (dyn_cast <cgraph_node *>
 730                             (e3->speculative_call_target_ref ()->referred));
 731               if (te)
 732                 te->count = te->count + e3->count;
 733               else
 734                 {
 735                   e->count = e->count + e3->count;
 736                   cgraph_edge *ns;
 737                   ns = e->make_speculative
 738                          (dyn_cast <cgraph_node *>
 739                             (e3->speculative_call_target_ref ()
 740                              ->referred),
 741                           e3->count,
 742                           e3->speculative_id + max_spec + 1);
 743                   /* Target may differ from ref (for example it may be
 744                      redirected to local alias.  */
 745                   ns->redirect_callee (e3->callee);
 746                 }
 747             }
 748         }
 749       if (!preserve_body)
 750         src->release_body ();
 751       /* Update summary.  */
 752       compute_fn_summary (dst, 0);
 753     }
 754   /* We can't update CFG profile, but we can scale IPA profile. CFG
 755      will be scaled according to dst->count after IPA passes.  */
 756   else
 757     scale_ipa_profile_for_fn (dst, orig_count);
 758   src->decl = oldsrcdecl;
 759 }
 760
 761 /* Return true if call to DEST is known to be self-recusive
 762    call withing FUNC.  */
 763
 764 bool
 765 recursive_call_p (tree func, tree dest)
 766 {
 767   struct cgraph_node *dest_node = cgraph_node::get_create (dest);
 768   struct cgraph_node *cnode = cgraph_node::get_create (func);
 769   ipa_ref *alias;
 770   enum availability avail;
 771
 772   gcc_assert (!cnode->alias);
 773   if (cnode != dest_node->ultimate_alias_target (&avail))
 774     return false;
 775   if (avail >= AVAIL_AVAILABLE)
 776     return true;
 777   if (!dest_node->semantically_equivalent_p (cnode))
 778     return false;
 779   /* If there is only one way to call the fuction or we know all of them
 780      are semantically equivalent, we still can consider call recursive.  */
 781   FOR_EACH_ALIAS (cnode, alias)
 782     if (!dest_node->semantically_equivalent_p (alias->referring))
 783       return false;
 784   return true;
 785 }
 786
 787 /* Return true if stmt may terminate execution of function.
 788    If assume_return_or_eh we can further assume that the function ends
 789    either by retrn statement or EH (no trapping or infinite loops).  */
 790
 791 bool
 792 stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh)
 793 {
 794   if (stmt_can_throw_external (fun, stmt))
 795     return true;
 796   if (assume_return_or_eh)
 797     return false;
 798   gasm *astmt = dyn_cast <gasm *> (stmt);
 799   if (astmt && gimple_asm_volatile_p (astmt))
 800     return true;
 801   if (gimple_could_trap_p (stmt))
 802     return true;
 803   if (gcall *call = dyn_cast <gcall *> (stmt))
 804     {
 805       int flags = gimple_call_flags (call);
 806       if (flags & (ECF_PURE | ECF_CONST) && ! (flags & ECF_LOOPING_CONST_OR_PURE))
 807         return false;
 808       modref_summary *s = get_modref_function_summary (call, NULL);
 809       if (s && !s->side_effects)
 810         return false;
 811       return true;
 812     }
 813   return false;
 814 }
 815
 816 /* Return bitmap of all basic blocks whose first statements are known to
 817    execute on every invocation of the function.
 818
 819    If assume_return_or_eh we can further assume that the function ends
 820    either by retrn statement or EH (no trapping or infinite loops).
 821    This is useful when sumarizing function in passes like ipa-modref.
 822
 823    Seeing assume_return_or_eh to false is used to prove that given
 824    statmeent will be executed even if the function gets into infinite
 825    loop or trap.  */
 826 bitmap
 827 find_always_executed_bbs (function *fun, bool assume_return_or_eh)
 828 {
 829   auto_vec<basic_block, 20> stack;
 830   auto_vec<basic_block, 20> terminating_bbs;
 831   hash_set<basic_block> visited;
 832   hash_set<basic_block> terminating_bbs_set;
 833   edge e;
 834   edge_iterator ei;
 835   int flags = flags_from_decl_or_type (fun->decl);
 836   /* PUre and const functions always return.  */
 837   assume_return_or_eh |= (flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE);
 838   if (!assume_return_or_eh)
 839     mark_dfs_back_edges (fun);
 840
 841   /* First walk all BBs reachable from entry stopping on statements that may
 842      terminate execution.  Everything past this statement is not going to be executed
 843      each invocation.  */
 844   stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
 845   while (!stack.is_empty ())
 846     {
 847       basic_block bb = stack.pop ();
 848       bool found = false, found_exit = false;
 849       if (bb->index == EXIT_BLOCK)
 850         continue;
 851       FOR_EACH_EDGE (e, ei, bb->succs)
 852         {
 853           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
 854             {
 855               found_exit = true;
 856               break;
 857             }
 858           /* Watch for infinite loops.  */
 859           if (!found
 860               && !assume_return_or_eh && (e->flags & EDGE_DFS_BACK))
 861             {
 862               if (!dom_info_available_p (CDI_DOMINATORS))
 863                 calculate_dominance_info (CDI_DOMINATORS);
 864               /* If this is not a loop latch edge it is an irreducible region.
 865                  Assume that it is infinite.
 866                  TODO: with C++ forced progression we can still walk the
 867                  irreducible region and see if it contains any side effects.
 868                  Similarly for loops.  -ffinite-loops does not really imply
 869                  this since we allow inlining across -ffinite-loops bondary
 870                  and thus it can be used only as a loop flag.  */
 871               if (e->dest->loop_father->header != e->dest
 872                   || !dominated_by_p (CDI_DOMINATORS, bb, e->dest))
 873                 found = true;
 874               else if (!finite_loop_p (e->dest->loop_father))
 875                 found = true;
 876             }
 877         }
 878       if (!assume_return_or_eh
 879           && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
 880         found = true;
 881       for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
 882            !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
 883         if (stmt_may_terminate_function_p (fun, gsi_stmt (si), assume_return_or_eh))
 884           {
 885             found = true;
 886             break;
 887           }
 888       if (found)
 889         {
 890           visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
 891           if (!found_exit)
 892             {
 893               terminating_bbs.safe_push (bb);
 894               terminating_bbs_set.add (bb);
 895             }
 896         }
 897       else
 898         FOR_EACH_EDGE (e, ei, bb->succs)
 899           if (!visited.add (e->dest))
 900             stack.safe_push (e->dest);
 901     }
 902
 903   /* Next walk from exit block and find all articulations in the CFG.
 904      Add all terminating basic blocks as "fake" predecessors of the
 905      exit block.  */
 906
 907   bitmap ret = BITMAP_ALLOC (NULL);
 908   /* A degenerated case when there is no path to exit.  */
 909   if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun)))
 910     {
 911       bitmap_set_bit (ret,
 912                       single_succ_edge
 913                         (ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
 914       return ret;
 915     }
 916
 917   struct astate
 918   {
 919     unsigned int dfs_preorder;
 920     unsigned int dfs_postorder;
 921
 922     unsigned int low, high;
 923   };
 924
 925   struct worklist
 926   {
 927     basic_block bb;
 928     astate *cstate;
 929   };
 930
 931   struct obstack state_obstack;
 932   gcc_obstack_init (&state_obstack);
 933   hash_map<basic_block, astate *> state;
 934   auto_vec<worklist, 32> worklist_vec;
 935   unsigned int next_dfs_num = 1;
 936
 937   /* Always executed blocks are blocks that are on every path from entry to exit.
 938      We proceed in two steps.  First we do backward DFS walk (so we know that entry
 939      is always reached) and record preorder and postorder visiting times.
 940
 941      In second step we proceed in postorder and for every block A we compute
 942      minimal preorder (A.low) and maximal postorder (A.high) of block reachable
 943      from the BBs in DFS subtree of A.  If A is always executed there are no
 944      edges out of this subtree.  This can be tested by checking that A.low == A.preorder
 945      and B.high == A.postorder.
 946
 947      This is first step. Do backward DFS walk and record preorder, postorder
 948      and predecessor info.  Initialize stack in postorder.  */
 949   worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
 950   worklist_vec.safe_push (we);
 951   while (!worklist_vec.is_empty ())
 952     {
 953       worklist &w = worklist_vec.last ();
 954       basic_block bb = w.bb;
 955       astate *cstate = w.cstate;
 956
 957       if (!cstate)
 958         {
 959           astate **slot = &state.get_or_insert (bb);
 960
 961           cstate = *slot;
 962           /* Already processed by DFS?  */
 963           if (cstate)
 964             {
 965               worklist_vec.pop ();
 966               continue;
 967             }
 968           /* DFS is visiting BB for first time.  */
 969           *slot = cstate = XOBNEW (&state_obstack, struct astate);
 970           cstate->low = cstate->high = cstate->dfs_preorder = next_dfs_num++;
 971           w.cstate = cstate;
 972           /* Exit block is special; process all fake edges we identified.  */
 973           if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
 974             for (basic_block bb2 : terminating_bbs)
 975               {
 976                 worklist we = {bb2, NULL};
 977                 worklist_vec.safe_push (we);
 978               }
 979           FOR_EACH_EDGE (e, ei, bb->preds)
 980             if (visited.contains (e->src))
 981               {
 982                 worklist we = {e->src, NULL};
 983                 worklist_vec.safe_push (we);
 984               }
 985           /* Keep BB on worklist so we process it last time.  */
 986           continue;
 987         }
 988       /* We are finished with processing reachable BBs, see if we have articulation.  */
 989       worklist_vec.pop ();
 990       cstate->high = cstate->dfs_postorder = next_dfs_num++;
 991       stack.safe_push (bb);
 992     }
 993   /* This is the final postorder walk.  Determine low and high values and mark
 994      always executed blocks.  */
 995   for (basic_block bb : stack)
 996     {
 997       astate *cstate = *state.get (bb);
 998       FOR_EACH_EDGE (e, ei, bb->preds)
 999         {
1000           astate **cstate2 = state.get (e->src);
1001           /* We skip walking part of CFG reached only after first edge to exit.
1002              No BB reachable from the skipped part is always executed */
1003           if (!cstate2)
1004             {
1005               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
1006                 cstate->low = 0;
1007               continue;
1008             }
1009           cstate->low = MIN (cstate->low, (*cstate2)->low);
1010           cstate->high = MAX (cstate->high, (*cstate2)->high);
1011         }
1012       if (dump_file && (dump_flags & TDF_DETAILS) && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1013         fprintf (dump_file, "BB %i %s preorder %i posorder %i low %i high %i\n",
1014                  bb->index, terminating_bbs_set.contains (bb) ? "(terminating)": "",
1015                  cstate->dfs_preorder, cstate->dfs_postorder, cstate->low, cstate->high);
1016       if (cstate->low == cstate->dfs_preorder && cstate->high == cstate->dfs_postorder
1017           && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1018         bitmap_set_bit (ret, bb->index);
1019       if (terminating_bbs_set.contains (bb))
1020         cstate->low = 0;
1021       else
1022         FOR_EACH_EDGE (e, ei, bb->succs)
1023           {
1024             astate **cstate2 = state.get (e->dest);
1025             if (!cstate2)
1026               continue;
1027             cstate->low = MIN (cstate->low, (*cstate2)->low);
1028             cstate->high = MAX (cstate->high, (*cstate2)->high);
1029           }
1030       }
1031   obstack_free (&state_obstack, NULL);
1032   if (dump_file)
1033     {
1034       fprintf (dump_file, "Always executed bbbs %s: ",
1035                assume_return_or_eh ? "(assuming return or EH)": "");
1036       bitmap_print (dump_file, ret, "", "\n");
1037     }
1038
1039   return ret;
1040 }