gcc/ipa-utils.cc

   1 /* Utilities for ipa analysis.
   2    Copyright (C) 2005-2023 Free Software Foundation, Inc.
   3    Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "tree.h"
  26 #include "gimple.h"
  27 #include "predict.h"
  28 #include "alloc-pool.h"
  29 #include "cgraph.h"
  30 #include "lto-streamer.h"
  31 #include "dumpfile.h"
  32 #include "splay-tree.h"
  33 #include "ipa-utils.h"
  34 #include "symbol-summary.h"
  35 #include "tree-vrp.h"
  36 #include "ipa-prop.h"
  37 #include "ipa-fnsummary.h"
  38 #include "tree-eh.h"
  39 #include "gimple-iterator.h"
  40 #include "ipa-modref-tree.h"
  41 #include "ipa-modref.h"
  42 #include "tree-ssa-loop-niter.h"
  43 #include "calls.h"
  44 #include "cfgloop.h"
  45 #include "cfganal.h"
  46
  47 /* Debugging function for postorder and inorder code. NOTE is a string
  48    that is printed before the nodes are printed.  ORDER is an array of
  49    cgraph_nodes that has COUNT useful nodes in it.  */
  50
  51 void
  52 ipa_print_order (FILE* out,
  53                  const char * note,
  54                  struct cgraph_node** order,
  55                  int count)
  56 {
  57   int i;
  58   fprintf (out, "\n\n ordered call graph: %s\n", note);
  59
  60   for (i = count - 1; i >= 0; i--)
  61     order[i]->dump (out);
  62   fprintf (out, "\n");
  63   fflush (out);
  64 }
  65
  66
  67 struct searchc_env {
  68   struct cgraph_node **stack;
  69   struct cgraph_node **result;
  70   int stack_size;
  71   int order_pos;
  72   splay_tree nodes_marked_new;
  73   bool reduce;
  74   int count;
  75 };
  76
  77 /* This is an implementation of Tarjan's strongly connected region
  78    finder as reprinted in Aho Hopcraft and Ullman's The Design and
  79    Analysis of Computer Programs (1975) pages 192-193.  This version
  80    has been customized for cgraph_nodes.  The env parameter is because
  81    it is recursive and there are no nested functions here.  This
  82    function should only be called from itself or
  83    ipa_reduced_postorder.  ENV is a stack env and would be
  84    unnecessary if C had nested functions.  V is the node to start
  85    searching from.  */
  86
  87 static void
  88 searchc (struct searchc_env* env, struct cgraph_node *v,
  89          bool (*ignore_edge) (struct cgraph_edge *))
  90 {
  91   struct cgraph_edge *edge;
  92   struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;
  93
  94   /* mark node as old */
  95   v_info->new_node = false;
  96   splay_tree_remove (env->nodes_marked_new, v->get_uid ());
  97
  98   v_info->dfn_number = env->count;
  99   v_info->low_link = env->count;
 100   env->count++;
 101   env->stack[(env->stack_size)++] = v;
 102   v_info->on_stack = true;
 103
 104   for (edge = v->callees; edge; edge = edge->next_callee)
 105     {
 106       struct ipa_dfs_info * w_info;
 107       enum availability avail;
 108       struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);
 109
 110       if (!w || (ignore_edge && ignore_edge (edge)))
 111         continue;
 112
 113       if (w->aux
 114           && (avail >= AVAIL_INTERPOSABLE))
 115         {
 116           w_info = (struct ipa_dfs_info *) w->aux;
 117           if (w_info->new_node)
 118             {
 119               searchc (env, w, ignore_edge);
 120               v_info->low_link =
 121                 (v_info->low_link < w_info->low_link) ?
 122                 v_info->low_link : w_info->low_link;
 123             }
 124           else
 125             if ((w_info->dfn_number < v_info->dfn_number)
 126                 && (w_info->on_stack))
 127               v_info->low_link =
 128                 (w_info->dfn_number < v_info->low_link) ?
 129                 w_info->dfn_number : v_info->low_link;
 130         }
 131     }
 132
 133
 134   if (v_info->low_link == v_info->dfn_number)
 135     {
 136       struct cgraph_node *last = NULL;
 137       struct cgraph_node *x;
 138       struct ipa_dfs_info *x_info;
 139       do {
 140         x = env->stack[--(env->stack_size)];
 141         x_info = (struct ipa_dfs_info *) x->aux;
 142         x_info->on_stack = false;
 143         x_info->scc_no = v_info->dfn_number;
 144
 145         if (env->reduce)
 146           {
 147             x_info->next_cycle = last;
 148             last = x;
 149           }
 150         else
 151           env->result[env->order_pos++] = x;
 152       }
 153       while (v != x);
 154       if (env->reduce)
 155         env->result[env->order_pos++] = v;
 156     }
 157 }
 158
 159 /* Topsort the call graph by caller relation.  Put the result in ORDER.
 160
 161    The REDUCE flag is true if you want the cycles reduced to single nodes.
 162    You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
 163    call graph nodes in a reduced node.
 164
 165    Set ALLOW_OVERWRITABLE if nodes with such availability should be included.
 166    IGNORE_EDGE, if non-NULL is a hook that may make some edges insignificant
 167    for the topological sort.   */
 168
 169 int
 170 ipa_reduced_postorder (struct cgraph_node **order,
 171                        bool reduce,
 172                        bool (*ignore_edge) (struct cgraph_edge *))
 173 {
 174   struct cgraph_node *node;
 175   struct searchc_env env;
 176   splay_tree_node result;
 177   env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 178   env.stack_size = 0;
 179   env.result = order;
 180   env.order_pos = 0;
 181   env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
 182   env.count = 1;
 183   env.reduce = reduce;
 184
 185   FOR_EACH_DEFINED_FUNCTION (node)
 186     {
 187       enum availability avail = node->get_availability ();
 188
 189       if (avail > AVAIL_INTERPOSABLE
 190           || avail == AVAIL_INTERPOSABLE)
 191         {
 192           /* Reuse the info if it is already there.  */
 193           struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
 194           if (!info)
 195             info = XCNEW (struct ipa_dfs_info);
 196           info->new_node = true;
 197           info->on_stack = false;
 198           info->next_cycle = NULL;
 199           node->aux = info;
 200
 201           splay_tree_insert (env.nodes_marked_new,
 202                              (splay_tree_key)node->get_uid (),
 203                              (splay_tree_value)node);
 204         }
 205       else
 206         node->aux = NULL;
 207     }
 208   result = splay_tree_min (env.nodes_marked_new);
 209   while (result)
 210     {
 211       node = (struct cgraph_node *)result->value;
 212       searchc (&env, node, ignore_edge);
 213       result = splay_tree_min (env.nodes_marked_new);
 214     }
 215   splay_tree_delete (env.nodes_marked_new);
 216   free (env.stack);
 217
 218   return env.order_pos;
 219 }
 220
 221 /* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
 222    graph nodes.  */
 223
 224 void
 225 ipa_free_postorder_info (void)
 226 {
 227   struct cgraph_node *node;
 228   FOR_EACH_DEFINED_FUNCTION (node)
 229     {
 230       /* Get rid of the aux information.  */
 231       if (node->aux)
 232         {
 233           free (node->aux);
 234           node->aux = NULL;
 235         }
 236     }
 237 }
 238
 239 /* Get the set of nodes for the cycle in the reduced call graph starting
 240    from NODE.  */
 241
 242 vec<cgraph_node *>
 243 ipa_get_nodes_in_cycle (struct cgraph_node *node)
 244 {
 245   vec<cgraph_node *> v = vNULL;
 246   struct ipa_dfs_info *node_dfs_info;
 247   while (node)
 248     {
 249       v.safe_push (node);
 250       node_dfs_info = (struct ipa_dfs_info *) node->aux;
 251       node = node_dfs_info->next_cycle;
 252     }
 253   return v;
 254 }
 255
 256 /* Return true iff the CS is an edge within a strongly connected component as
 257    computed by ipa_reduced_postorder.  */
 258
 259 bool
 260 ipa_edge_within_scc (struct cgraph_edge *cs)
 261 {
 262   struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
 263   struct ipa_dfs_info *callee_dfs;
 264   struct cgraph_node *callee = cs->callee->function_symbol ();
 265
 266   callee_dfs = (struct ipa_dfs_info *) callee->aux;
 267   return (caller_dfs
 268           && callee_dfs
 269           && caller_dfs->scc_no == callee_dfs->scc_no);
 270 }
 271
 272 struct postorder_stack
 273 {
 274   struct cgraph_node *node;
 275   struct cgraph_edge *edge;
 276   int ref;
 277 };
 278
 279 /* Fill array order with all nodes with output flag set in the reverse
 280    topological order.  Return the number of elements in the array.
 281    FIXME: While walking, consider aliases, too.  */
 282
 283 int
 284 ipa_reverse_postorder (struct cgraph_node **order)
 285 {
 286   struct cgraph_node *node, *node2;
 287   int stack_size = 0;
 288   int order_pos = 0;
 289   struct cgraph_edge *edge;
 290   int pass;
 291   struct ipa_ref *ref = NULL;
 292
 293   struct postorder_stack *stack =
 294     XCNEWVEC (struct postorder_stack, symtab->cgraph_count);
 295
 296   /* We have to deal with cycles nicely, so use a depth first traversal
 297      output algorithm.  Ignore the fact that some functions won't need
 298      to be output and put them into order as well, so we get dependencies
 299      right through inline functions.  */
 300   FOR_EACH_FUNCTION (node)
 301     node->aux = NULL;
 302   for (pass = 0; pass < 2; pass++)
 303     FOR_EACH_FUNCTION (node)
 304       if (!node->aux
 305           && (pass
 306               || (!node->address_taken
 307                   && !node->inlined_to
 308                   && !node->alias && !node->thunk
 309                   && !node->only_called_directly_p ())))
 310         {
 311           stack_size = 0;
 312           stack[stack_size].node = node;
 313           stack[stack_size].edge = node->callers;
 314           stack[stack_size].ref = 0;
 315           node->aux = (void *)(size_t)1;
 316           while (stack_size >= 0)
 317             {
 318               while (true)
 319                 {
 320                   node2 = NULL;
 321                   while (stack[stack_size].edge && !node2)
 322                     {
 323                       edge = stack[stack_size].edge;
 324                       node2 = edge->caller;
 325                       stack[stack_size].edge = edge->next_caller;
 326                     }
 327                   for (; stack[stack_size].node->iterate_referring (
 328                                                        stack[stack_size].ref,
 329                                                        ref) && !node2;
 330                        stack[stack_size].ref++)
 331                     {
 332                       if (ref->use == IPA_REF_ALIAS)
 333                         node2 = dyn_cast <cgraph_node *> (ref->referring);
 334                     }
 335                   if (!node2)
 336                     break;
 337                   if (!node2->aux)
 338                     {
 339                       stack[++stack_size].node = node2;
 340                       stack[stack_size].edge = node2->callers;
 341                       stack[stack_size].ref = 0;
 342                       node2->aux = (void *)(size_t)1;
 343                     }
 344                 }
 345               order[order_pos++] = stack[stack_size--].node;
 346             }
 347         }
 348   free (stack);
 349   FOR_EACH_FUNCTION (node)
 350     node->aux = NULL;
 351   return order_pos;
 352 }
 353
 354
 355
 356 /* Given a memory reference T, will return the variable at the bottom
 357    of the access.  Unlike get_base_address, this will recurse through
 358    INDIRECT_REFS.  */
 359
 360 tree
 361 get_base_var (tree t)
 362 {
 363   while (!SSA_VAR_P (t)
 364          && (!CONSTANT_CLASS_P (t))
 365          && TREE_CODE (t) != LABEL_DECL
 366          && TREE_CODE (t) != FUNCTION_DECL
 367          && TREE_CODE (t) != CONST_DECL
 368          && TREE_CODE (t) != CONSTRUCTOR)
 369     {
 370       t = TREE_OPERAND (t, 0);
 371     }
 372   return t;
 373 }
 374
 375 /* Scale function of calls in NODE by ratio ORIG_COUNT/NODE->count.  */
 376
 377 void
 378 scale_ipa_profile_for_fn (struct cgraph_node *node, profile_count orig_count)
 379 {
 380   profile_count to = node->count;
 381   profile_count::adjust_for_ipa_scaling (&to, &orig_count);
 382   struct cgraph_edge *e;
 383
 384   for (e = node->callees; e; e = e->next_callee)
 385     e->count = e->count.apply_scale (to, orig_count);
 386   for (e = node->indirect_calls; e; e = e->next_callee)
 387     e->count = e->count.apply_scale (to, orig_count);
 388 }
 389
 390 /* SRC and DST are going to be merged.  Take SRC's profile and merge it into
 391    DST so it is not going to be lost.  Possibly destroy SRC's body on the way
 392    unless PRESERVE_BODY is set.  */
 393
 394 void
 395 ipa_merge_profiles (struct cgraph_node *dst,
 396                     struct cgraph_node *src,
 397                     bool preserve_body)
 398 {
 399   tree oldsrcdecl = src->decl;
 400   struct function *srccfun, *dstcfun;
 401   bool match = true;
 402   bool copy_counts = false;
 403
 404   if (!src->definition
 405       || !dst->definition)
 406     return;
 407
 408   if (src->frequency < dst->frequency)
 409     src->frequency = dst->frequency;
 410
 411   /* Time profiles are merged.  */
 412   if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
 413     dst->tp_first_run = src->tp_first_run;
 414
 415   if (src->profile_id && !dst->profile_id)
 416     dst->profile_id = src->profile_id;
 417
 418   /* Merging zero profile to dst is no-op.  */
 419   if (src->count.ipa () == profile_count::zero ())
 420     return;
 421
 422   /* FIXME when we merge in unknown profile, we ought to set counts as
 423      unsafe.  */
 424   if (!src->count.initialized_p ()
 425       || !(src->count.ipa () == src->count))
 426     return;
 427   profile_count orig_count = dst->count;
 428
 429   /* Either sum the profiles if both are IPA and not global0, or
 430      pick more informative one (that is nonzero IPA if other is
 431      uninitialized, guessed or global0).   */
 432
 433   if ((dst->count.ipa ().nonzero_p ()
 434        || src->count.ipa ().nonzero_p ())
 435       && dst->count.ipa ().initialized_p ()
 436       && src->count.ipa ().initialized_p ())
 437     dst->count = dst->count.ipa () + src->count.ipa ();
 438   else if (dst->count.ipa ().initialized_p ())
 439     ;
 440   else if (src->count.ipa ().initialized_p ())
 441     {
 442       copy_counts = true;
 443       dst->count = src->count.ipa ();
 444     }
 445
 446   /* If no updating needed return early.  */
 447   if (dst->count == orig_count)
 448     return;
 449
 450   if (symtab->dump_file)
 451     {
 452       fprintf (symtab->dump_file, "Merging profiles of %s count:",
 453                src->dump_name ());
 454       src->count.dump (symtab->dump_file);
 455       fprintf (symtab->dump_file, " to %s count:",
 456                dst->dump_name ());
 457       orig_count.dump (symtab->dump_file);
 458       fprintf (symtab->dump_file, " resulting count:");
 459       dst->count.dump (symtab->dump_file);
 460       fprintf (symtab->dump_file, "\n");
 461     }
 462
 463   /* First handle functions with no gimple body.  */
 464   if (dst->thunk || dst->alias
 465       || src->thunk || src->alias)
 466     {
 467       scale_ipa_profile_for_fn (dst, orig_count);
 468       return;
 469     }
 470
 471   /* This is ugly.  We need to get both function bodies into memory.
 472      If declaration is merged, we need to duplicate it to be able
 473      to load body that is being replaced.  This makes symbol table
 474      temporarily inconsistent.  */
 475   if (src->decl == dst->decl)
 476     {
 477       struct lto_in_decl_state temp;
 478       struct lto_in_decl_state *state;
 479
 480       /* We are going to move the decl, we want to remove its file decl data.
 481          and link these with the new decl. */
 482       temp.fn_decl = src->decl;
 483       lto_in_decl_state **slot
 484         = src->lto_file_data->function_decl_states->find_slot (&temp,
 485                                                                NO_INSERT);
 486       state = *slot;
 487       src->lto_file_data->function_decl_states->clear_slot (slot);
 488       gcc_assert (state);
 489
 490       /* Duplicate the decl and be sure it does not link into body of DST.  */
 491       src->decl = copy_node (src->decl);
 492       DECL_STRUCT_FUNCTION (src->decl) = NULL;
 493       DECL_ARGUMENTS (src->decl) = NULL;
 494       DECL_INITIAL (src->decl) = NULL;
 495       DECL_RESULT (src->decl) = NULL;
 496
 497       /* Associate the decl state with new declaration, so LTO streamer
 498          can look it up.  */
 499       state->fn_decl = src->decl;
 500       slot
 501         = src->lto_file_data->function_decl_states->find_slot (state, INSERT);
 502       gcc_assert (!*slot);
 503       *slot = state;
 504     }
 505   src->get_untransformed_body ();
 506   dst->get_untransformed_body ();
 507   srccfun = DECL_STRUCT_FUNCTION (src->decl);
 508   dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
 509   if (n_basic_blocks_for_fn (srccfun)
 510       != n_basic_blocks_for_fn (dstcfun))
 511     {
 512       if (symtab->dump_file)
 513         fprintf (symtab->dump_file,
 514                  "Giving up; number of basic block mismatch.\n");
 515       match = false;
 516     }
 517   else if (last_basic_block_for_fn (srccfun)
 518            != last_basic_block_for_fn (dstcfun))
 519     {
 520       if (symtab->dump_file)
 521         fprintf (symtab->dump_file,
 522                  "Giving up; last block mismatch.\n");
 523       match = false;
 524     }
 525   else
 526     {
 527       basic_block srcbb, dstbb;
 528       struct cgraph_edge *e, *e2;
 529
 530       for (e = dst->callees, e2 = src->callees; e && e2 && match;
 531            e2 = e2->next_callee, e = e->next_callee)
 532         {
 533           if (gimple_bb (e->call_stmt)->index
 534               != gimple_bb (e2->call_stmt)->index)
 535             {
 536               if (symtab->dump_file)
 537                 fprintf (symtab->dump_file,
 538                          "Giving up; call stmt mismatch.\n");
 539               match = false;
 540             }
 541         }
 542       if (e || e2)
 543         {
 544           if (symtab->dump_file)
 545             fprintf (symtab->dump_file,
 546                      "Giving up; number of calls differs.\n");
 547           match = false;
 548         }
 549       for (e = dst->indirect_calls, e2 = src->indirect_calls; e && e2 && match;
 550            e2 = e2->next_callee, e = e->next_callee)
 551         {
 552           if (gimple_bb (e->call_stmt)->index
 553               != gimple_bb (e2->call_stmt)->index)
 554             {
 555               if (symtab->dump_file)
 556                 fprintf (symtab->dump_file,
 557                          "Giving up; indirect call stmt mismatch.\n");
 558               match = false;
 559             }
 560         }
 561       if (e || e2)
 562         {
 563           if (symtab->dump_file)
 564             fprintf (symtab->dump_file,
 565                      "Giving up; number of indirect calls differs.\n");
 566           match=false;
 567         }
 568
 569       if (match)
 570         FOR_ALL_BB_FN (srcbb, srccfun)
 571           {
 572             unsigned int i;
 573
 574             dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 575             if (dstbb == NULL)
 576               {
 577                 if (symtab->dump_file)
 578                   fprintf (symtab->dump_file,
 579                            "No matching block for bb %i.\n",
 580                            srcbb->index);
 581                 match = false;
 582                 break;
 583               }
 584             if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
 585               {
 586                 if (symtab->dump_file)
 587                   fprintf (symtab->dump_file,
 588                            "Edge count mismatch for bb %i.\n",
 589                            srcbb->index);
 590                 match = false;
 591                 break;
 592               }
 593             for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 594               {
 595                 edge srce = EDGE_SUCC (srcbb, i);
 596                 edge dste = EDGE_SUCC (dstbb, i);
 597                 if (srce->dest->index != dste->dest->index)
 598                   {
 599                     if (symtab->dump_file)
 600                       fprintf (symtab->dump_file,
 601                                "Succ edge mismatch for bb %i.\n",
 602                                srce->dest->index);
 603                     match = false;
 604                     break;
 605                   }
 606               }
 607           }
 608     }
 609   if (match)
 610     {
 611       struct cgraph_edge *e, *e2;
 612       basic_block srcbb, dstbb;
 613
 614       /* Function and global profile may be out of sync.  First scale it same
 615          way as fixup_cfg would.  */
 616       profile_count srcnum = src->count;
 617       profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
 618       bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
 619       profile_count dstnum = orig_count;
 620       profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
 621       bool dstscale = !copy_counts
 622                       && dstnum.initialized_p () && !(dstnum == dstden);
 623
 624       /* TODO: merge also statement histograms.  */
 625       FOR_ALL_BB_FN (srcbb, srccfun)
 626         {
 627           unsigned int i;
 628
 629           dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 630
 631           profile_count srccount = srcbb->count;
 632           if (srcscale)
 633             srccount = srccount.apply_scale (srcnum, srcden);
 634           if (dstscale)
 635             dstbb->count = dstbb->count.apply_scale (dstnum, dstden);
 636
 637           if (copy_counts)
 638             {
 639               dstbb->count = srccount;
 640               for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 641                 {
 642                   edge srce = EDGE_SUCC (srcbb, i);
 643                   edge dste = EDGE_SUCC (dstbb, i);
 644                   if (srce->probability.initialized_p ())
 645                     dste->probability = srce->probability;
 646                 }
 647             }
 648           else
 649             {
 650               for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 651                 {
 652                   edge srce = EDGE_SUCC (srcbb, i);
 653                   edge dste = EDGE_SUCC (dstbb, i);
 654                   dste->probability =
 655                     dste->probability * dstbb->count.ipa ().probability_in
 656                                                  (dstbb->count.ipa ()
 657                                                   + srccount.ipa ())
 658                     + srce->probability * srcbb->count.ipa ().probability_in
 659                                                  (dstbb->count.ipa ()
 660                                                   + srccount.ipa ());
 661                 }
 662               dstbb->count = dstbb->count.ipa () + srccount.ipa ();
 663             }
 664         }
 665       push_cfun (dstcfun);
 666       update_max_bb_count ();
 667       compute_function_frequency ();
 668       pop_cfun ();
 669       for (e = dst->callees; e; e = e->next_callee)
 670         {
 671           if (e->speculative)
 672             continue;
 673           e->count = gimple_bb (e->call_stmt)->count;
 674         }
 675       for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
 676            e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
 677         {
 678           if (!e->speculative && !e2->speculative)
 679             {
 680               /* FIXME: we need to also merge ipa-profile histograms
 681                  because with LTO merging happens from lto-symtab before
 682                  these are converted to indirect edges.  */
 683               e->count = gimple_bb (e->call_stmt)->count;
 684               continue;
 685             }
 686
 687           /* When copying just remove all speuclations on dst and then copy
 688              one from src.  */
 689           if (copy_counts)
 690             {
 691               while (e->speculative)
 692                 cgraph_edge::resolve_speculation (e, NULL);
 693               e->count = gimple_bb (e->call_stmt)->count;
 694               if (e2->speculative)
 695                 {
 696                   for (cgraph_edge *e3 = e2->first_speculative_call_target ();
 697                        e3;
 698                        e3 = e3->next_speculative_call_target ())
 699                     {
 700                       cgraph_edge *ns;
 701                       ns = e->make_speculative
 702                          (dyn_cast <cgraph_node *>
 703                             (e3->speculative_call_target_ref ()->referred),
 704                              e3->count, e3->speculative_id);
 705                       /* Target may differ from ref (for example it may be
 706                          redirected to local alias.  */
 707                       ns->redirect_callee (e3->callee);
 708                     }
 709                 }
 710               continue;
 711             }
 712
 713           /* Iterate all speculations in SRC, see if corresponding ones exist
 714              int DST and if so, sum the counts.  Otherwise create new
 715              speculation.  */
 716           int max_spec = 0;
 717           for (cgraph_edge *e3 = e->first_speculative_call_target ();
 718                e3;
 719                e3 = e3->next_speculative_call_target ())
 720             if (e3->speculative_id > max_spec)
 721               max_spec = e3->speculative_id;
 722           for (cgraph_edge *e3 = e2->first_speculative_call_target ();
 723                e3;
 724                e3 = e3->next_speculative_call_target ())
 725             {
 726               cgraph_edge *te
 727                  = e->speculative_call_for_target
 728                          (dyn_cast <cgraph_node *>
 729                             (e3->speculative_call_target_ref ()->referred));
 730               if (te)
 731                 te->count = te->count + e3->count;
 732               else
 733                 {
 734                   e->count = e->count + e3->count;
 735                   cgraph_edge *ns;
 736                   ns = e->make_speculative
 737                          (dyn_cast <cgraph_node *>
 738                             (e3->speculative_call_target_ref ()
 739                              ->referred),
 740                           e3->count,
 741                           e3->speculative_id + max_spec + 1);
 742                   /* Target may differ from ref (for example it may be
 743                      redirected to local alias.  */
 744                   ns->redirect_callee (e3->callee);
 745                 }
 746             }
 747         }
 748       if (!preserve_body)
 749         src->release_body ();
 750       /* Update summary.  */
 751       compute_fn_summary (dst, 0);
 752     }
 753   /* We can't update CFG profile, but we can scale IPA profile. CFG
 754      will be scaled according to dst->count after IPA passes.  */
 755   else
 756     scale_ipa_profile_for_fn (dst, orig_count);
 757   src->decl = oldsrcdecl;
 758 }
 759
 760 /* Return true if call to DEST is known to be self-recusive
 761    call withing FUNC.  */
 762
 763 bool
 764 recursive_call_p (tree func, tree dest)
 765 {
 766   struct cgraph_node *dest_node = cgraph_node::get_create (dest);
 767   struct cgraph_node *cnode = cgraph_node::get_create (func);
 768   ipa_ref *alias;
 769   enum availability avail;
 770
 771   gcc_assert (!cnode->alias);
 772   if (cnode != dest_node->ultimate_alias_target (&avail))
 773     return false;
 774   if (avail >= AVAIL_AVAILABLE)
 775     return true;
 776   if (!dest_node->semantically_equivalent_p (cnode))
 777     return false;
 778   /* If there is only one way to call the fuction or we know all of them
 779      are semantically equivalent, we still can consider call recursive.  */
 780   FOR_EACH_ALIAS (cnode, alias)
 781     if (!dest_node->semantically_equivalent_p (alias->referring))
 782       return false;
 783   return true;
 784 }
 785
 786 /* Return true if stmt may terminate execution of function.
 787    If assume_return_or_eh we can further assume that the function ends
 788    either by retrn statement or EH (no trapping or infinite loops).  */
 789
 790 bool
 791 stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh)
 792 {
 793   if (stmt_can_throw_external (fun, stmt))
 794     return true;
 795   if (assume_return_or_eh)
 796     return false;
 797   gasm *astmt = dyn_cast <gasm *> (stmt);
 798   if (astmt && gimple_asm_volatile_p (astmt))
 799     return true;
 800   if (gimple_could_trap_p (stmt))
 801     return true;
 802   if (gcall *call = dyn_cast <gcall *> (stmt))
 803     {
 804       int flags = gimple_call_flags (call);
 805       if (flags & (ECF_PURE | ECF_CONST) && ! (flags & ECF_LOOPING_CONST_OR_PURE))
 806         return false;
 807       modref_summary *s = get_modref_function_summary (call, NULL);
 808       if (s && !s->side_effects)
 809         return false;
 810       return true;
 811     }
 812   return false;
 813 }
 814
  815 /* Return bitmap of all basic blocks whose first statements are known to
  816    execute on every invocation of the function.
  817
  818    If assume_return_or_eh we can further assume that the function ends
  819    either by return statement or EH (no trapping or infinite loops).
  820    This is useful when summarizing a function in passes like ipa-modref.
  821
  822    Setting assume_return_or_eh to false is used to prove that given
  823    statement will be executed even if the function gets into infinite
  824    loop or trap.  */
 825 bitmap
 826 find_always_executed_bbs (function *fun, bool assume_return_or_eh)
 827 {
 828   auto_vec<basic_block, 20> stack;
 829   auto_vec<basic_block, 20> terminating_bbs;
 830   hash_set<basic_block> visited;
 831   hash_set<basic_block> terminating_bbs_set;
 832   edge e;
 833   edge_iterator ei;
 834   int flags = flags_from_decl_or_type (fun->decl);
 835   /* PUre and const functions always return.  */
 836   assume_return_or_eh |= (flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE);
 837   if (!assume_return_or_eh)
 838     mark_dfs_back_edges (fun);
 839
 840   /* First walk all BBs reachable from entry stopping on statements that may
 841      terminate execution.  Everything past this statement is not going to be executed
 842      each invocation.  */
 843   stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
 844   while (!stack.is_empty ())
 845     {
 846       basic_block bb = stack.pop ();
 847       bool found = false, found_exit = false;
 848       if (bb->index == EXIT_BLOCK)
 849         continue;
 850       FOR_EACH_EDGE (e, ei, bb->succs)
 851         {
 852           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
 853             {
 854               found_exit = true;
 855               break;
 856             }
 857           /* Watch for infinite loops.  */
 858           if (!found
 859               && !assume_return_or_eh && (e->flags & EDGE_DFS_BACK))
 860             {
 861               if (!dom_info_available_p (CDI_DOMINATORS))
 862                 calculate_dominance_info (CDI_DOMINATORS);
 863               /* If this is not a loop latch edge it is an irreducible region.
 864                  Assume that it is infinite.
 865                  TODO: with C++ forced progression we can still walk the
 866                  irreducible region and see if it contains any side effects.
 867                  Similarly for loops.  -ffinite-loops does not really imply
 868                  this since we allow inlining across -ffinite-loops bondary
 869                  and thus it can be used only as a loop flag.  */
 870               if (e->dest->loop_father->header != e->dest
 871                   || !dominated_by_p (CDI_DOMINATORS, bb, e->dest))
 872                 found = true;
 873               else if (!finite_loop_p (e->dest->loop_father))
 874                 found = true;
 875             }
 876         }
 877       if (!assume_return_or_eh
 878           && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
 879         found = true;
 880       for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
 881            !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
 882         if (stmt_may_terminate_function_p (fun, gsi_stmt (si), assume_return_or_eh))
 883           {
 884             found = true;
 885             break;
 886           }
 887       if (found)
 888         {
 889           visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
 890           if (!found_exit)
 891             {
 892               terminating_bbs.safe_push (bb);
 893               terminating_bbs_set.add (bb);
 894             }
 895         }
 896       else
 897         FOR_EACH_EDGE (e, ei, bb->succs)
 898           if (!visited.add (e->dest))
 899             stack.safe_push (e->dest);
 900     }
 901
 902   /* Next walk from exit block and find all articulations in the CFG.
 903      Add all terminating basic blocks as "fake" predecessors of the
 904      exit block.  */
 905
 906   bitmap ret = BITMAP_ALLOC (NULL);
 907   /* A degenerated case when there is no path to exit.  */
 908   if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun)))
 909     {
 910       bitmap_set_bit (ret,
 911                       single_succ_edge
 912                         (ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
 913       return ret;
 914     }
 915
 916   struct astate
 917   {
 918     unsigned int dfs_preorder;
 919     unsigned int dfs_postorder;
 920
 921     unsigned int low, high;
 922   };
 923
 924   struct worklist
 925   {
 926     basic_block bb;
 927     astate *cstate;
 928   };
 929
 930   struct obstack state_obstack;
 931   gcc_obstack_init (&state_obstack);
 932   hash_map<basic_block, astate *> state;
 933   auto_vec<worklist, 32> worklist_vec;
 934   unsigned int next_dfs_num = 1;
 935
 936   /* Always executed blocks are blocks that are on every path from entry to exit.
 937      We proceed in two steps.  First we do backward DFS walk (so we know that entry
 938      is always reached) and record preorder and postorder visiting times.
 939
 940      In second step we proceed in postorder and for every block A we compute
 941      minimal preorder (A.low) and maximal postorder (A.high) of block reachable
 942      from the BBs in DFS subtree of A.  If A is always executed there are no
 943      edges out of this subtree.  This can be tested by checking that A.low == A.preorder
 944      and B.high == A.postorder.
 945
 946      This is first step. Do backward DFS walk and record preorder, postorder
 947      and predecessor info.  Initialize stack in postorder.  */
 948   worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
 949   worklist_vec.safe_push (we);
 950   while (!worklist_vec.is_empty ())
 951     {
 952       worklist &w = worklist_vec.last ();
 953       basic_block bb = w.bb;
 954       astate *cstate = w.cstate;
 955
 956       if (!cstate)
 957         {
 958           astate **slot = &state.get_or_insert (bb);
 959
 960           cstate = *slot;
 961           /* Already processed by DFS?  */
 962           if (cstate)
 963             {
 964               worklist_vec.pop ();
 965               continue;
 966             }
 967           /* DFS is visiting BB for first time.  */
 968           *slot = cstate = XOBNEW (&state_obstack, struct astate);
 969           cstate->low = cstate->high = cstate->dfs_preorder = next_dfs_num++;
 970           w.cstate = cstate;
 971           /* Exit block is special; process all fake edges we identified.  */
 972           if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
 973             for (basic_block bb2 : terminating_bbs)
 974               {
 975                 worklist we = {bb2, NULL};
 976                 worklist_vec.safe_push (we);
 977               }
 978           FOR_EACH_EDGE (e, ei, bb->preds)
 979             if (visited.contains (e->src))
 980               {
 981                 worklist we = {e->src, NULL};
 982                 worklist_vec.safe_push (we);
 983               }
 984           /* Keep BB on worklist so we process it last time.  */
 985           continue;
 986         }
 987       /* We are finished with processing reachable BBs, see if we have articulation.  */
 988       worklist_vec.pop ();
 989       cstate->high = cstate->dfs_postorder = next_dfs_num++;
 990       stack.safe_push (bb);
 991     }
 992   /* This is the final postorder walk.  Determine low and high values and mark
 993      always executed blocks.  */
 994   for (basic_block bb : stack)
 995     {
 996       astate *cstate = *state.get (bb);
 997       FOR_EACH_EDGE (e, ei, bb->preds)
 998         {
 999           astate **cstate2 = state.get (e->src);
1000           /* We skip walking part of CFG reached only after first edge to exit.
1001              No BB reachable from the skipped part is always executed */
1002           if (!cstate2)
1003             {
1004               if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
1005                 cstate->low = 0;
1006               continue;
1007             }
1008           cstate->low = MIN (cstate->low, (*cstate2)->low);
1009           cstate->high = MAX (cstate->high, (*cstate2)->high);
1010         }
1011       if (dump_file && (dump_flags & TDF_DETAILS) && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1012         fprintf (dump_file, "BB %i %s preorder %i posorder %i low %i high %i\n",
1013                  bb->index, terminating_bbs_set.contains (bb) ? "(terminating)": "",
1014                  cstate->dfs_preorder, cstate->dfs_postorder, cstate->low, cstate->high);
1015       if (cstate->low == cstate->dfs_preorder && cstate->high == cstate->dfs_postorder
1016           && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
1017         bitmap_set_bit (ret, bb->index);
1018       if (terminating_bbs_set.contains (bb))
1019         cstate->low = 0;
1020       else
1021         FOR_EACH_EDGE (e, ei, bb->succs)
1022           {
1023             astate **cstate2 = state.get (e->dest);
1024             if (!cstate2)
1025               continue;
1026             cstate->low = MIN (cstate->low, (*cstate2)->low);
1027             cstate->high = MAX (cstate->high, (*cstate2)->high);
1028           }
1029       }
1030   obstack_free (&state_obstack, NULL);
1031   if (dump_file)
1032     {
1033       fprintf (dump_file, "Always executed bbbs %s: ",
1034                assume_return_or_eh ? "(assuming return or EH)": "");
1035       bitmap_print (dump_file, ret, "", "\n");
1036     }
1037
1038   return ret;
1039 }