/* Utilities for ipa analysis.
   Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "dumpfile.h"
#include "splay-tree.h"
#include "ipa-utils.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "ipa-modref-tree.h"
#include "ipa-modref.h"
#include "tree-ssa-loop-niter.h"
#include "calls.h"
#include "cfgloop.h"
#include "cfganal.h"

/* Debugging function for postorder and inorder code.  NOTE is a string
   that is printed before the nodes are printed.  ORDER is an array of
   cgraph_nodes that has COUNT useful nodes in it.  */

void
ipa_print_order (FILE* out,
                 const char * note,
                 struct cgraph_node** order,
                 int count)
{
  int i;
  fprintf (out, "\n\n ordered call graph: %s\n", note);

  for (i = count - 1; i >= 0; i--)
    order[i]->dump (out);
  fprintf (out, "\n");
  fflush (out);
}

struct searchc_env {
  struct cgraph_node **stack;
  struct cgraph_node **result;
  int stack_size;
  int order_pos;
  splay_tree nodes_marked_new;
  bool reduce;
  int count;
};

/* This is an implementation of Tarjan's strongly connected region
   finder as reprinted in Aho, Hopcroft and Ullman's The Design and
   Analysis of Computer Algorithms (1974), pages 192-193.  This version
   has been customized for cgraph_nodes.  The function is recursive and
   should only be called from itself or from ipa_reduced_postorder.
   ENV carries the traversal state that nested functions would otherwise
   capture; it would be unnecessary if C had nested functions.  V is the
   node to start searching from.  */

static void
searchc (struct searchc_env* env, struct cgraph_node *v,
         bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_edge *edge;
  struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;

  /* mark node as old */
  v_info->new_node = false;
  splay_tree_remove (env->nodes_marked_new, v->get_uid ());

  v_info->dfn_number = env->count;
  v_info->low_link = env->count;
  env->count++;
  env->stack[(env->stack_size)++] = v;
  v_info->on_stack = true;

  for (edge = v->callees; edge; edge = edge->next_callee)
    {
      struct ipa_dfs_info * w_info;
      enum availability avail;
      struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);

      if (!w || (ignore_edge && ignore_edge (edge)))
        continue;

      if (w->aux
          && (avail >= AVAIL_INTERPOSABLE))
        {
          w_info = (struct ipa_dfs_info *) w->aux;
          if (w_info->new_node)
            {
              searchc (env, w, ignore_edge);
              v_info->low_link =
                (v_info->low_link < w_info->low_link) ?
                v_info->low_link : w_info->low_link;
            }
          else
            if ((w_info->dfn_number < v_info->dfn_number)
                && (w_info->on_stack))
              v_info->low_link =
                (w_info->dfn_number < v_info->low_link) ?
                w_info->dfn_number : v_info->low_link;
        }
    }

  if (v_info->low_link == v_info->dfn_number)
    {
      struct cgraph_node *last = NULL;
      struct cgraph_node *x;
      struct ipa_dfs_info *x_info;
      do {
        x = env->stack[--(env->stack_size)];
        x_info = (struct ipa_dfs_info *) x->aux;
        x_info->on_stack = false;
        x_info->scc_no = v_info->dfn_number;

        if (env->reduce)
          {
            x_info->next_cycle = last;
            last = x;
          }
        else
          env->result[env->order_pos++] = x;
      }
      while (v != x);
      if (env->reduce)
        env->result[env->order_pos++] = v;
    }
}

/* Topsort the call graph by caller relation.  Put the result in ORDER and
   return the number of entries written to it.

   The REDUCE flag is true if you want the cycles reduced to single nodes.
   You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
   call graph nodes in a reduced node.

   Nodes with at least AVAIL_INTERPOSABLE availability are included.
   IGNORE_EDGE, if non-NULL, is a hook that may make some edges insignificant
   for the topological sort.  */

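/* Illustrative usage sketch (editorial addition, not part of the original
   sources; the per-node "process" hook is hypothetical).  A typical IPA
   pass allocates ORDER, walks the reduced postorder, expands each reduced
   node back into its cycle members and finally releases the aux data:

     cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
     int n = ipa_reduced_postorder (order, true, NULL);
     for (int i = 0; i < n; i++)
       {
         vec<cgraph_node *> cycle = ipa_get_nodes_in_cycle (order[i]);
         for (cgraph_node *node : cycle)
           process (node);   // Hypothetical per-node work.
         cycle.release ();
       }
     ipa_free_postorder_info ();
     free (order);  */
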
int
ipa_reduced_postorder (struct cgraph_node **order,
                       bool reduce,
                       bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_node *node;
  struct searchc_env env;
  splay_tree_node result;
  env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  env.stack_size = 0;
  env.result = order;
  env.order_pos = 0;
  env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
  env.count = 1;
  env.reduce = reduce;

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      enum availability avail = node->get_availability ();

      if (avail >= AVAIL_INTERPOSABLE)
        {
          /* Reuse the info if it is already there.  */
          struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
          if (!info)
            info = XCNEW (struct ipa_dfs_info);
          info->new_node = true;
          info->on_stack = false;
          info->next_cycle = NULL;
          node->aux = info;

          splay_tree_insert (env.nodes_marked_new,
                             (splay_tree_key)node->get_uid (),
                             (splay_tree_value)node);
        }
      else
        node->aux = NULL;
    }
  result = splay_tree_min (env.nodes_marked_new);
  while (result)
    {
      node = (struct cgraph_node *)result->value;
      searchc (&env, node, ignore_edge);
      result = splay_tree_min (env.nodes_marked_new);
    }
  splay_tree_delete (env.nodes_marked_new);
  free (env.stack);

  return env.order_pos;
}

/* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
   graph nodes.  */

void
ipa_free_postorder_info (void)
{
  struct cgraph_node *node;
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      /* Get rid of the aux information.  */
      if (node->aux)
        {
          free (node->aux);
          node->aux = NULL;
        }
    }
}

/* Get the set of nodes for the cycle in the reduced call graph starting
   from NODE.  */

vec<cgraph_node *>
ipa_get_nodes_in_cycle (struct cgraph_node *node)
{
  vec<cgraph_node *> v = vNULL;
  struct ipa_dfs_info *node_dfs_info;
  while (node)
    {
      v.safe_push (node);
      node_dfs_info = (struct ipa_dfs_info *) node->aux;
      node = node_dfs_info->next_cycle;
    }
  return v;
}

/* Return true iff the CS is an edge within a strongly connected component as
   computed by ipa_reduced_postorder.  */

bool
ipa_edge_within_scc (struct cgraph_edge *cs)
{
  struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
  struct ipa_dfs_info *callee_dfs;
  struct cgraph_node *callee = cs->callee->function_symbol ();

  callee_dfs = (struct ipa_dfs_info *) callee->aux;
  return (caller_dfs
          && callee_dfs
          && caller_dfs->scc_no == callee_dfs->scc_no);
}

struct postorder_stack
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  int ref;
};

/* Fill array ORDER with all nodes with output flag set in the reverse
   topological order.  Return the number of elements in the array.
   FIXME: While walking, consider aliases, too.  */

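/* Illustrative usage sketch (editorial addition, not part of the original
   sources; "visit" is a hypothetical consumer).  Callers provide an array
   sized for the whole symbol table and then walk the returned prefix:

     cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
     int n = ipa_reverse_postorder (order);
     for (int i = 0; i < n; i++)
       visit (order[i]);   // Hypothetical per-node work.
     free (order);  */
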
int
ipa_reverse_postorder (struct cgraph_node **order)
{
  struct cgraph_node *node, *node2;
  int stack_size = 0;
  int order_pos = 0;
  struct cgraph_edge *edge;
  int pass;
  struct ipa_ref *ref = NULL;

  struct postorder_stack *stack =
    XCNEWVEC (struct postorder_stack, symtab->cgraph_count);

  /* We have to deal with cycles nicely, so use a depth first traversal
     output algorithm.  Ignore the fact that some functions won't need
     to be output and put them into order as well, so we get dependencies
     right through inline functions.  */
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  for (pass = 0; pass < 2; pass++)
    FOR_EACH_FUNCTION (node)
      if (!node->aux
          && (pass
              || (!node->address_taken
                  && !node->inlined_to
                  && !node->alias && !node->thunk
                  && !node->only_called_directly_p ())))
        {
          stack_size = 0;
          stack[stack_size].node = node;
          stack[stack_size].edge = node->callers;
          stack[stack_size].ref = 0;
          node->aux = (void *)(size_t)1;
          while (stack_size >= 0)
            {
              while (true)
                {
                  node2 = NULL;
                  while (stack[stack_size].edge && !node2)
                    {
                      edge = stack[stack_size].edge;
                      node2 = edge->caller;
                      stack[stack_size].edge = edge->next_caller;
                    }
                  for (; stack[stack_size].node->iterate_referring (
                                                   stack[stack_size].ref,
                                                   ref) && !node2;
                       stack[stack_size].ref++)
                    {
                      if (ref->use == IPA_REF_ALIAS)
                        node2 = dyn_cast <cgraph_node *> (ref->referring);
                    }
                  if (!node2)
                    break;
                  if (!node2->aux)
                    {
                      stack[++stack_size].node = node2;
                      stack[stack_size].edge = node2->callers;
                      stack[stack_size].ref = 0;
                      node2->aux = (void *)(size_t)1;
                    }
                }
              order[order_pos++] = stack[stack_size--].node;
            }
        }
  free (stack);
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  return order_pos;
}

/* Given a memory reference T, will return the variable at the bottom
   of the access.  Unlike get_base_address, this will recurse through
   INDIRECT_REFS.  */

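/* Editorial example (not part of the original sources): for a reference
   such as p->f[i], i.e. ARRAY_REF <COMPONENT_REF <MEM_REF <p_1>, f>, i_2>,
   the loop below keeps taking operand 0 until it reaches the SSA name or
   declaration p_1 at the bottom of the access, whereas get_base_address
   would stop at the MEM_REF.  */
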
tree
get_base_var (tree t)
{
  while (!SSA_VAR_P (t)
         && (!CONSTANT_CLASS_P (t))
         && TREE_CODE (t) != LABEL_DECL
         && TREE_CODE (t) != FUNCTION_DECL
         && TREE_CODE (t) != CONST_DECL
         && TREE_CODE (t) != CONSTRUCTOR)
    {
      t = TREE_OPERAND (t, 0);
    }
  return t;
}

/* Scale the counts of calls in NODE to reflect the change of the function
   profile from ORIG_COUNT to NODE->count.  */

void
scale_ipa_profile_for_fn (struct cgraph_node *node, profile_count orig_count)
{
  profile_count to = node->count;
  profile_count::adjust_for_ipa_scaling (&to, &orig_count);
  struct cgraph_edge *e;

  for (e = node->callees; e; e = e->next_callee)
    e->count = e->count.apply_scale (to, orig_count);
  for (e = node->indirect_calls; e; e = e->next_callee)
    e->count = e->count.apply_scale (to, orig_count);
}

/* SRC and DST are going to be merged.  Take SRC's profile and merge it into
   DST so it is not going to be lost.  Possibly destroy SRC's body on the way
   unless PRESERVE_BODY is set.  */

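/* Illustrative call sketch (editorial addition, not part of the original
   sources; "kept_node" and "discarded_node" are hypothetical).  When two
   equivalent bodies are unified, e.g. during LTO symbol merging or
   identical code folding, the surviving node receives the combined
   profile roughly as:

     ipa_merge_profiles (kept_node, discarded_node, false);  // preserve_body

   The actual call sites live outside this file.  */
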
void
ipa_merge_profiles (struct cgraph_node *dst,
                    struct cgraph_node *src,
                    bool preserve_body)
{
  tree oldsrcdecl = src->decl;
  struct function *srccfun, *dstcfun;
  bool match = true;
  bool copy_counts = false;

  if (!src->definition
      || !dst->definition)
    return;

  if (src->frequency < dst->frequency)
    src->frequency = dst->frequency;

  /* Time profiles are merged.  */
  if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
    dst->tp_first_run = src->tp_first_run;

  if (src->profile_id && !dst->profile_id)
    dst->profile_id = src->profile_id;

  /* Merging a zero profile into DST is a no-op.  */
  if (src->count.ipa () == profile_count::zero ())
    return;

  /* FIXME: when we merge in an unknown profile, we ought to mark the counts
     as unsafe.  */
  if (!src->count.initialized_p ()
      || !(src->count.ipa () == src->count))
    return;
  profile_count orig_count = dst->count;

  /* Either sum the profiles if both are IPA and not global0, or
     pick the more informative one (that is nonzero IPA if the other is
     uninitialized, guessed or global0).  */

  if ((dst->count.ipa ().nonzero_p ()
       || src->count.ipa ().nonzero_p ())
      && dst->count.ipa ().initialized_p ()
      && src->count.ipa ().initialized_p ())
    dst->count = dst->count.ipa () + src->count.ipa ();
  else if (dst->count.ipa ().initialized_p ())
    ;
  else if (src->count.ipa ().initialized_p ())
    {
      copy_counts = true;
      dst->count = src->count.ipa ();
    }

  /* If no updating is needed, return early.  */
  if (dst->count == orig_count)
    return;

  if (symtab->dump_file)
    {
      fprintf (symtab->dump_file, "Merging profiles of %s count:",
               src->dump_name ());
      src->count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, " to %s count:",
               dst->dump_name ());
      orig_count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, " resulting count:");
      dst->count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, "\n");
    }

  /* First handle functions with no gimple body.  */
  if (dst->thunk || dst->alias
      || src->thunk || src->alias)
    {
      scale_ipa_profile_for_fn (dst, orig_count);
      return;
    }

  /* This is ugly.  We need to get both function bodies into memory.
     If the declaration is merged, we need to duplicate it to be able
     to load the body that is being replaced.  This makes the symbol table
     temporarily inconsistent.  */
  if (src->decl == dst->decl)
    {
      struct lto_in_decl_state temp;
      struct lto_in_decl_state *state;

      /* We are going to move the decl; we want to remove its file decl data
         and link it with the new decl.  */
      temp.fn_decl = src->decl;
      lto_in_decl_state **slot
        = src->lto_file_data->function_decl_states->find_slot (&temp,
                                                               NO_INSERT);
      state = *slot;
      src->lto_file_data->function_decl_states->clear_slot (slot);
      gcc_assert (state);

      /* Duplicate the decl and be sure it does not link into body of DST.  */
      src->decl = copy_node (src->decl);
      DECL_STRUCT_FUNCTION (src->decl) = NULL;
      DECL_ARGUMENTS (src->decl) = NULL;
      DECL_INITIAL (src->decl) = NULL;
      DECL_RESULT (src->decl) = NULL;

      /* Associate the decl state with the new declaration, so the LTO
         streamer can look it up.  */
      state->fn_decl = src->decl;
      slot
        = src->lto_file_data->function_decl_states->find_slot (state, INSERT);
      gcc_assert (!*slot);
      *slot = state;
    }

  src->get_untransformed_body ();
  dst->get_untransformed_body ();
  srccfun = DECL_STRUCT_FUNCTION (src->decl);
  dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
  if (n_basic_blocks_for_fn (srccfun)
      != n_basic_blocks_for_fn (dstcfun))
    {
      if (symtab->dump_file)
        fprintf (symtab->dump_file,
                 "Giving up; number of basic block mismatch.\n");
      match = false;
    }
  else if (last_basic_block_for_fn (srccfun)
           != last_basic_block_for_fn (dstcfun))
    {
      if (symtab->dump_file)
        fprintf (symtab->dump_file,
                 "Giving up; last block mismatch.\n");
      match = false;
    }
  else
    {
      basic_block srcbb, dstbb;
      struct cgraph_edge *e, *e2;

      for (e = dst->callees, e2 = src->callees; e && e2 && match;
           e2 = e2->next_callee, e = e->next_callee)
        {
          if (gimple_bb (e->call_stmt)->index
              != gimple_bb (e2->call_stmt)->index)
            {
              if (symtab->dump_file)
                fprintf (symtab->dump_file,
                         "Giving up; call stmt mismatch.\n");
              match = false;
            }
        }
      if (e || e2)
        {
          if (symtab->dump_file)
            fprintf (symtab->dump_file,
                     "Giving up; number of calls differs.\n");
          match = false;
        }
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e && e2 && match;
           e2 = e2->next_callee, e = e->next_callee)
        {
          if (gimple_bb (e->call_stmt)->index
              != gimple_bb (e2->call_stmt)->index)
            {
              if (symtab->dump_file)
                fprintf (symtab->dump_file,
                         "Giving up; indirect call stmt mismatch.\n");
              match = false;
            }
        }
      if (e || e2)
        {
          if (symtab->dump_file)
            fprintf (symtab->dump_file,
                     "Giving up; number of indirect calls differs.\n");
          match = false;
        }
      if (match)
        FOR_ALL_BB_FN (srcbb, srccfun)
          {
            unsigned int i;

            dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
            if (dstbb == NULL)
              {
                if (symtab->dump_file)
                  fprintf (symtab->dump_file,
                           "No matching block for bb %i.\n",
                           srcbb->index);
                match = false;
                break;
              }
            if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
              {
                if (symtab->dump_file)
                  fprintf (symtab->dump_file,
                           "Edge count mismatch for bb %i.\n",
                           srcbb->index);
                match = false;
                break;
              }
            for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
              {
                edge srce = EDGE_SUCC (srcbb, i);
                edge dste = EDGE_SUCC (dstbb, i);
                if (srce->dest->index != dste->dest->index)
                  {
                    if (symtab->dump_file)
                      fprintf (symtab->dump_file,
                               "Succ edge mismatch for bb %i.\n",
                               srce->dest->index);
                    match = false;
                    break;
                  }
              }
          }
    }
  if (match)
    {
      struct cgraph_edge *e, *e2;
      basic_block srcbb, dstbb;

      /* Function and global profile may be out of sync.  First scale it the
         same way as fixup_cfg would.  */
      profile_count srcnum = src->count;
      profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
      bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
      profile_count dstnum = orig_count;
      profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
      bool dstscale = !copy_counts
                      && dstnum.initialized_p () && !(dstnum == dstden);

      /* TODO: merge also statement histograms.  */
      FOR_ALL_BB_FN (srcbb, srccfun)
        {
          unsigned int i;

          dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);

          profile_count srccount = srcbb->count;
          if (srcscale)
            srccount = srccount.apply_scale (srcnum, srcden);
          if (dstscale)
            dstbb->count = dstbb->count.apply_scale (dstnum, dstden);

          if (copy_counts)
            {
              dstbb->count = srccount;
              for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
                {
                  edge srce = EDGE_SUCC (srcbb, i);
                  edge dste = EDGE_SUCC (dstbb, i);
                  if (srce->probability.initialized_p ())
                    dste->probability = srce->probability;
                }
            }
          else
            {
              for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
                {
                  edge srce = EDGE_SUCC (srcbb, i);
                  edge dste = EDGE_SUCC (dstbb, i);
                  profile_count sum =
                    dstbb->count.ipa () + srccount.ipa ();
                  if (sum.nonzero_p ())
                    dste->probability =
                      dste->probability * dstbb->count.ipa ().probability_in
                                            (sum)
                      + srce->probability * srcbb->count.ipa ().probability_in
                                              (sum);
                }
              dstbb->count = dstbb->count.ipa () + srccount.ipa ();
            }
        }
      push_cfun (dstcfun);
      update_max_bb_count ();
      compute_function_frequency ();
      pop_cfun ();
      for (e = dst->callees; e; e = e->next_callee)
        {
          if (e->speculative)
            continue;
          e->count = gimple_bb (e->call_stmt)->count;
        }
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
           e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
        {
          if (!e->speculative && !e2->speculative)
            {
              /* FIXME: we need to also merge ipa-profile histograms
                 because with LTO, merging happens from lto-symtab before
                 these are converted to indirect edges.  */
              e->count = gimple_bb (e->call_stmt)->count;
              continue;
            }

          /* When copying, just remove all speculations on dst and then copy
             the ones from src.  */
          if (copy_counts)
            {
              while (e->speculative)
                cgraph_edge::resolve_speculation (e, NULL);
              e->count = gimple_bb (e->call_stmt)->count;
              if (e2->speculative)
                {
                  for (cgraph_edge *e3 = e2->first_speculative_call_target ();
                       e3;
                       e3 = e3->next_speculative_call_target ())
                    {
                      cgraph_edge *ns;
                      ns = e->make_speculative
                             (dyn_cast <cgraph_node *>
                                (e3->speculative_call_target_ref ()->referred),
                              e3->count, e3->speculative_id);
                      /* Target may differ from ref (for example it may be
                         redirected to a local alias).  */
                      ns->redirect_callee (e3->callee);
                    }
                }
              continue;
            }

          /* Iterate all speculations in SRC, see if corresponding ones exist
             in DST and if so, sum the counts.  Otherwise create a new
             speculation.  */
          int max_spec = 0;
          for (cgraph_edge *e3 = e->first_speculative_call_target ();
               e3;
               e3 = e3->next_speculative_call_target ())
            if (e3->speculative_id > max_spec)
              max_spec = e3->speculative_id;
          for (cgraph_edge *e3 = e2->first_speculative_call_target ();
               e3;
               e3 = e3->next_speculative_call_target ())
            {
              cgraph_edge *te
                = e->speculative_call_for_target
                    (dyn_cast <cgraph_node *>
                       (e3->speculative_call_target_ref ()->referred));
              if (te)
                te->count = te->count + e3->count;
              else
                {
                  e->count = e->count + e3->count;
                  cgraph_edge *ns;
                  ns = e->make_speculative
                         (dyn_cast <cgraph_node *>
                            (e3->speculative_call_target_ref ()
                             ->referred),
                          e3->count,
                          e3->speculative_id + max_spec + 1);
                  /* Target may differ from ref (for example it may be
                     redirected to a local alias).  */
                  ns->redirect_callee (e3->callee);
                }
            }
        }
      if (!preserve_body)
        src->release_body ();
      /* Update summary.  */
      compute_fn_summary (dst, 0);
    }
  /* We can't update the CFG profile, but we can scale the IPA profile.  CFG
     will be scaled according to dst->count after IPA passes.  */
  else
    scale_ipa_profile_for_fn (dst, orig_count);
  src->decl = oldsrcdecl;
}

/* Return true if a call to DEST is known to be a self-recursive
   call within FUNC.  */

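/* Illustrative check sketch (editorial addition, not part of the original
   sources).  A pass looking at a call statement CALL inside function FUN
   might use:

     tree callee = gimple_call_fndecl (call);
     if (callee && recursive_call_p (fun->decl, callee))
       ...   // The call provably re-enters FUN itself.

   gimple_call_fndecl returns NULL_TREE for indirect calls, so those are
   never considered self-recursive by this helper.  */
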
bool
recursive_call_p (tree func, tree dest)
{
  struct cgraph_node *dest_node = cgraph_node::get_create (dest);
  struct cgraph_node *cnode = cgraph_node::get_create (func);
  ipa_ref *alias;
  enum availability avail;

  gcc_assert (!cnode->alias);
  if (cnode != dest_node->ultimate_alias_target (&avail))
    return false;
  if (avail >= AVAIL_AVAILABLE)
    return true;
  if (!dest_node->semantically_equivalent_p (cnode))
    return false;
  /* If there is only one way to call the function or we know all of them
     are semantically equivalent, we still can consider the call recursive.  */
  FOR_EACH_ALIAS (cnode, alias)
    if (!dest_node->semantically_equivalent_p (alias->referring))
      return false;
  return true;
}

/* Return true if STMT may terminate execution of the function.
   If ASSUME_RETURN_OR_EH, we can further assume that the function ends
   either by a return statement or EH (no trapping or infinite loops).  */

bool
stmt_may_terminate_function_p (function *fun, gimple *stmt, bool assume_return_or_eh)
{
  if (stmt_can_throw_external (fun, stmt))
    return true;
  if (assume_return_or_eh)
    return false;
  gasm *astmt = dyn_cast <gasm *> (stmt);
  if (astmt && gimple_asm_volatile_p (astmt))
    return true;
  if (gimple_could_trap_p (stmt))
    return true;
  if (gcall *call = dyn_cast <gcall *> (stmt))
    {
      int flags = gimple_call_flags (call);
      if (flags & (ECF_PURE | ECF_CONST) && ! (flags & ECF_LOOPING_CONST_OR_PURE))
        return false;
      modref_summary *s = get_modref_function_summary (call, NULL);
      if (s && !s->side_effects)
        return false;
      return true;
    }
  return false;
}

/* Return a bitmap of all basic blocks whose first statements are known to
   execute on every invocation of the function.

   If ASSUME_RETURN_OR_EH, we can further assume that the function ends
   either by a return statement or EH (no trapping or infinite loops).
   This is useful when summarizing a function in passes like ipa-modref.

   Setting ASSUME_RETURN_OR_EH to false is used to prove that a given
   statement will be executed even if the function gets into an infinite
   loop or traps.  */

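/* Illustrative usage sketch (editorial addition, not part of the original
   sources; "note_unconditional_block" is a hypothetical consumer).  A
   summary-building pass can query the bitmap per block and must release it
   afterwards:

     bitmap always = find_always_executed_bbs (fun, true);
     basic_block bb;
     FOR_EACH_BB_FN (bb, fun)
       if (bitmap_bit_p (always, bb->index))
         note_unconditional_block (bb);
     BITMAP_FREE (always);  */
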
bitmap
find_always_executed_bbs (function *fun, bool assume_return_or_eh)
{
  auto_vec<basic_block, 20> stack;
  auto_vec<basic_block, 20> terminating_bbs;
  hash_set<basic_block> visited;
  hash_set<basic_block> terminating_bbs_set;
  edge e;
  edge_iterator ei;
  int flags = flags_from_decl_or_type (fun->decl);
  /* Pure and const functions always return.  */
  assume_return_or_eh |= (flags & (ECF_CONST|ECF_PURE)) && !(flags & ECF_LOOPING_CONST_OR_PURE);
  if (!assume_return_or_eh)
    mark_dfs_back_edges (fun);

  /* First walk all BBs reachable from entry, stopping on statements that may
     terminate execution.  Everything past such a statement is not going to be
     executed on each invocation.  */
  stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
  while (!stack.is_empty ())
    {
      basic_block bb = stack.pop ();
      bool found = false, found_exit = false;
      if (bb->index == EXIT_BLOCK)
        continue;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
            {
              found_exit = true;
              break;
            }
          /* Watch for infinite loops.  */
          if (!found
              && !assume_return_or_eh && (e->flags & EDGE_DFS_BACK))
            {
              if (!dom_info_available_p (CDI_DOMINATORS))
                calculate_dominance_info (CDI_DOMINATORS);
              /* If this is not a loop latch edge it is an irreducible region.
                 Assume that it is infinite.
                 TODO: with C++ forced progression we can still walk the
                 irreducible region and see if it contains any side effects.
                 Similarly for loops.  -ffinite-loops does not really imply
                 this since we allow inlining across the -ffinite-loops
                 boundary and thus it can be used only as a loop flag.  */
              if (e->dest->loop_father->header != e->dest
                  || !dominated_by_p (CDI_DOMINATORS, bb, e->dest))
                found = true;
              else if (!finite_loop_p (e->dest->loop_father))
                found = true;
            }
        }
      if (!assume_return_or_eh
          && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
        found = true;
      for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
           !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
        if (stmt_may_terminate_function_p (fun, gsi_stmt (si), assume_return_or_eh))
          {
            found = true;
            break;
          }
      if (found)
        {
          visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
          if (!found_exit)
            {
              terminating_bbs.safe_push (bb);
              terminating_bbs_set.add (bb);
            }
        }
      else
        FOR_EACH_EDGE (e, ei, bb->succs)
          if (!visited.add (e->dest))
            stack.safe_push (e->dest);
    }

  /* Next walk from the exit block and find all articulations in the CFG.
     Add all terminating basic blocks as "fake" predecessors of the
     exit block.  */

  bitmap ret = BITMAP_ALLOC (NULL);
  bitmap_tree_view (ret);
  /* A degenerate case when there is no path to exit.  */
  if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun)))
    {
      bitmap_set_bit (ret,
                      single_succ_edge
                        (ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
      return ret;
    }

  struct astate
  {
    unsigned int dfs_preorder;
    unsigned int dfs_postorder;

    unsigned int low, high;
  };

  struct worklist
  {
    basic_block bb;
    astate *cstate;
  };

  struct obstack state_obstack;
  gcc_obstack_init (&state_obstack);
  hash_map<basic_block, astate *> state;
  auto_vec<worklist, 32> worklist_vec;
  unsigned int next_dfs_num = 1;

  /* Always executed blocks are blocks that are on every path from entry to
     exit.  We proceed in two steps.  First we do a backward DFS walk (so we
     know that entry is always reached) and record preorder and postorder
     visiting times.

     In the second step we proceed in postorder and for every block A we
     compute the minimal preorder (A.low) and maximal postorder (A.high) of
     blocks reachable from the BBs in the DFS subtree of A.  If A is always
     executed there are no edges out of this subtree.  This can be tested by
     checking that A.low == A.preorder and A.high == A.postorder.

     This is the first step.  Do the backward DFS walk and record preorder,
     postorder and predecessor info.  Initialize stack in postorder.  */
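  /* Editorial illustration (not part of the original sources): in a diamond
     CFG  ENTRY -> A -> {B, C} -> D -> EXIT  only A and D satisfy the test
     above.  For B (and symmetrically C) the interval [B.low, B.high] grows
     past B's own preorder/postorder pair, because A and D are shared with
     the other arm of the diamond and are therefore reachable outside B's
     DFS subtree.  */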
  worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
  worklist_vec.safe_push (we);
  while (!worklist_vec.is_empty ())
    {
      worklist &w = worklist_vec.last ();
      basic_block bb = w.bb;
      astate *cstate = w.cstate;

      if (!cstate)
        {
          astate **slot = &state.get_or_insert (bb);

          cstate = *slot;
          /* Already processed by DFS?  */
          if (cstate)
            {
              worklist_vec.pop ();
              continue;
            }
          /* DFS is visiting BB for the first time.  */
          *slot = cstate = XOBNEW (&state_obstack, struct astate);
          cstate->low = cstate->high = cstate->dfs_preorder = next_dfs_num++;
          w.cstate = cstate;
          /* Exit block is special; process all fake edges we identified.  */
          if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
            for (basic_block bb2 : terminating_bbs)
              {
                worklist we = {bb2, NULL};
                worklist_vec.safe_push (we);
              }
          FOR_EACH_EDGE (e, ei, bb->preds)
            if (visited.contains (e->src))
              {
                worklist we = {e->src, NULL};
                worklist_vec.safe_push (we);
              }
          /* Keep BB on the worklist so we process it once more after its
             predecessors.  */
          continue;
        }
      /* We are finished processing reachable BBs; see if we have an
         articulation.  */
      worklist_vec.pop ();
      cstate->high = cstate->dfs_postorder = next_dfs_num++;
      stack.safe_push (bb);
    }
  /* This is the final postorder walk.  Determine low and high values and mark
     always executed blocks.  */
  for (basic_block bb : stack)
    {
      astate *cstate = *state.get (bb);
      FOR_EACH_EDGE (e, ei, bb->preds)
        {
          astate **cstate2 = state.get (e->src);
          /* We skip walking the part of the CFG reached only after the first
             edge to exit.  No BB reachable from the skipped part is always
             executed.  */
          if (!cstate2)
            {
              if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
                cstate->low = 0;
              continue;
            }
          cstate->low = MIN (cstate->low, (*cstate2)->low);
          cstate->high = MAX (cstate->high, (*cstate2)->high);
        }
      if (dump_file && (dump_flags & TDF_DETAILS)
          && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
        fprintf (dump_file,
                 "BB %i %s preorder %i postorder %i low %i high %i\n",
                 bb->index,
                 terminating_bbs_set.contains (bb) ? "(terminating)" : "",
                 cstate->dfs_preorder, cstate->dfs_postorder, cstate->low,
                 cstate->high);
      if (cstate->low == cstate->dfs_preorder
          && cstate->high == cstate->dfs_postorder
          && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
        bitmap_set_bit (ret, bb->index);
      if (terminating_bbs_set.contains (bb))
        cstate->low = 0;
      else
        FOR_EACH_EDGE (e, ei, bb->succs)
          {
            astate **cstate2 = state.get (e->dest);
            if (!cstate2)
              continue;
            cstate->low = MIN (cstate->low, (*cstate2)->low);
            cstate->high = MAX (cstate->high, (*cstate2)->high);
          }
    }
  obstack_free (&state_obstack, NULL);
  if (dump_file)
    {
      fprintf (dump_file, "Always executed bbs %s: ",
               assume_return_or_eh ? "(assuming return or EH)" : "");
      bitmap_print (dump_file, ret, "", "\n");
    }

  return ret;
}