gcc/ipa-utils.c

   1 /* Utilities for ipa analysis.
   2    Copyright (C) 2005-2016 Free Software Foundation, Inc.
   3    Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 #include "config.h"
  22 #include "system.h"
  23 #include "coretypes.h"
  24 #include "backend.h"
  25 #include "tree.h"
  26 #include "gimple.h"
  27 #include "predict.h"
  28 #include "alloc-pool.h"
  29 #include "cgraph.h"
  30 #include "lto-streamer.h"
  31 #include "dumpfile.h"
  32 #include "splay-tree.h"
  33 #include "ipa-utils.h"
  34 #include "symbol-summary.h"
  35 #include "ipa-prop.h"
  36 #include "ipa-inline.h"
  37
  38 /* Debugging function for postorder and inorder code. NOTE is a string
  39    that is printed before the nodes are printed.  ORDER is an array of
  40    cgraph_nodes that has COUNT useful nodes in it.  */
  41
  42 void
  43 ipa_print_order (FILE* out,
  44                  const char * note,
  45                  struct cgraph_node** order,
  46                  int count)
  47 {
  48   int i;
  49   fprintf (out, "\n\n ordered call graph: %s\n", note);
  50
  51   for (i = count - 1; i >= 0; i--)
  52     order[i]->dump (out);
  53   fprintf (out, "\n");
  54   fflush (out);
  55 }
  56
  57
  58 struct searchc_env {
  59   struct cgraph_node **stack;
  60   int stack_size;
  61   struct cgraph_node **result;
  62   int order_pos;
  63   splay_tree nodes_marked_new;
  64   bool reduce;
  65   bool allow_overwritable;
  66   int count;
  67 };
  68
  69 /* This is an implementation of Tarjan's strongly connected region
  70    finder as reprinted in Aho Hopcraft and Ullman's The Design and
  71    Analysis of Computer Programs (1975) pages 192-193.  This version
  72    has been customized for cgraph_nodes.  The env parameter is because
  73    it is recursive and there are no nested functions here.  This
  74    function should only be called from itself or
  75    ipa_reduced_postorder.  ENV is a stack env and would be
  76    unnecessary if C had nested functions.  V is the node to start
  77    searching from.  */
  78
  79 static void
  80 searchc (struct searchc_env* env, struct cgraph_node *v,
  81          bool (*ignore_edge) (struct cgraph_edge *))
  82 {
  83   struct cgraph_edge *edge;
  84   struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;
  85
  86   /* mark node as old */
  87   v_info->new_node = false;
  88   splay_tree_remove (env->nodes_marked_new, v->uid);
  89
  90   v_info->dfn_number = env->count;
  91   v_info->low_link = env->count;
  92   env->count++;
  93   env->stack[(env->stack_size)++] = v;
  94   v_info->on_stack = true;
  95
  96   for (edge = v->callees; edge; edge = edge->next_callee)
  97     {
  98       struct ipa_dfs_info * w_info;
  99       enum availability avail;
 100       struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);
 101
 102       if (!w || (ignore_edge && ignore_edge (edge)))
 103         continue;
 104
 105       if (w->aux
 106           && (avail > AVAIL_INTERPOSABLE
 107               || (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
 108         {
 109           w_info = (struct ipa_dfs_info *) w->aux;
 110           if (w_info->new_node)
 111             {
 112               searchc (env, w, ignore_edge);
 113               v_info->low_link =
 114                 (v_info->low_link < w_info->low_link) ?
 115                 v_info->low_link : w_info->low_link;
 116             }
 117           else
 118             if ((w_info->dfn_number < v_info->dfn_number)
 119                 && (w_info->on_stack))
 120               v_info->low_link =
 121                 (w_info->dfn_number < v_info->low_link) ?
 122                 w_info->dfn_number : v_info->low_link;
 123         }
 124     }
 125
 126
 127   if (v_info->low_link == v_info->dfn_number)
 128     {
 129       struct cgraph_node *last = NULL;
 130       struct cgraph_node *x;
 131       struct ipa_dfs_info *x_info;
 132       do {
 133         x = env->stack[--(env->stack_size)];
 134         x_info = (struct ipa_dfs_info *) x->aux;
 135         x_info->on_stack = false;
 136         x_info->scc_no = v_info->dfn_number;
 137
 138         if (env->reduce)
 139           {
 140             x_info->next_cycle = last;
 141             last = x;
 142           }
 143         else
 144           env->result[env->order_pos++] = x;
 145       }
 146       while (v != x);
 147       if (env->reduce)
 148         env->result[env->order_pos++] = v;
 149     }
 150 }
 151
 152 /* Topsort the call graph by caller relation.  Put the result in ORDER.
 153
 154    The REDUCE flag is true if you want the cycles reduced to single nodes.
 155    You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
 156    call graph nodes in a reduced node.
 157
 158    Set ALLOW_OVERWRITABLE if nodes with such availability should be included.
 159    IGNORE_EDGE, if non-NULL is a hook that may make some edges insignificant
 160    for the topological sort.   */
 161
 162 int
 163 ipa_reduced_postorder (struct cgraph_node **order,
 164                        bool reduce, bool allow_overwritable,
 165                        bool (*ignore_edge) (struct cgraph_edge *))
 166 {
 167   struct cgraph_node *node;
 168   struct searchc_env env;
 169   splay_tree_node result;
 170   env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 171   env.stack_size = 0;
 172   env.result = order;
 173   env.order_pos = 0;
 174   env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
 175   env.count = 1;
 176   env.reduce = reduce;
 177   env.allow_overwritable = allow_overwritable;
 178
 179   FOR_EACH_DEFINED_FUNCTION (node)
 180     {
 181       enum availability avail = node->get_availability ();
 182
 183       if (avail > AVAIL_INTERPOSABLE
 184           || (allow_overwritable
 185               && (avail == AVAIL_INTERPOSABLE)))
 186         {
 187           /* Reuse the info if it is already there.  */
 188           struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
 189           if (!info)
 190             info = XCNEW (struct ipa_dfs_info);
 191           info->new_node = true;
 192           info->on_stack = false;
 193           info->next_cycle = NULL;
 194           node->aux = info;
 195
 196           splay_tree_insert (env.nodes_marked_new,
 197                              (splay_tree_key)node->uid,
 198                              (splay_tree_value)node);
 199         }
 200       else
 201         node->aux = NULL;
 202     }
 203   result = splay_tree_min (env.nodes_marked_new);
 204   while (result)
 205     {
 206       node = (struct cgraph_node *)result->value;
 207       searchc (&env, node, ignore_edge);
 208       result = splay_tree_min (env.nodes_marked_new);
 209     }
 210   splay_tree_delete (env.nodes_marked_new);
 211   free (env.stack);
 212
 213   return env.order_pos;
 214 }
 215
 216 /* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of call
 217    graph nodes.  */
 218
 219 void
 220 ipa_free_postorder_info (void)
 221 {
 222   struct cgraph_node *node;
 223   FOR_EACH_DEFINED_FUNCTION (node)
 224     {
 225       /* Get rid of the aux information.  */
 226       if (node->aux)
 227         {
 228           free (node->aux);
 229           node->aux = NULL;
 230         }
 231     }
 232 }
 233
 234 /* Get the set of nodes for the cycle in the reduced call graph starting
 235    from NODE.  */
 236
 237 vec<cgraph_node *>
 238 ipa_get_nodes_in_cycle (struct cgraph_node *node)
 239 {
 240   vec<cgraph_node *> v = vNULL;
 241   struct ipa_dfs_info *node_dfs_info;
 242   while (node)
 243     {
 244       v.safe_push (node);
 245       node_dfs_info = (struct ipa_dfs_info *) node->aux;
 246       node = node_dfs_info->next_cycle;
 247     }
 248   return v;
 249 }
 250
 251 /* Return true iff the CS is an edge within a strongly connected component as
 252    computed by ipa_reduced_postorder.  */
 253
 254 bool
 255 ipa_edge_within_scc (struct cgraph_edge *cs)
 256 {
 257   struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
 258   struct ipa_dfs_info *callee_dfs;
 259   struct cgraph_node *callee = cs->callee->function_symbol ();
 260
 261   callee_dfs = (struct ipa_dfs_info *) callee->aux;
 262   return (caller_dfs
 263           && callee_dfs
 264           && caller_dfs->scc_no == callee_dfs->scc_no);
 265 }
 266
 267 struct postorder_stack
 268 {
 269   struct cgraph_node *node;
 270   struct cgraph_edge *edge;
 271   int ref;
 272 };
 273
 274 /* Fill array order with all nodes with output flag set in the reverse
 275    topological order.  Return the number of elements in the array.
 276    FIXME: While walking, consider aliases, too.  */
 277
 278 int
 279 ipa_reverse_postorder (struct cgraph_node **order)
 280 {
 281   struct cgraph_node *node, *node2;
 282   int stack_size = 0;
 283   int order_pos = 0;
 284   struct cgraph_edge *edge;
 285   int pass;
 286   struct ipa_ref *ref = NULL;
 287
 288   struct postorder_stack *stack =
 289     XCNEWVEC (struct postorder_stack, symtab->cgraph_count);
 290
 291   /* We have to deal with cycles nicely, so use a depth first traversal
 292      output algorithm.  Ignore the fact that some functions won't need
 293      to be output and put them into order as well, so we get dependencies
 294      right through inline functions.  */
 295   FOR_EACH_FUNCTION (node)
 296     node->aux = NULL;
 297   for (pass = 0; pass < 2; pass++)
 298     FOR_EACH_FUNCTION (node)
 299       if (!node->aux
 300           && (pass
 301               || (!node->address_taken
 302                   && !node->global.inlined_to
 303                   && !node->alias && !node->thunk.thunk_p
 304                   && !node->only_called_directly_p ())))
 305         {
 306           stack_size = 0;
 307           stack[stack_size].node = node;
 308           stack[stack_size].edge = node->callers;
 309           stack[stack_size].ref = 0;
 310           node->aux = (void *)(size_t)1;
 311           while (stack_size >= 0)
 312             {
 313               while (true)
 314                 {
 315                   node2 = NULL;
 316                   while (stack[stack_size].edge && !node2)
 317                     {
 318                       edge = stack[stack_size].edge;
 319                       node2 = edge->caller;
 320                       stack[stack_size].edge = edge->next_caller;
 321                       /* Break possible cycles involving always-inline
 322                          functions by ignoring edges from always-inline
 323                          functions to non-always-inline functions.  */
 324                       if (DECL_DISREGARD_INLINE_LIMITS (edge->caller->decl)
 325                           && !DECL_DISREGARD_INLINE_LIMITS
 326                             (edge->callee->function_symbol ()->decl))
 327                         node2 = NULL;
 328                     }
 329                   for (; stack[stack_size].node->iterate_referring (
 330                                                        stack[stack_size].ref,
 331                                                        ref) && !node2;
 332                        stack[stack_size].ref++)
 333                     {
 334                       if (ref->use == IPA_REF_ALIAS)
 335                         node2 = dyn_cast <cgraph_node *> (ref->referring);
 336                     }
 337                   if (!node2)
 338                     break;
 339                   if (!node2->aux)
 340                     {
 341                       stack[++stack_size].node = node2;
 342                       stack[stack_size].edge = node2->callers;
 343                       stack[stack_size].ref = 0;
 344                       node2->aux = (void *)(size_t)1;
 345                     }
 346                 }
 347               order[order_pos++] = stack[stack_size--].node;
 348             }
 349         }
 350   free (stack);
 351   FOR_EACH_FUNCTION (node)
 352     node->aux = NULL;
 353   return order_pos;
 354 }
 355
 356
 357
 358 /* Given a memory reference T, will return the variable at the bottom
 359    of the access.  Unlike get_base_address, this will recurse through
 360    INDIRECT_REFS.  */
 361
 362 tree
 363 get_base_var (tree t)
 364 {
 365   while (!SSA_VAR_P (t)
 366          && (!CONSTANT_CLASS_P (t))
 367          && TREE_CODE (t) != LABEL_DECL
 368          && TREE_CODE (t) != FUNCTION_DECL
 369          && TREE_CODE (t) != CONST_DECL
 370          && TREE_CODE (t) != CONSTRUCTOR)
 371     {
 372       t = TREE_OPERAND (t, 0);
 373     }
 374   return t;
 375 }
 376
 377
 378 /* SRC and DST are going to be merged.  Take SRC's profile and merge it into
 379    DST so it is not going to be lost.  Possibly destroy SRC's body on the way
 380    unless PRESERVE_BODY is set.  */
 381
 382 void
 383 ipa_merge_profiles (struct cgraph_node *dst,
 384                     struct cgraph_node *src,
 385                     bool preserve_body)
 386 {
 387   tree oldsrcdecl = src->decl;
 388   struct function *srccfun, *dstcfun;
 389   bool match = true;
 390
 391   if (!src->definition
 392       || !dst->definition)
 393     return;
 394   if (src->frequency < dst->frequency)
 395     src->frequency = dst->frequency;
 396
 397   /* Time profiles are merged.  */
 398   if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
 399     dst->tp_first_run = src->tp_first_run;
 400
 401   if (src->profile_id && !dst->profile_id)
 402     dst->profile_id = src->profile_id;
 403
 404   if (!dst->count)
 405     return;
 406   if (symtab->dump_file)
 407     {
 408       fprintf (symtab->dump_file, "Merging profiles of %s/%i to %s/%i\n",
 409                xstrdup_for_dump (src->name ()), src->order,
 410                xstrdup_for_dump (dst->name ()), dst->order);
 411     }
 412   dst->count += src->count;
 413
 414   /* This is ugly.  We need to get both function bodies into memory.
 415      If declaration is merged, we need to duplicate it to be able
 416      to load body that is being replaced.  This makes symbol table
 417      temporarily inconsistent.  */
 418   if (src->decl == dst->decl)
 419     {
 420       struct lto_in_decl_state temp;
 421       struct lto_in_decl_state *state;
 422
 423       /* We are going to move the decl, we want to remove its file decl data.
 424          and link these with the new decl. */
 425       temp.fn_decl = src->decl;
 426       lto_in_decl_state **slot
 427         = src->lto_file_data->function_decl_states->find_slot (&temp,
 428                                                                NO_INSERT);
 429       state = *slot;
 430       src->lto_file_data->function_decl_states->clear_slot (slot);
 431       gcc_assert (state);
 432
 433       /* Duplicate the decl and be sure it does not link into body of DST.  */
 434       src->decl = copy_node (src->decl);
 435       DECL_STRUCT_FUNCTION (src->decl) = NULL;
 436       DECL_ARGUMENTS (src->decl) = NULL;
 437       DECL_INITIAL (src->decl) = NULL;
 438       DECL_RESULT (src->decl) = NULL;
 439
 440       /* Associate the decl state with new declaration, so LTO streamer
 441          can look it up.  */
 442       state->fn_decl = src->decl;
 443       slot
 444         = src->lto_file_data->function_decl_states->find_slot (state, INSERT);
 445       gcc_assert (!*slot);
 446       *slot = state;
 447     }
 448   src->get_untransformed_body ();
 449   dst->get_untransformed_body ();
 450   srccfun = DECL_STRUCT_FUNCTION (src->decl);
 451   dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
 452   if (n_basic_blocks_for_fn (srccfun)
 453       != n_basic_blocks_for_fn (dstcfun))
 454     {
 455       if (symtab->dump_file)
 456         fprintf (symtab->dump_file,
 457                  "Giving up; number of basic block mismatch.\n");
 458       match = false;
 459     }
 460   else if (last_basic_block_for_fn (srccfun)
 461            != last_basic_block_for_fn (dstcfun))
 462     {
 463       if (symtab->dump_file)
 464         fprintf (symtab->dump_file,
 465                  "Giving up; last block mismatch.\n");
 466       match = false;
 467     }
 468   else
 469     {
 470       basic_block srcbb, dstbb;
 471
 472       FOR_ALL_BB_FN (srcbb, srccfun)
 473         {
 474           unsigned int i;
 475
 476           dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 477           if (dstbb == NULL)
 478             {
 479               if (symtab->dump_file)
 480                 fprintf (symtab->dump_file,
 481                          "No matching block for bb %i.\n",
 482                          srcbb->index);
 483               match = false;
 484               break;
 485             }
 486           if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
 487             {
 488               if (symtab->dump_file)
 489                 fprintf (symtab->dump_file,
 490                          "Edge count mistmatch for bb %i.\n",
 491                          srcbb->index);
 492               match = false;
 493               break;
 494             }
 495           for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 496             {
 497               edge srce = EDGE_SUCC (srcbb, i);
 498               edge dste = EDGE_SUCC (dstbb, i);
 499               if (srce->dest->index != dste->dest->index)
 500                 {
 501                   if (symtab->dump_file)
 502                     fprintf (symtab->dump_file,
 503                              "Succ edge mistmatch for bb %i.\n",
 504                              srce->dest->index);
 505                   match = false;
 506                   break;
 507                 }
 508             }
 509         }
 510     }
 511   if (match)
 512     {
 513       struct cgraph_edge *e, *e2;
 514       basic_block srcbb, dstbb;
 515
 516       /* TODO: merge also statement histograms.  */
 517       FOR_ALL_BB_FN (srcbb, srccfun)
 518         {
 519           unsigned int i;
 520
 521           dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
 522           dstbb->count += srcbb->count;
 523           for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
 524             {
 525               edge srce = EDGE_SUCC (srcbb, i);
 526               edge dste = EDGE_SUCC (dstbb, i);
 527               dste->count += srce->count;
 528             }
 529         }
 530       push_cfun (dstcfun);
 531       counts_to_freqs ();
 532       compute_function_frequency ();
 533       pop_cfun ();
 534       for (e = dst->callees; e; e = e->next_callee)
 535         {
 536           if (e->speculative)
 537             continue;
 538           e->count = gimple_bb (e->call_stmt)->count;
 539           e->frequency = compute_call_stmt_bb_frequency
 540                              (dst->decl,
 541                               gimple_bb (e->call_stmt));
 542         }
 543       for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
 544            e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
 545         {
 546           gcov_type count = gimple_bb (e->call_stmt)->count;
 547           int freq = compute_call_stmt_bb_frequency
 548                         (dst->decl,
 549                          gimple_bb (e->call_stmt));
 550           /* When call is speculative, we need to re-distribute probabilities
 551              the same way as they was.  This is not really correct because
 552              in the other copy the speculation may differ; but probably it
 553              is not really worth the effort.  */
 554           if (e->speculative)
 555             {
 556               cgraph_edge *direct, *indirect;
 557               cgraph_edge *direct2 = NULL, *indirect2 = NULL;
 558               ipa_ref *ref;
 559
 560               e->speculative_call_info (direct, indirect, ref);
 561               gcc_assert (e == indirect);
 562               if (e2 && e2->speculative)
 563                 e2->speculative_call_info (direct2, indirect2, ref);
 564               if (indirect->count || direct->count)
 565                 {
 566                   /* We should mismatch earlier if there is no matching
 567                      indirect edge.  */
 568                   if (!e2)
 569                     {
 570                       if (dump_file)
 571                         fprintf (dump_file,
 572                                  "Mismatch in merging indirect edges\n");
 573                     }
 574                   else if (!e2->speculative)
 575                     indirect->count += e2->count;
 576                   else if (e2->speculative)
 577                     {
 578                       if (DECL_ASSEMBLER_NAME (direct2->callee->decl)
 579                           != DECL_ASSEMBLER_NAME (direct->callee->decl))
 580                         {
 581                           if (direct2->count >= direct->count)
 582                             {
 583                               direct->redirect_callee (direct2->callee);
 584                               indirect->count += indirect2->count
 585                                                  + direct->count;
 586                               direct->count = direct2->count;
 587                             }
 588                           else
 589                             indirect->count += indirect2->count + direct2->count;
 590                         }
 591                       else
 592                         {
 593                            direct->count += direct2->count;
 594                            indirect->count += indirect2->count;
 595                         }
 596                     }
 597                   int  prob = RDIV (direct->count * REG_BR_PROB_BASE ,
 598                                     direct->count + indirect->count);
 599                   direct->frequency = RDIV (freq * prob, REG_BR_PROB_BASE);
 600                   indirect->frequency = RDIV (freq * (REG_BR_PROB_BASE - prob),
 601                                               REG_BR_PROB_BASE);
 602                 }
 603               else
 604                 /* At the moment we should have only profile feedback based
 605                    speculations when merging.  */
 606                 gcc_unreachable ();
 607             }
 608           else if (e2 && e2->speculative)
 609             {
 610               cgraph_edge *direct, *indirect;
 611               ipa_ref *ref;
 612
 613               e2->speculative_call_info (direct, indirect, ref);
 614               e->count = count;
 615               e->frequency = freq;
 616               int prob = RDIV (direct->count * REG_BR_PROB_BASE, e->count);
 617               e->make_speculative (direct->callee, direct->count,
 618                                    RDIV (freq * prob, REG_BR_PROB_BASE));
 619             }
 620           else
 621             {
 622               e->count = count;
 623               e->frequency = freq;
 624             }
 625         }
 626       if (!preserve_body)
 627         src->release_body ();
 628       inline_update_overall_summary (dst);
 629     }
 630   /* TODO: if there is no match, we can scale up.  */
 631   src->decl = oldsrcdecl;
 632 }
 633
 634 /* Return true if call to DEST is known to be self-recusive call withing FUNC.   */
 635
 636 bool
 637 recursive_call_p (tree func, tree dest)
 638 {
 639   struct cgraph_node *dest_node = cgraph_node::get_create (dest);
 640   struct cgraph_node *cnode = cgraph_node::get_create (func);
 641
 642   return dest_node->semantically_equivalent_p (cnode);
 643 }