1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "flags.h"
25 #include "basic-block.h"
26 #include "function.h"
27 #include "gimple.h"
28 #include "gimple-iterator.h"
29 #include "gimple-ssa.h"
30 #include "tree-phinodes.h"
31 #include "tree-ssa.h"
32 #include "tree-ssa-threadupdate.h"
33 #include "dumpfile.h"
34 #include "cfgloop.h"
35 #include "hash-table.h"
36 #include "dbgcnt.h"
38 /* Given a block B, update the CFG and SSA graph to reflect redirecting
39 one or more in-edges to B to instead reach the destination of an
40 out-edge from B while preserving any side effects in B.
42 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
43 side effects of executing B.
45 1. Make a copy of B (including its outgoing edges and statements). Call
46 the copy B'. Note B' has no incoming edges or PHIs at this time.
48 2. Remove the control statement at the end of B' and all outgoing edges
49 except B'->C.
51 3. Add a new argument to each PHI in C with the same value as the existing
52 argument associated with edge B->C. Associate the new PHI arguments
53 with the edge B'->C.
55 4. For each PHI in B, find or create a PHI in B' with an identical
56 PHI_RESULT. Add an argument to the PHI in B' which has the same
57 value as the PHI in B associated with the edge A->B. Associate
58 the new argument in the PHI in B' with the edge A->B.
60 5. Change the edge A->B to A->B'.
62 5a. This automatically deletes any PHI arguments associated with the
63 edge A->B in B.
65 5b. This automatically associates each new argument added in step 4
66 with the edge A->B'.
68 6. Repeat for other incoming edges into B.
70 7. Put the duplicated resources in B and all the B' blocks into SSA form.
72 Note that block duplication can be minimized by first collecting the
73 set of unique destination blocks that the incoming edges should
74 be threaded to.
76 We reduce the number of edges and statements we create by not copying all
77 the outgoing edges and the control statement in step #1. We instead create
78 a template block without the outgoing edges and duplicate the template.
80 Another case this code handles is threading through a "joiner" block. In
81 this case, we do not know the destination of the joiner block, but one
82 of the outgoing edges from the joiner block leads to a threadable path. This
83 case largely works as outlined above, except the duplicate of the joiner
84 block still contains a full set of outgoing edges and its control statement.
85 We just redirect one of its outgoing edges to our jump threading path. */
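/* As a small illustrative sketch (block names are hypothetical): given

     A --> B --> C
           |
           +---> D

   where we know that control entering B from A always leaves through
   B->C, the steps above create a copy B' whose only out-edge is B'->C
   and redirect A->B to A->B'.  The original B remains in place to
   handle any other incoming edges.  */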
88 /* Steps #5 and #6 of the above algorithm are best implemented by walking
89 all the incoming edges which thread to the same destination edge at
90 the same time. That avoids lots of table lookups to get information
91 for the destination edge.
93 To realize that implementation we create a list of incoming edges
94 which thread to the same outgoing edge. Thus to implement steps
95 #5 and #6 we traverse our hash table of outgoing edge information.
96 For each entry we walk the list of incoming edges which thread to
97 the current outgoing edge. */
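/* For instance (purely illustrative), if the incoming edges A->B, E->B
   and F->B all thread to the same outgoing edge B->C, then all three
   sit on the incoming-edge list of the single hash table entry for that
   path, and one walk of the entry performs steps #5 and #6 for each of
   them.  */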
99 struct el
101 edge e;
102 struct el *next;
105 /* Main data structure recording information regarding B's duplicate
106 blocks. */
108 /* We need to efficiently record the unique thread destinations of this
109 block and specific information associated with those destinations. We
110 may have many incoming edges threaded to the same outgoing edge. This
111 can be naturally implemented with a hash table. */
113 struct redirection_data : typed_free_remove<redirection_data>
115 /* We support wiring up two block duplicates in a jump threading path.
117 One is a normal block copy where we remove the control statement
118 and wire up its single remaining outgoing edge to the thread path.
120 The other is a joiner block where we leave the control statement
121 in place, but wire one of the outgoing edges to a thread path.
123 In theory we could have multiple block duplicates in a jump
124 threading path, but I haven't tried that.
126 The duplicate blocks appear in this array in the same order in
127 which they appear in the jump thread path. */
128 basic_block dup_blocks[2];
130 /* The jump threading path. */
131 vec<jump_thread_edge *> *path;
133 /* A list of incoming edges which we want to thread to the
134 same path. */
135 struct el *incoming_edges;
137 /* hash_table support. */
138 typedef redirection_data value_type;
139 typedef redirection_data compare_type;
140 static inline hashval_t hash (const value_type *);
141 static inline int equal (const value_type *, const compare_type *);
144 /* Simple hashing function. For any given incoming edge E, we're going
145 to be most concerned with the final destination of its jump thread
146 path. So hash on the block index of the final edge in the path. */
148 inline hashval_t
149 redirection_data::hash (const value_type *p)
151 vec<jump_thread_edge *> *path = p->path;
152 return path->last ()->e->dest->index;
155 /* Given two hash table entries, return true if they have the same
156 jump threading path. */
157 inline int
158 redirection_data::equal (const value_type *p1, const compare_type *p2)
160 vec<jump_thread_edge *> *path1 = p1->path;
161 vec<jump_thread_edge *> *path2 = p2->path;
163 if (path1->length () != path2->length ())
164 return false;
166 for (unsigned int i = 1; i < path1->length (); i++)
168 if ((*path1)[i]->type != (*path2)[i]->type
169 || (*path1)[i]->e != (*path2)[i]->e)
170 return false;
173 return true;
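/* As an illustrative consequence of the two functions above (block
   names are hypothetical): the paths {A->B, B->C, C->D} and
   {E->B, B->C, C->D} hash to the same bucket (both end in D) and
   compare equal because element zero -- the incoming edge -- is
   ignored.  Both incoming edges therefore share one redirection_data
   entry and thus one set of duplicate blocks.  */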
176 /* Data structure of information to pass to hash table traversal routines. */
177 struct ssa_local_info_t
179 /* The current block we are working on. */
180 basic_block bb;
182 /* We only create a template block for the first duplicated block in a
183 jump threading path as we may need many duplicates of that block.
185 The second duplicate block in a path is specific to that path. Creating
186 and sharing a template for that block is considerably more difficult. */
187 basic_block template_block;
189 /* TRUE if we thread one or more jumps, FALSE otherwise. */
190 bool jumps_threaded;
193 /* Passes which use the jump threading code register jump threading
194 opportunities as they are discovered. We keep the registered
 195    jump threading opportunities in this vector; each element is a
 196    jump thread path (a vector of jump_thread_edge pointers).  */
197 static vec<vec<jump_thread_edge *> *> paths;
199 /* When we start updating the CFG for threading, data necessary for jump
200 threading is attached to the AUX field for the incoming edge. Use these
201 macros to access the underlying structure attached to the AUX field. */
202 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
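/* A minimal usage sketch (E is a hypothetical edge whose AUX field
   already carries a jump thread path):

     vec<jump_thread_edge *> *path = THREAD_PATH (e);
     basic_block final_dest = path->last ()->e->dest;

   i.e., the final destination of the thread, whose index is what
   redirection_data::hash above keys on.  */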
204 /* Jump threading statistics. */
206 struct thread_stats_d
208 unsigned long num_threaded_edges;
211 struct thread_stats_d thread_stats;
 214 /* Remove the last statement in block BB if it is a control statement.
215 Also remove all outgoing edges except the edge which reaches DEST_BB.
216 If DEST_BB is NULL, then remove all outgoing edges. */
218 static void
219 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
221 gimple_stmt_iterator gsi;
222 edge e;
223 edge_iterator ei;
225 gsi = gsi_last_bb (bb);
227 /* If the duplicate ends with a control statement, then remove it.
229 Note that if we are duplicating the template block rather than the
230 original basic block, then the duplicate might not have any real
231 statements in it. */
232 if (!gsi_end_p (gsi)
233 && gsi_stmt (gsi)
234 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
235 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
236 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
237 gsi_remove (&gsi, true);
239 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
241 if (e->dest != dest_bb)
242 remove_edge (e);
243 else
244 ei_next (&ei);
248 /* Create a duplicate of BB. Record the duplicate block in an array
249 indexed by COUNT stored in RD. */
251 static void
252 create_block_for_threading (basic_block bb,
253 struct redirection_data *rd,
254 unsigned int count)
256 edge_iterator ei;
257 edge e;
259 /* We can use the generic block duplication code and simply remove
260 the stuff we do not need. */
261 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
263 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
264 e->aux = NULL;
266 /* Zero out the profile, since the block is unreachable for now. */
267 rd->dup_blocks[count]->frequency = 0;
268 rd->dup_blocks[count]->count = 0;
271 /* Main data structure to hold information for duplicates of BB. */
273 static hash_table <redirection_data> redirection_data;
 275 /* Given an edge E whose AUX field holds a jump thread path, look up and
 277    return the hash table entry for that path.  If INSERT is true, then we
 278    insert the entry into the hash table if it is not already present, and
 279    E is added to the list of incoming edges associated with the entry.  */
281 static struct redirection_data *
282 lookup_redirection_data (edge e, enum insert_option insert)
284 struct redirection_data **slot;
285 struct redirection_data *elt;
286 vec<jump_thread_edge *> *path = THREAD_PATH (e);
288 /* Build a hash table element so we can see if E is already
289 in the table. */
290 elt = XNEW (struct redirection_data);
291 elt->path = path;
292 elt->dup_blocks[0] = NULL;
293 elt->dup_blocks[1] = NULL;
294 elt->incoming_edges = NULL;
296 slot = redirection_data.find_slot (elt, insert);
298 /* This will only happen if INSERT is false and the entry is not
299 in the hash table. */
300 if (slot == NULL)
302 free (elt);
303 return NULL;
306 /* This will only happen if E was not in the hash table and
307 INSERT is true. */
308 if (*slot == NULL)
310 *slot = elt;
311 elt->incoming_edges = XNEW (struct el);
312 elt->incoming_edges->e = e;
313 elt->incoming_edges->next = NULL;
314 return elt;
316 /* E was in the hash table. */
317 else
319 /* Free ELT as we do not need it anymore, we will extract the
320 relevant entry from the hash table itself. */
321 free (elt);
323 /* Get the entry stored in the hash table. */
324 elt = *slot;
 326       /* If insertion was requested, then we need to add E to the list
 327          of incoming edges associated with this hash table entry.  */
328 if (insert)
330 struct el *el = XNEW (struct el);
331 el->next = elt->incoming_edges;
332 el->e = e;
333 elt->incoming_edges = el;
336 return elt;
340 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E. */
342 static void
343 copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
345 gimple_stmt_iterator gsi;
346 int src_indx = src_e->dest_idx;
348 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
350 gimple phi = gsi_stmt (gsi);
351 source_location locus = gimple_phi_arg_location (phi, src_indx);
352 add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
356 /* We have recently made a copy of ORIG_BB, including its outgoing
357 edges. The copy is NEW_BB. Every PHI node in every direct successor of
 358    ORIG_BB has a new argument associated with the edge from NEW_BB to the
359 successor. Initialize the PHI argument so that it is equal to the PHI
360 argument associated with the edge from ORIG_BB to the successor. */
362 static void
363 update_destination_phis (basic_block orig_bb, basic_block new_bb)
365 edge_iterator ei;
366 edge e;
368 FOR_EACH_EDGE (e, ei, orig_bb->succs)
370 edge e2 = find_edge (new_bb, e->dest);
371 copy_phi_args (e->dest, e, e2);
375 /* Given a duplicate block and its single destination (both stored
 376    in RD), create an edge between the duplicate and its single
377 destination.
379 Add an additional argument to any PHI nodes at the single
380 destination. */
382 static void
383 create_edge_and_update_destination_phis (struct redirection_data *rd,
384 basic_block bb)
386 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
388 rescan_loop_exit (e, true, false);
389 e->probability = REG_BR_PROB_BASE;
390 e->count = bb->count;
392 /* We have to copy path -- which means creating a new vector as well
393 as all the jump_thread_edge entries. */
394 if (rd->path->last ()->e->aux)
396 vec<jump_thread_edge *> *path = THREAD_PATH (rd->path->last ()->e);
397 vec<jump_thread_edge *> *copy = new vec<jump_thread_edge *> ();
399 /* Sadly, the elements of the vector are pointers and need to
400 be copied as well. */
401 for (unsigned int i = 0; i < path->length (); i++)
403 jump_thread_edge *x
404 = new jump_thread_edge ((*path)[i]->e, (*path)[i]->type);
405 copy->safe_push (x);
407 e->aux = (void *)copy;
409 else
411 e->aux = NULL;
414 /* If there are any PHI nodes at the destination of the outgoing edge
415 from the duplicate block, then we will need to add a new argument
416 to them. The argument should have the same value as the argument
417 associated with the outgoing edge stored in RD. */
418 copy_phi_args (e->dest, rd->path->last ()->e, e);
421 /* Wire up the outgoing edges from the duplicate block and
422 update any PHIs as needed. */
423 void
424 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
425 ssa_local_info_t *local_info)
427 edge e = rd->incoming_edges->e;
428 vec<jump_thread_edge *> *path = THREAD_PATH (e);
 430   /* If we were threading through a joiner block, then we want
431 to keep its control statement and redirect an outgoing edge.
432 Else we want to remove the control statement & edges, then create
433 a new outgoing edge. In both cases we may need to update PHIs. */
434 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
436 edge victim;
437 edge e2;
439 /* This updates the PHIs at the destination of the duplicate
440 block. */
441 update_destination_phis (local_info->bb, rd->dup_blocks[0]);
443 /* Find the edge from the duplicate block to the block we're
444 threading through. That's the edge we want to redirect. */
445 victim = find_edge (rd->dup_blocks[0], (*path)[1]->e->dest);
446 e2 = redirect_edge_and_branch (victim, path->last ()->e->dest);
447 e2->count = path->last ()->e->count;
449 /* If we redirected the edge, then we need to copy PHI arguments
450 at the target. If the edge already existed (e2 != victim case),
451 then the PHIs in the target already have the correct arguments. */
452 if (e2 == victim)
453 copy_phi_args (e2->dest, path->last ()->e, e2);
455 else
457 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[0], NULL);
458 create_edge_and_update_destination_phis (rd, rd->dup_blocks[0]);
461 /* Hash table traversal callback routine to create duplicate blocks. */
464 ssa_create_duplicates (struct redirection_data **slot,
465 ssa_local_info_t *local_info)
467 struct redirection_data *rd = *slot;
469 /* The second duplicated block in a jump threading path is specific
 470      to the path.  So it gets stored in RD rather than in LOCAL_INFO.
472 Each time we're called, we have to look through the path and see
473 if a second block needs to be duplicated.
475 Note the search starts with the third edge on the path. The first
476 edge is the incoming edge, the second edge always has its source
477 duplicated. Thus we start our search with the third edge. */
478 vec<jump_thread_edge *> *path = rd->path;
479 for (unsigned int i = 2; i < path->length (); i++)
481 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
482 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
484 create_block_for_threading ((*path)[i]->e->src, rd, 1);
485 break;
489 /* Create a template block if we have not done so already. Otherwise
490 use the template to create a new block. */
491 if (local_info->template_block == NULL)
493 create_block_for_threading ((*path)[1]->e->src, rd, 0);
494 local_info->template_block = rd->dup_blocks[0];
496 /* We do not create any outgoing edges for the template. We will
497 take care of that in a later traversal. That way we do not
498 create edges that are going to just be deleted. */
500 else
502 create_block_for_threading (local_info->template_block, rd, 0);
504 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
505 block. */
506 ssa_fix_duplicate_block_edges (rd, local_info);
509 /* Keep walking the hash table. */
510 return 1;
513 /* We did not create any outgoing edges for the template block during
514 block creation. This hash table traversal callback creates the
515 outgoing edge for the template block. */
517 inline int
518 ssa_fixup_template_block (struct redirection_data **slot,
519 ssa_local_info_t *local_info)
521 struct redirection_data *rd = *slot;
 523   /* If this is the template block, halt the traversal after updating
524 it appropriately.
 526      If we were threading through a joiner block, then we want
527 to keep its control statement and redirect an outgoing edge.
528 Else we want to remove the control statement & edges, then create
529 a new outgoing edge. In both cases we may need to update PHIs. */
530 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
532 ssa_fix_duplicate_block_edges (rd, local_info);
533 return 0;
536 return 1;
539 /* Hash table traversal callback to redirect each incoming edge
540 associated with this hash table element to its new destination. */
543 ssa_redirect_edges (struct redirection_data **slot,
544 ssa_local_info_t *local_info)
546 struct redirection_data *rd = *slot;
547 struct el *next, *el;
 549   /* Walk over all the incoming edges associated with this
550 hash table entry. */
551 for (el = rd->incoming_edges; el; el = next)
553 edge e = el->e;
554 vec<jump_thread_edge *> *path = THREAD_PATH (e);
556 /* Go ahead and free this element from the list. Doing this now
557 avoids the need for another list walk when we destroy the hash
558 table. */
559 next = el->next;
560 free (el);
562 thread_stats.num_threaded_edges++;
564 if (rd->dup_blocks[0])
566 edge e2;
568 if (dump_file && (dump_flags & TDF_DETAILS))
569 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
570 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
572 rd->dup_blocks[0]->count += e->count;
574 /* Excessive jump threading may make frequencies large enough so
575 the computation overflows. */
576 if (rd->dup_blocks[0]->frequency < BB_FREQ_MAX * 2)
577 rd->dup_blocks[0]->frequency += EDGE_FREQUENCY (e);
579 /* In the case of threading through a joiner block, the outgoing
580 edges from the duplicate block were updated when they were
581 redirected during ssa_fix_duplicate_block_edges. */
582 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
583 EDGE_SUCC (rd->dup_blocks[0], 0)->count += e->count;
585 /* Redirect the incoming edge (possibly to the joiner block) to the
586 appropriate duplicate block. */
587 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
588 gcc_assert (e == e2);
589 flush_pending_stmts (e2);
592 /* Go ahead and clear E->aux. It's not needed anymore and failure
593 to clear it will cause all kinds of unpleasant problems later. */
594 delete_jump_thread_path (path);
595 e->aux = NULL;
599 /* Indicate that we actually threaded one or more jumps. */
600 if (rd->incoming_edges)
601 local_info->jumps_threaded = true;
603 return 1;
606 /* Return true if this block has no executable statements other than
607 a simple ctrl flow instruction. When the number of outgoing edges
608 is one, this is equivalent to a "forwarder" block. */
610 static bool
611 redirection_block_p (basic_block bb)
613 gimple_stmt_iterator gsi;
615 /* Advance to the first executable statement. */
616 gsi = gsi_start_bb (bb);
617 while (!gsi_end_p (gsi)
618 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
619 || is_gimple_debug (gsi_stmt (gsi))
620 || gimple_nop_p (gsi_stmt (gsi))))
621 gsi_next (&gsi);
623 /* Check if this is an empty block. */
624 if (gsi_end_p (gsi))
625 return true;
627 /* Test that we've reached the terminating control statement. */
628 return gsi_stmt (gsi)
629 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
630 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
631 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
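/* As an illustrative example (hypothetical GIMPLE, not from a testcase),
   a block containing only

     <L3>:
     # DEBUG i => i_5
     if (i_5 > 99) goto <bb 8>; else goto <bb 9>;

   satisfies redirection_block_p: the label and the debug statement are
   skipped and the first real statement is the terminating GIMPLE_COND.  */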
 634 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR, and when BB
635 is reached via one or more specific incoming edges, we know which
636 outgoing edge from BB will be traversed.
638 We want to redirect those incoming edges to the target of the
639 appropriate outgoing edge. Doing so avoids a conditional branch
640 and may expose new optimization opportunities. Note that we have
641 to update dominator tree and SSA graph after such changes.
643 The key to keeping the SSA graph update manageable is to duplicate
644 the side effects occurring in BB so that those side effects still
645 occur on the paths which bypass BB after redirecting edges.
647 We accomplish this by creating duplicates of BB and arranging for
648 the duplicates to unconditionally pass control to one specific
649 successor of BB. We then revector the incoming edges into BB to
650 the appropriate duplicate of BB.
652 If NOLOOP_ONLY is true, we only perform the threading as long as it
653 does not affect the structure of the loops in a nontrivial way.
655 If JOINERS is true, then thread through joiner blocks as well. */
657 static bool
658 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
660 /* E is an incoming edge into BB that we may or may not want to
661 redirect to a duplicate of BB. */
662 edge e, e2;
663 edge_iterator ei;
664 ssa_local_info_t local_info;
665 struct loop *loop = bb->loop_father;
 667   /* To avoid scanning a linear array for the element we need, we instead
668 use a hash table. For normal code there should be no noticeable
669 difference. However, if we have a block with a large number of
670 incoming and outgoing edges such linear searches can get expensive. */
671 redirection_data.create (EDGE_COUNT (bb->succs));
673 /* If we thread the latch of the loop to its exit, the loop ceases to
674 exist. Make sure we do not restrict ourselves in order to preserve
675 this loop. */
676 if (loop->header == bb)
678 e = loop_latch_edge (loop);
679 vec<jump_thread_edge *> *path = THREAD_PATH (e);
681 if (path
682 && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
683 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
685 for (unsigned int i = 1; i < path->length (); i++)
687 edge e2 = (*path)[i]->e;
689 if (loop_exit_edge_p (loop, e2))
691 loop->header = NULL;
692 loop->latch = NULL;
693 loops_state_set (LOOPS_NEED_FIXUP);
699 /* Record each unique threaded destination into a hash table for
700 efficient lookups. */
701 FOR_EACH_EDGE (e, ei, bb->preds)
703 if (e->aux == NULL)
704 continue;
706 vec<jump_thread_edge *> *path = THREAD_PATH (e);
708 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
709 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
710 continue;
712 e2 = path->last ()->e;
713 if (!e2 || noloop_only)
715 /* If NOLOOP_ONLY is true, we only allow threading through the
716 header of a loop to exit edges.
 718          There are two cases to consider.  The first is when BB is the
719 loop header. We will attempt to thread this elsewhere, so
720 we can just continue here. */
722 if (bb == bb->loop_father->header
723 && (!loop_exit_edge_p (bb->loop_father, e2)
724 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
725 continue;
 728          /* The second occurs when there was a loop header buried in a jump
 729             threading path.  We do not try to thread this elsewhere, so
730 just cancel the jump threading request by clearing the AUX
731 field now. */
732 if ((bb->loop_father != e2->src->loop_father
733 && !loop_exit_edge_p (e2->src->loop_father, e2))
734 || (e2->src->loop_father != e2->dest->loop_father
735 && !loop_exit_edge_p (e2->src->loop_father, e2)))
737 /* Since this case is not handled by our special code
738 to thread through a loop header, we must explicitly
739 cancel the threading request here. */
740 delete_jump_thread_path (path);
741 e->aux = NULL;
742 continue;
746 if (e->dest == e2->src)
747 update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
748 e->count, (*THREAD_PATH (e))[1]->e);
750 /* Insert the outgoing edge into the hash table if it is not
751 already in the hash table. */
752 lookup_redirection_data (e, INSERT);
755 /* We do not update dominance info. */
756 free_dominance_info (CDI_DOMINATORS);
758 /* We know we only thread through the loop header to loop exits.
759 Let the basic block duplication hook know we are not creating
760 a multiple entry loop. */
761 if (noloop_only
762 && bb == bb->loop_father->header)
763 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
765 /* Now create duplicates of BB.
767 Note that for a block with a high outgoing degree we can waste
768 a lot of time and memory creating and destroying useless edges.
770 So we first duplicate BB and remove the control structure at the
771 tail of the duplicate as well as all outgoing edges from the
772 duplicate. We then use that duplicate block as a template for
773 the rest of the duplicates. */
774 local_info.template_block = NULL;
775 local_info.bb = bb;
776 local_info.jumps_threaded = false;
777 redirection_data.traverse <ssa_local_info_t *, ssa_create_duplicates>
778 (&local_info);
780 /* The template does not have an outgoing edge. Create that outgoing
 781      edge and update PHI nodes at the edge's target as necessary.
783 We do this after creating all the duplicates to avoid creating
784 unnecessary edges. */
785 redirection_data.traverse <ssa_local_info_t *, ssa_fixup_template_block>
786 (&local_info);
788 /* The hash table traversals above created the duplicate blocks (and the
789 statements within the duplicate blocks). This loop creates PHI nodes for
790 the duplicated blocks and redirects the incoming edges into BB to reach
791 the duplicates of BB. */
792 redirection_data.traverse <ssa_local_info_t *, ssa_redirect_edges>
793 (&local_info);
795 /* Done with this block. Clear REDIRECTION_DATA. */
796 redirection_data.dispose ();
798 if (noloop_only
799 && bb == bb->loop_father->header)
800 set_loop_copy (bb->loop_father, NULL);
802 /* Indicate to our caller whether or not any jumps were threaded. */
803 return local_info.jumps_threaded;
806 /* Wrapper for thread_block_1 so that we can first handle jump
807 thread paths which do not involve copying joiner blocks, then
808 handle jump thread paths which have joiner blocks.
810 By doing things this way we can be as aggressive as possible and
811 not worry that copying a joiner block will create a jump threading
812 opportunity. */
814 static bool
815 thread_block (basic_block bb, bool noloop_only)
817 bool retval;
818 retval = thread_block_1 (bb, noloop_only, false);
819 retval |= thread_block_1 (bb, noloop_only, true);
820 return retval;
 824 /* Threads edge E through E->dest to the second edge on E's recorded jump
     thread path.  Returns the
825 copy of E->dest created during threading, or E->dest if it was not necessary
826 to copy it (E is its single predecessor). */
828 static basic_block
829 thread_single_edge (edge e)
831 basic_block bb = e->dest;
832 struct redirection_data rd;
833 vec<jump_thread_edge *> *path = THREAD_PATH (e);
834 edge eto = (*path)[1]->e;
836 for (unsigned int i = 0; i < path->length (); i++)
837 delete (*path)[i];
838 delete path;
839 e->aux = NULL;
841 thread_stats.num_threaded_edges++;
843 if (single_pred_p (bb))
845 /* If BB has just a single predecessor, we should only remove the
 846          control statements at its end, and all successor edges except ETO.  */
847 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
849 /* And fixup the flags on the single remaining edge. */
850 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
851 eto->flags |= EDGE_FALLTHRU;
853 return bb;
856 /* Otherwise, we need to create a copy. */
857 if (e->dest == eto->src)
858 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
860 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
861 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
862 npath->safe_push (x);
864 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
865 npath->safe_push (x);
866 rd.path = npath;
868 create_block_for_threading (bb, &rd, 0);
869 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
870 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0]);
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
874 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
876 rd.dup_blocks[0]->count = e->count;
877 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
878 single_succ_edge (rd.dup_blocks[0])->count = e->count;
879 redirect_edge_and_branch (e, rd.dup_blocks[0]);
880 flush_pending_stmts (e);
882 return rd.dup_blocks[0];
885 /* Callback for dfs_enumerate_from. Returns true if BB is different
886 from STOP and DBDS_CE_STOP. */
888 static basic_block dbds_ce_stop;
889 static bool
890 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
892 return (bb != (const_basic_block) stop
893 && bb != dbds_ce_stop);
 896 /* Evaluates the dominance relationship of the latch of LOOP and BB, and
897 returns the state. */
899 enum bb_dom_status
901 /* BB does not dominate latch of the LOOP. */
902 DOMST_NONDOMINATING,
 903   /* The LOOP is broken (there is no path from the header to its latch).  */
904 DOMST_LOOP_BROKEN,
905 /* BB dominates the latch of the LOOP. */
906 DOMST_DOMINATING
909 static enum bb_dom_status
910 determine_bb_domination_status (struct loop *loop, basic_block bb)
912 basic_block *bblocks;
913 unsigned nblocks, i;
914 bool bb_reachable = false;
915 edge_iterator ei;
916 edge e;
918 /* This function assumes BB is a successor of LOOP->header.
919 If that is not the case return DOMST_NONDOMINATING which
920 is always safe. */
922 bool ok = false;
924 FOR_EACH_EDGE (e, ei, bb->preds)
926 if (e->src == loop->header)
928 ok = true;
929 break;
933 if (!ok)
934 return DOMST_NONDOMINATING;
937 if (bb == loop->latch)
938 return DOMST_DOMINATING;
940 /* Check that BB dominates LOOP->latch, and that it is back-reachable
941 from it. */
943 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
944 dbds_ce_stop = loop->header;
945 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
946 bblocks, loop->num_nodes, bb);
947 for (i = 0; i < nblocks; i++)
948 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
950 if (e->src == loop->header)
952 free (bblocks);
953 return DOMST_NONDOMINATING;
955 if (e->src == bb)
956 bb_reachable = true;
959 free (bblocks);
960 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
963 /* Return true if BB is part of the new pre-header that is created
964 when threading the latch to DATA. */
966 static bool
967 def_split_header_continue_p (const_basic_block bb, const void *data)
969 const_basic_block new_header = (const_basic_block) data;
970 const struct loop *l;
972 if (bb == new_header
973 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
974 return false;
975 for (l = bb->loop_father; l; l = loop_outer (l))
976 if (l == new_header->loop_father)
977 return true;
978 return false;
981 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
982 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
983 to the inside of the loop. */
985 static bool
986 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
988 basic_block header = loop->header;
989 edge e, tgt_edge, latch = loop_latch_edge (loop);
990 edge_iterator ei;
991 basic_block tgt_bb, atgt_bb;
992 enum bb_dom_status domst;
994 /* We have already threaded through headers to exits, so all the threading
995 requests now are to the inside of the loop. We need to avoid creating
996 irreducible regions (i.e., loops with more than one entry block), and
 997      also loops with several latch edges, or new subloops of the loop (although
998 there are cases where it might be appropriate, it is difficult to decide,
999 and doing it wrongly may confuse other optimizers).
1001 We could handle more general cases here. However, the intention is to
1002 preserve some information about the loop, which is impossible if its
1003 structure changes significantly, in a way that is not well understood.
 1004      Thus we only handle a few important special cases, in which updating
 1005      the loop-carried information should also be feasible:
1007 1) Propagation of latch edge to a block that dominates the latch block
1008 of a loop. This aims to handle the following idiom:
1010 first = 1;
1011 while (1)
1013 if (first)
1014 initialize;
1015 first = 0;
1016 body;
1019 After threading the latch edge, this becomes
1021 first = 1;
1022 if (first)
1023 initialize;
1024 while (1)
1026 first = 0;
1027 body;
1030 The original header of the loop is moved out of it, and we may thread
1031 the remaining edges through it without further constraints.
1033 2) All entry edges are propagated to a single basic block that dominates
1034 the latch block of the loop. This aims to handle the following idiom
1035 (normally created for "for" loops):
1037 i = 0;
1038 while (1)
1040 if (i >= 100)
1041 break;
1042 body;
1043 i++;
1046 This becomes
1048 i = 0;
1049 while (1)
1051 body;
1052 i++;
1053 if (i >= 100)
1054 break;
1058 /* Threading through the header won't improve the code if the header has just
1059 one successor. */
1060 if (single_succ_p (header))
1061 goto fail;
1063 if (latch->aux)
1065 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1066 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1067 goto fail;
1068 tgt_edge = (*path)[1]->e;
1069 tgt_bb = tgt_edge->dest;
1071 else if (!may_peel_loop_headers
1072 && !redirection_block_p (loop->header))
1073 goto fail;
1074 else
1076 tgt_bb = NULL;
1077 tgt_edge = NULL;
1078 FOR_EACH_EDGE (e, ei, header->preds)
1080 if (!e->aux)
1082 if (e == latch)
1083 continue;
 1085              /* If the latch is not threaded, and there is a header
 1086                 edge that is not threaded, we would create a loop
 1087                 with multiple entries.  */
1088 goto fail;
1091 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1093 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1094 goto fail;
1095 tgt_edge = (*path)[1]->e;
1096 atgt_bb = tgt_edge->dest;
1097 if (!tgt_bb)
1098 tgt_bb = atgt_bb;
 1099          /* Two targets of threading would make us create a loop
 1100             with multiple entries.  */
1101 else if (tgt_bb != atgt_bb)
1102 goto fail;
1105 if (!tgt_bb)
1107 /* There are no threading requests. */
1108 return false;
1111 /* Redirecting to empty loop latch is useless. */
1112 if (tgt_bb == loop->latch
1113 && empty_block_p (loop->latch))
1114 goto fail;
1117 /* The target block must dominate the loop latch, otherwise we would be
1118 creating a subloop. */
1119 domst = determine_bb_domination_status (loop, tgt_bb);
1120 if (domst == DOMST_NONDOMINATING)
1121 goto fail;
1122 if (domst == DOMST_LOOP_BROKEN)
1124 /* If the loop ceased to exist, mark it as such, and thread through its
1125 original header. */
1126 loop->header = NULL;
1127 loop->latch = NULL;
1128 loops_state_set (LOOPS_NEED_FIXUP);
1129 return thread_block (header, false);
1132 if (tgt_bb->loop_father->header == tgt_bb)
1134 /* If the target of the threading is a header of a subloop, we need
1135 to create a preheader for it, so that the headers of the two loops
1136 do not merge. */
1137 if (EDGE_COUNT (tgt_bb->preds) > 2)
1139 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1140 gcc_assert (tgt_bb != NULL);
1142 else
1143 tgt_bb = split_edge (tgt_edge);
1146 if (latch->aux)
1148 basic_block *bblocks;
1149 unsigned nblocks, i;
 1151      /* First handle the case where the latch edge is redirected.  We are copying
1152 the loop header but not creating a multiple entry loop. Make the
1153 cfg manipulation code aware of that fact. */
1154 set_loop_copy (loop, loop);
1155 loop->latch = thread_single_edge (latch);
1156 set_loop_copy (loop, NULL);
1157 gcc_assert (single_succ (loop->latch) == tgt_bb);
1158 loop->header = tgt_bb;
1160 /* Remove the new pre-header blocks from our loop. */
1161 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1162 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1163 bblocks, loop->num_nodes, tgt_bb);
1164 for (i = 0; i < nblocks; i++)
1165 if (bblocks[i]->loop_father == loop)
1167 remove_bb_from_loops (bblocks[i]);
1168 add_bb_to_loop (bblocks[i], loop_outer (loop));
1170 free (bblocks);
 1172      /* If the new header has multiple latches, mark it so.  */
1173 FOR_EACH_EDGE (e, ei, loop->header->preds)
1174 if (e->src->loop_father == loop
1175 && e->src != loop->latch)
1177 loop->latch = NULL;
1178 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1181 /* Cancel remaining threading requests that would make the
1182 loop a multiple entry loop. */
1183 FOR_EACH_EDGE (e, ei, header->preds)
1185 edge e2;
1187 if (e->aux == NULL)
1188 continue;
1190 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1191 e2 = path->last ()->e;
1193 if (e->src->loop_father != e2->dest->loop_father
1194 && e2->dest != loop->header)
1196 delete_jump_thread_path (path);
1197 e->aux = NULL;
1201 /* Thread the remaining edges through the former header. */
1202 thread_block (header, false);
1204 else
1206 basic_block new_preheader;
 1208      /* Now consider the case where entry edges are redirected to the new entry
1209 block. Remember one entry edge, so that we can find the new
1210 preheader (its destination after threading). */
1211 FOR_EACH_EDGE (e, ei, header->preds)
1213 if (e->aux)
1214 break;
1217 /* The duplicate of the header is the new preheader of the loop. Ensure
1218 that it is placed correctly in the loop hierarchy. */
1219 set_loop_copy (loop, loop_outer (loop));
1221 thread_block (header, false);
1222 set_loop_copy (loop, NULL);
1223 new_preheader = e->dest;
1225 /* Create the new latch block. This is always necessary, as the latch
1226 must have only a single successor, but the original header had at
1227 least two successors. */
1228 loop->latch = NULL;
1229 mfb_kj_edge = single_succ_edge (new_preheader);
1230 loop->header = mfb_kj_edge->dest;
1231 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1232 loop->header = latch->dest;
1233 loop->latch = latch->src;
1236 return true;
1238 fail:
1239 /* We failed to thread anything. Cancel the requests. */
1240 FOR_EACH_EDGE (e, ei, header->preds)
1242 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1244 if (path)
1246 delete_jump_thread_path (path);
1247 e->aux = NULL;
1250 return false;
1253 /* E1 and E2 are edges into the same basic block. Return TRUE if the
1254 PHI arguments associated with those edges are equal or there are no
1255 PHI arguments, otherwise return FALSE. */
1257 static bool
1258 phi_args_equal_on_edges (edge e1, edge e2)
1260 gimple_stmt_iterator gsi;
1261 int indx1 = e1->dest_idx;
1262 int indx2 = e2->dest_idx;
1264 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1266 gimple phi = gsi_stmt (gsi);
1268 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1269 gimple_phi_arg_def (phi, indx2), 0))
1270 return false;
1272 return true;
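/* For example (hypothetical GIMPLE): with x_1 = PHI <7(3), 7(5)> in the
   common destination, where E1 and E2 come from blocks 3 and 5, the two
   arguments are equal and we return true; with x_1 = PHI <7(3), 8(5)>
   we return false.  */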
1275 /* Walk through the registered jump threads and convert them into a
1276 form convenient for this pass.
1278 Any block which has incoming edges threaded to outgoing edges
 1279    will have its entry in THREADED_BLOCKS set.
1281 Any threaded edge will have its new outgoing edge stored in the
1282 original edge's AUX field.
1284 This form avoids the need to walk all the edges in the CFG to
1285 discover blocks which need processing and avoids unnecessary
1286 hash table lookups to map from threaded edge to new target. */
1288 static void
1289 mark_threaded_blocks (bitmap threaded_blocks)
1291 unsigned int i;
1292 bitmap_iterator bi;
1293 bitmap tmp = BITMAP_ALLOC (NULL);
1294 basic_block bb;
1295 edge e;
1296 edge_iterator ei;
1298 /* Move the jump threading requests from PATHS to each edge
1299 which starts a jump thread path. */
1300 for (i = 0; i < paths.length (); i++)
1302 vec<jump_thread_edge *> *path = paths[i];
1303 edge e = (*path)[0]->e;
1304 e->aux = (void *)path;
1305 bitmap_set_bit (tmp, e->dest->index);
 1310   /* If optimizing for size, only thread through a block if we don't have
1311 to duplicate it or it's an otherwise empty redirection block. */
1312 if (optimize_function_for_size_p (cfun))
1314 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1316 bb = BASIC_BLOCK (i);
1317 if (EDGE_COUNT (bb->preds) > 1
1318 && !redirection_block_p (bb))
1320 FOR_EACH_EDGE (e, ei, bb->preds)
1322 if (e->aux)
1324 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1325 delete_jump_thread_path (path);
1326 e->aux = NULL;
1330 else
1331 bitmap_set_bit (threaded_blocks, i);
1334 else
1335 bitmap_copy (threaded_blocks, tmp);
1337 /* Look for jump threading paths which cross multiple loop headers.
1339 The code to thread through loop headers will change the CFG in ways
1340 that break assumptions made by the loop optimization code.
1342 We don't want to blindly cancel the requests. We can instead do better
1343 by trimming off the end of the jump thread path. */
1344 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1346 basic_block bb = BASIC_BLOCK (i);
1347 FOR_EACH_EDGE (e, ei, bb->preds)
1349 if (e->aux)
1351 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1353 /* Basically we're looking for a situation where we can see
1354 3 or more loop structures on a jump threading path. */
1356 struct loop *first_father = (*path)[0]->e->src->loop_father;
1357 struct loop *second_father = NULL;
1358 for (unsigned int i = 0; i < path->length (); i++)
1360 /* See if this is a loop father we have not seen before. */
1361 if ((*path)[i]->e->dest->loop_father != first_father
1362 && (*path)[i]->e->dest->loop_father != second_father)
1364 /* We've already seen two loop fathers, so we
1365 need to trim this jump threading path. */
1366 if (second_father != NULL)
1368 /* Trim from entry I onwards. */
1369 for (unsigned int j = i; j < path->length (); j++)
1370 delete (*path)[j];
1371 path->truncate (i);
1373 /* Now that we've truncated the path, make sure
1374 what's left is still valid. We need at least
 1375                         two edges on the path and the last edge cannot
1376 be a joiner. This should never happen, but let's
1377 be safe. */
1378 if (path->length () < 2
1379 || (path->last ()->type
1380 == EDGE_COPY_SRC_JOINER_BLOCK))
1382 delete_jump_thread_path (path);
1383 e->aux = NULL;
1385 break;
1387 else
1389 second_father = (*path)[i]->e->dest->loop_father;
1397 /* If we have a joiner block (J) which has two successors S1 and S2 and
 1398    we are threading through S1 and the final destination of the thread
1399 is S2, then we must verify that any PHI nodes in S2 have the same
1400 PHI arguments for the edge J->S2 and J->S1->...->S2.
1402 We used to detect this prior to registering the jump thread, but
1403 that prohibits propagation of edge equivalences into non-dominated
1404 PHI nodes as the equivalency test might occur before propagation.
1406 This must also occur after we truncate any jump threading paths
1407 as this scenario may only show up after truncation.
1409 This works for now, but will need improvement as part of the FSA
1410 optimization.
1412 Note since we've moved the thread request data to the edges,
1413 we have to iterate on those rather than the threaded_edges vector. */
1414 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1416 bb = BASIC_BLOCK (i);
1417 FOR_EACH_EDGE (e, ei, bb->preds)
1419 if (e->aux)
1421 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1422 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
1424 if (have_joiner)
1426 basic_block joiner = e->dest;
1427 edge final_edge = path->last ()->e;
1428 basic_block final_dest = final_edge->dest;
1429 edge e2 = find_edge (joiner, final_dest);
1431 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
1433 delete_jump_thread_path (path);
1434 e->aux = NULL;
1441 BITMAP_FREE (tmp);
1445 /* Walk through all blocks and thread incoming edges to the appropriate
1446 outgoing edge for each edge pair recorded in THREADED_EDGES.
1448 It is the caller's responsibility to fix the dominance information
1449 and rewrite duplicated SSA_NAMEs back into SSA form.
1451 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
1452 loop headers if it does not simplify the loop.
1454 Returns true if one or more edges were threaded, false otherwise. */
1456 bool
1457 thread_through_all_blocks (bool may_peel_loop_headers)
1459 bool retval = false;
1460 unsigned int i;
1461 bitmap_iterator bi;
1462 bitmap threaded_blocks;
1463 struct loop *loop;
1464 loop_iterator li;
1466 /* We must know about loops in order to preserve them. */
1467 gcc_assert (current_loops != NULL);
1469 if (!paths.exists ())
1470 return false;
1472 threaded_blocks = BITMAP_ALLOC (NULL);
1473 memset (&thread_stats, 0, sizeof (thread_stats));
1475 mark_threaded_blocks (threaded_blocks);
1477 initialize_original_copy_tables ();
1479 /* First perform the threading requests that do not affect
1480 loop structure. */
1481 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
1483 basic_block bb = BASIC_BLOCK (i);
1485 if (EDGE_COUNT (bb->preds) > 0)
1486 retval |= thread_block (bb, true);
1489 /* Then perform the threading through loop headers. We start with the
1490 innermost loop, so that the changes in cfg we perform won't affect
1491 further threading. */
1492 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1494 if (!loop->header
1495 || !bitmap_bit_p (threaded_blocks, loop->header->index))
1496 continue;
1498 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
1501 /* Assume we had a jump thread path which went from the latch to the exit
1502 and a path which goes from outside to inside the same loop.
1504 If the latch to exit was handled first, we will thread it and clear
1505 loop->header.
1507 The second path will be ignored by thread_block because we're going
1508 through a loop header. It will also be ignored by the loop above
1509 because loop->header is NULL.
1511 This results in the second path never being threaded. The failure
1512 mode is a dangling AUX field.
1514 This is inherently a bit of a pain to fix, so we just walk all the
1515 blocks and all the incoming edges to those blocks and clear their
1516 AUX fields. */
1517 basic_block bb;
1518 edge_iterator ei;
1519 edge e;
1520 FOR_EACH_BB (bb)
1522 FOR_EACH_EDGE (e, ei, bb->preds)
1523 if (e->aux)
1525 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1527 delete_jump_thread_path (path);
1528 e->aux = NULL;
1532 statistics_counter_event (cfun, "Jumps threaded",
1533 thread_stats.num_threaded_edges);
1535 free_original_copy_tables ();
1537 BITMAP_FREE (threaded_blocks);
1538 threaded_blocks = NULL;
1539 paths.release ();
1541 if (retval)
1542 loops_state_set (LOOPS_NEED_FIXUP);
1544 return retval;
 1547 /* Delete the jump threading path PATH.  We have to explicitly delete
1548 each entry in the vector, then the container. */
1550 void
1551 delete_jump_thread_path (vec<jump_thread_edge *> *path)
1553 for (unsigned int i = 0; i < path->length (); i++)
1554 delete (*path)[i];
1555 path->release();
1558 /* Dump a jump threading path, including annotations about each
1559 edge in the path. */
1561 static void
1562 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path)
1564 fprintf (dump_file,
1565 " Registering jump thread: (%d, %d) incoming edge; ",
1566 path[0]->e->src->index, path[0]->e->dest->index);
1568 for (unsigned int i = 1; i < path.length (); i++)
1570 /* We can get paths with a NULL edge when the final destination
1571 of a jump thread turns out to be a constant address. We dump
1572 those paths when debugging, so we have to be prepared for that
1573 possibility here. */
1574 if (path[i]->e == NULL)
1575 continue;
1577 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1578 fprintf (dump_file, " (%d, %d) joiner; ",
1579 path[i]->e->src->index, path[i]->e->dest->index);
1580 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
1581 fprintf (dump_file, " (%d, %d) normal;",
1582 path[i]->e->src->index, path[i]->e->dest->index);
1583 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
1584 fprintf (dump_file, " (%d, %d) nocopy;",
1585 path[i]->e->src->index, path[i]->e->dest->index);
1587 fputc ('\n', dump_file);
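/* With the format strings above, a two-edge path would be dumped
   roughly as (block numbers hypothetical):

     Registering jump thread: (3, 5) incoming edge;  (5, 7) normal;  */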
1590 /* Register a jump threading opportunity. We queue up all the jump
1591 threading opportunities discovered by a pass and update the CFG
1592 and SSA form all at once.
 1594    The first edge on PATH is the edge we can thread and the last edge is
 1595    its new target; i.e., we effectively record that the first edge's
 1596    destination can be changed to the last edge's destination after fixing the SSA graph.  */
1598 void
1599 register_jump_thread (vec<jump_thread_edge *> *path)
1601 if (!dbg_cnt (registered_jump_thread))
1603 delete_jump_thread_path (path);
1604 return;
1607 /* First make sure there are no NULL outgoing edges on the jump threading
1608 path. That can happen for jumping to a constant address. */
1609 for (unsigned int i = 0; i < path->length (); i++)
1610 if ((*path)[i]->e == NULL)
1612 if (dump_file && (dump_flags & TDF_DETAILS))
1614 fprintf (dump_file,
1615 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
1616 dump_jump_thread_path (dump_file, *path);
1619 delete_jump_thread_path (path);
1620 return;
1623 if (dump_file && (dump_flags & TDF_DETAILS))
1624 dump_jump_thread_path (dump_file, *path);
1626 if (!paths.exists ())
1627 paths.create (5);
1629 paths.safe_push (path);