/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010, 2011, 2012
   Free Software Foundation, Inc.
This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "flags.h"
#include "tm_p.h"
#include "basic-block.h"
#include "function.h"
#include "tree-flow.h"
#include "dumpfile.h"
#include "cfgloop.h"
/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
	the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
	except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
	argument associated with edge B->C.  Associate the new PHI arguments
	with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
	PHI_RESULT.  Add an argument to the PHI in B' which has the same
	value as the PHI in B associated with the edge A->B.  Associate
	the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

	5a. This automatically deletes any PHI arguments associated with the
	    edge A->B in B.

	5b. This automatically associates each new argument added in step 4
	    with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.

   Block duplication can be further minimized by using B instead of
   creating B' for one destination if all edges into B are going to be
   threaded to a successor of B.  We had code to do this at one time, but
   I'm not convinced it is correct with the changes to avoid mucking up
   the loop structure (which may cancel threading requests, thus a block
   which we thought was going to become unreachable may still be reachable).
   This code was also going to get ugly with the introduction of the ability
   for a single jump thread request to bypass multiple blocks.

   We further reduce the number of edges and statements we create by
   not copying all the outgoing edges and the control statement in
   step #1.  We instead create a template block without the outgoing
   edges and duplicate the template.  */
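
/* As a concrete illustration (with hypothetical block names), suppose the
   CFG contains

	A ---> B ---> C
	       |
	       +----> D

   and that along the edge A->B the condition at the end of B is known to
   select the edge B->C.  Threading produces a duplicate B' of B whose only
   successor is C, and A is redirected to it:

	A ---> B' ---> C
	       B ----> C
	       |
	       +-----> D

   The side effects of B still execute (in B'), but the conditional branch
   at the end of B is no longer executed on the path from A.  */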
/* Steps #5 and #6 of the above algorithm are best implemented by walking
   all the incoming edges which thread to the same destination edge at
   the same time.  That avoids lots of table lookups to get information
   for the destination edge.

   To realize that implementation we create a list of incoming edges
   which thread to the same outgoing edge.  Thus to implement steps
   #5 and #6 we traverse our hash table of outgoing edge information.
   For each entry we walk the list of incoming edges which thread to
   the current outgoing edge.  */

struct el
{
  edge e;
  struct el *next;
};
/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data
{
  /* A duplicate of B with the trailing control statement removed and which
     targets a single successor of B.  */
  basic_block dup_block;

  /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
     its single successor.  */
  edge outgoing_edge;
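
  /* If the jump thread continues through a block beyond B (the joiner
     case, where B's control statement is kept), the edge from B into
     that block; NULL for a simple thread.  This pairing with
     OUTGOING_EDGE is inferred from how lookup_redirection_data fills
     the structure in.  */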
  edge intermediate_edge;

  /* A list of incoming edges which we want to thread to
     OUTGOING_EDGE->dest.  */
  struct el *incoming_edges;
};
/* Main data structure to hold information for duplicates of BB.  */
static htab_t redirection_data;
/* Data structure of information to pass to hash table traversal routines.  */
struct local_info
{
  /* The current block we are working on.  */
  basic_block bb;

  /* A template copy of BB with no outgoing edges or control statement that
     we use for creating copies.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;
};
/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector as edge triples
   (original_edge, target_edge, target2_edge), where TARGET2_EDGE is
   NULL unless the jump thread continues through a joiner block.  */
static VEC(edge,heap) *threaded_edges;
/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use these
   macros to access the underlying structure attached to the AUX field.  */
#define THREAD_TARGET(E) ((edge *)(E)->aux)[0]
#define THREAD_TARGET2(E) ((edge *)(E)->aux)[1]
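
/* For example, after mark_threaded_blocks runs, a simple jump thread on
   incoming edge E leaves E->aux pointing at a two element array
   { target_edge, NULL }.  For a thread through a joiner block the array
   holds both recorded target edges, and a non-NULL THREAD_TARGET2 (E)
   is what identifies the joiner case throughout this file.  */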
/* Jump threading statistics.  */

struct thread_stats_d
{
  unsigned long num_threaded_edges;
};

struct thread_stats_d thread_stats;
/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	remove_edge (e);
      else
	ei_next (&ei);
    }
}
/* Create a duplicate of BB.  Record the duplicate block in RD.  */

static void
create_block_for_threading (basic_block bb, struct redirection_data *rd)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_block = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_block->succs)
    e->aux = NULL;

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_block->frequency = 0;
  rd->dup_block->count = 0;
}
/* Hashing and equality routines for our hash table.  */
static hashval_t
redirection_data_hash (const void *p)
{
  edge e = ((const struct redirection_data *)p)->outgoing_edge;
  return e->dest->index;
}

static int
redirection_data_eq (const void *p1, const void *p2)
{
  edge e1 = ((const struct redirection_data *)p1)->outgoing_edge;
  edge e2 = ((const struct redirection_data *)p2)->outgoing_edge;
  edge e3 = ((const struct redirection_data *)p1)->intermediate_edge;
  edge e4 = ((const struct redirection_data *)p2)->intermediate_edge;

  return e1 == e2 && e3 == e4;
}
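
/* Note the hash function keys only on the index of OUTGOING_EDGE's
   destination, while the equality function also compares
   INTERMEDIATE_EDGE.  That satisfies the usual htab contract: entries
   that compare equal hash identically, while two entries that merely
   collide (same final destination, different intermediate edge) are
   separated by the equality test.  */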
/* Given an edge E whose AUX field carries jump thread data, look up and
   return the entry for its target edge(s) in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present, and E is added to the list of incoming
   edges associated with the entry.  */

static struct redirection_data *
lookup_redirection_data (edge e, enum insert_option insert)
{
  void **slot;
  struct redirection_data *elt;

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->intermediate_edge = THREAD_TARGET2 (e) ? THREAD_TARGET (e) : NULL;
  elt->outgoing_edge = THREAD_TARGET2 (e) ? THREAD_TARGET2 (e)
					  : THREAD_TARGET (e);
  elt->dup_block = NULL;
  elt->incoming_edges = NULL;

  slot = htab_find_slot (redirection_data, elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      *slot = (void *)elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = (struct redirection_data *) *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
	 to the list of incoming edges associated with E.  */
      if (insert)
	{
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}
/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
{
  gimple_stmt_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      source_location locus = gimple_phi_arg_location (phi, src_indx);
      add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
    }
}
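
/* For instance (with hypothetical SSA names), if BB contains

	x_3 = PHI <x_1 (A), x_2 (B)>

   and SRC_E is the edge from B while TGT_E comes from a duplicate B',
   then after copy_phi_args the PHI reads

	x_3 = PHI <x_1 (A), x_2 (B), x_2 (B')>

   where the new argument reuses both the value and the source location
   of the argument on SRC_E.  */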
/* We have recently made a copy of ORIG_BB, including its outgoing
   edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
   ORIG_BB has a new argument associated with the edge from NEW_BB to the
   successor.  Initialize the PHI argument so that it is equal to the PHI
   argument associated with the edge from ORIG_BB to the successor.  */

static void
update_destination_phis (basic_block orig_bb, basic_block new_bb)
{
  edge_iterator ei;
  edge e;

  FOR_EACH_EDGE (e, ei, orig_bb->succs)
    {
      edge e2 = find_edge (new_bb, e->dest);
      copy_phi_args (e->dest, e, e2);
    }
}
/* Given a duplicate block and its single destination (both stored
   in RD), create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
					 basic_block bb)
{
  edge e = make_edge (bb, rd->outgoing_edge->dest, EDGE_FALLTHRU);

  rescan_loop_exit (e, true, false);
  e->probability = REG_BR_PROB_BASE;
  e->count = bb->count;

  if (rd->outgoing_edge->aux)
    {
      e->aux = (edge *) XNEWVEC (edge, 2);
      THREAD_TARGET(e) = THREAD_TARGET (rd->outgoing_edge);
      THREAD_TARGET2(e) = THREAD_TARGET2 (rd->outgoing_edge);
    }
  else
    {
      e->aux = NULL;
    }

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->outgoing_edge, e);
}
/* Wire up the outgoing edges from the duplicate block and
   update any PHIs as needed.  */
static void
fix_duplicate_block_edges (struct redirection_data *rd,
			   struct local_info *local_info)
{
  /* If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (THREAD_TARGET2 (rd->incoming_edges->e))
    {
      edge victim;
      edge e2;
      edge e = rd->incoming_edges->e;

      /* This updates the PHIs at the destination of the duplicate
	 block.  */
      update_destination_phis (local_info->bb, rd->dup_block);

      /* Find the edge from the duplicate block to the block we're
	 threading through.  That's the edge we want to redirect.  */
      victim = find_edge (rd->dup_block, THREAD_TARGET (e)->dest);
      e2 = redirect_edge_and_branch (victim, THREAD_TARGET2 (e)->dest);

      /* If we redirected the edge, then we need to copy PHI arguments
	 at the target.  If the edge already existed (e2 != victim case),
	 then the PHIs in the target already have the correct arguments.  */
      if (e2 == victim)
	copy_phi_args (e2->dest, THREAD_TARGET2 (e), e2);
    }
  else
    {
      remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
      create_edge_and_update_destination_phis (rd, rd->dup_block);
    }
}
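
/* A sketch of the joiner case with hypothetical blocks: a thread is
   registered as (A->J, J->B, B->C), where J is a "joiner" block whose own
   condition is not determined by the path, but where knowing we arrived
   via A->J->B determines the outcome of B's condition.  J is duplicated
   as J' with its control statement and outgoing edges intact; the copy of
   the edge J->B is then redirected from J' straight to C, bypassing B,
   while the other successors of J' are left alone.  */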
/* Hash table traversal callback routine to create duplicate blocks.  */
static int
create_duplicates (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading (local_info->bb, rd);
      local_info->template_block = rd->dup_block;

      /* We do not create any outgoing edges for the template.  We will
	 take care of that in a later traversal.  That way we do not
	 create edges that are going to just be deleted.  */
    }
  else
    {
      create_block_for_threading (local_info->template_block, rd);

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
	 block.  */
      fix_duplicate_block_edges (rd, local_info);
    }

  /* Keep walking the hash table.  */
  return 1;
}
/* We did not create any outgoing edges for the template block during
   block creation.  This hash table traversal callback creates the
   outgoing edge for the template block.  */

static int
fixup_template_block (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;

  /* If this is the template block, halt the traversal after updating
     it appropriately.

     If we were threading through a joiner block, then we want
     to keep its control statement and redirect an outgoing edge.
     Else we want to remove the control statement & edges, then create
     a new outgoing edge.  In both cases we may need to update PHIs.  */
  if (rd->dup_block && rd->dup_block == local_info->template_block)
    {
      fix_duplicate_block_edges (rd, local_info);
      return 0;
    }

  return 1;
}
/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.  */

static int
redirect_edges (void **slot, void *data)
{
  struct redirection_data *rd = (struct redirection_data *) *slot;
  struct local_info *local_info = (struct local_info *)data;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this hash
     table entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;

      /* Go ahead and free this element from the list.  Doing this now
	 avoids the need for another list walk when we destroy the hash
	 table.  */
      next = el->next;
      free (el);

      thread_stats.num_threaded_edges++;

      /* If we are threading through a joiner block, then we have to
	 find the edge we want to redirect and update some PHI nodes.  */
      if (THREAD_TARGET2 (e))
	{
	  edge e2;

	  /* We want to redirect the incoming edge to the joiner block (E)
	     to instead reach the duplicate of the joiner block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_block);
	  flush_pending_stmts (e2);
	}
      else if (rd->dup_block)
	{
	  edge e2;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
		     e->src->index, e->dest->index, rd->dup_block->index);

	  rd->dup_block->count += e->count;

	  /* Excessive jump threading may make frequencies large enough so
	     the computation overflows.  */
	  if (rd->dup_block->frequency < BB_FREQ_MAX * 2)
	    rd->dup_block->frequency += EDGE_FREQUENCY (e);
	  EDGE_SUCC (rd->dup_block, 0)->count += e->count;

	  /* Redirect the incoming edge to the appropriate duplicate
	     block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_block);
	  gcc_assert (e == e2);
	  flush_pending_stmts (e2);
	}

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
	 to clear it will cause all kinds of unpleasant problems later.  */
      free (e->aux);
      e->aux = NULL;
    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}
/* Return true if this block has no executable statements other than
   a simple ctrl flow instruction.  When the number of outgoing edges
   is one, this is equivalent to a "forwarder" block.  */

static bool
redirection_block_p (basic_block bb)
{
  gimple_stmt_iterator gsi;

  /* Advance to the first executable statement.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
	     || is_gimple_debug (gsi_stmt (gsi))
	     || gimple_nop_p (gsi_stmt (gsi))))
    gsi_next (&gsi);

  /* Check if this is an empty block.  */
  if (gsi_end_p (gsi))
    return true;

  /* Test that we've reached the terminating control statement.  */
  return gsi_stmt (gsi)
	 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	     || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
}
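
/* For example, a block whose body in a GIMPLE dump is nothing but

	<bb 5>:
	goto <bb 7>;

   qualifies: labels, debug statements and nops are skipped, and the first
   real statement found is the control transfer itself.  A block that
   computes anything before its branch does not qualify.  */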
/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update the dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.  */
static bool
thread_block (basic_block bb, bool noloop_only)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  struct local_info local_info;
  struct loop *loop = bb->loop_father;

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data = htab_create (EDGE_COUNT (bb->succs),
				  redirection_data_hash,
				  redirection_data_eq,
				  free);

  /* If we thread the latch of the loop to its exit, the loop ceases to
     exist.  Make sure we do not restrict ourselves in order to preserve
     this loop.  */
  if (loop->header == bb)
    {
      e = loop_latch_edge (loop);

      if (e->aux)
	e2 = THREAD_TARGET (e);
      else
	e2 = NULL;

      if (e2 && loop_exit_edge_p (loop, e2))
	{
	  loop->header = NULL;
	  loop->latch = NULL;
	  loops_state_set (LOOPS_NEED_FIXUP);
	}
    }

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (e->aux == NULL)
	continue;

      if (THREAD_TARGET2 (e))
	e2 = THREAD_TARGET2 (e);
      else
	e2 = THREAD_TARGET (e);

      if (!e2
	  /* If NOLOOP_ONLY is true, we only allow threading through the
	     header of a loop to exit edges.  */
	  || (noloop_only
	      && bb == bb->loop_father->header
	      && (!loop_exit_edge_p (bb->loop_father, e2)
		  || THREAD_TARGET2 (e))))
	continue;

      if (e->dest == e2->src)
	update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
					 e->count, THREAD_TARGET (e));

      /* Insert the outgoing edge into the hash table if it is not
	 already in the hash table.  */
      lookup_redirection_data (e, INSERT);
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  htab_traverse (redirection_data, create_duplicates, &local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes at the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  htab_traverse (redirection_data, fixup_template_block, &local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  htab_traverse (redirection_data, redirect_edges, &local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  htab_delete (redirection_data);
  redirection_data = NULL;

  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}
/* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
   copy of E->dest created during threading, or E->dest if it was not necessary
   to copy it (E is its single predecessor).  */

static basic_block
thread_single_edge (edge e)
{
  basic_block bb = e->dest;
  edge eto = THREAD_TARGET (e);
  struct redirection_data rd;

  free (e->aux);
  e->aux = NULL;

  thread_stats.num_threaded_edges++;

  if (single_pred_p (bb))
    {
      /* If BB has just a single predecessor, we should only remove the
	 control statements at its end, and successors except for ETO.  */
      remove_ctrl_stmt_and_useless_edges (bb, eto->dest);

      /* And fixup the flags on the single remaining edge.  */
      eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
      eto->flags |= EDGE_FALLTHRU;

      return bb;
    }

  /* Otherwise, we need to create a copy.  */
  if (e->dest == eto->src)
    update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);

  rd.outgoing_edge = eto;

  create_block_for_threading (bb, &rd);
  remove_ctrl_stmt_and_useless_edges (rd.dup_block, NULL);
  create_edge_and_update_destination_phis (&rd, rd.dup_block);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
	     e->src->index, e->dest->index, rd.dup_block->index);

  rd.dup_block->count = e->count;
  rd.dup_block->frequency = EDGE_FREQUENCY (e);
  single_succ_edge (rd.dup_block)->count = e->count;
  redirect_edge_and_branch (e, rd.dup_block);
  flush_pending_stmts (e);

  return rd.dup_block;
}
/* Callback for dfs_enumerate_from.  Returns true if BB is different
   from STOP and DBDS_CE_STOP.  */

static basic_block dbds_ce_stop;
static bool
dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
{
  return (bb != (const_basic_block) stop
	  && bb != dbds_ce_stop);
}
/* Evaluates the dominance relationship of latch of the LOOP and BB, and
   returns the state.  */

enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP.  */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch).  */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING
};
static enum bb_dom_status
determine_bb_domination_status (struct loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe.  */
  {
    bool ok = false;

    FOR_EACH_EDGE (e, ei, bb->preds)
      {
	if (e->src == loop->header)
	  {
	    ok = true;
	    break;
	  }
      }

    if (!ok)
      return DOMST_NONDOMINATING;
  }

  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
				bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
	if (e->src == loop->header)
	  {
	    free (bblocks);
	    return DOMST_NONDOMINATING;
	  }
	if (e->src == bb)
	  bb_reachable = true;
      }

  free (bblocks);
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}
/* Return true if BB is part of the new pre-header that is created
   when threading the latch to DATA.  */

static bool
def_split_header_continue_p (const_basic_block bb, const void *data)
{
  const_basic_block new_header = (const_basic_block) data;
  return (bb->loop_father == new_header->loop_father
	  && bb != new_header);
}
/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loops with several latch edges, or new subloops of the loop
     (although there are cases where it might be appropriate, it is difficult
     to decide, and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle a few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
	of a loop.  This aims to handle the following idiom:

	first = 1;
	while (1)
	  {
	    if (first)
	      initialize;
	    first = 0;
	    body;
	  }

	After threading the latch edge, this becomes

	first = 1;
	if (first)
	  initialize;
	while (1)
	  {
	    first = 0;
	    body;
	  }

	The original header of the loop is moved out of it, and we may thread
	the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
	the latch block of the loop.  This aims to handle the following idiom
	(normally created for "for" loops):

	i = 0;
	while (1)
	  {
	    if (i >= 100)
	      break;
	    body;
	    i++;
	  }

	This becomes

	i = 0;
	while (1)
	  {
	    body;
	    i++;
	    if (i >= 100)
	      break;
	  }
     */
  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (latch->aux)
    {
      if (THREAD_TARGET2 (latch))
	goto fail;
      tgt_edge = THREAD_TARGET (latch);
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
	   && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (!e->aux)
	    {
	      if (e == latch)
		continue;

	      /* If latch is not threaded, and there is a header
		 edge that is not threaded, we would create a loop
		 with multiple entries.  */
	      goto fail;
	    }

	  if (THREAD_TARGET2 (e))
	    goto fail;
	  tgt_edge = THREAD_TARGET (e);
	  atgt_bb = tgt_edge->dest;
	  if (!tgt_bb)
	    tgt_bb = atgt_bb;
	  /* Two targets of threading would make us create a loop
	     with multiple entries.  */
	  else if (tgt_bb != atgt_bb)
	    goto fail;
	}

      if (!tgt_bb)
	{
	  /* There are no threading requests.  */
	  return false;
	}

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
	  && empty_block_p (loop->latch))
	goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
	 original header.  */
      loop->header = NULL;
      loop->latch = NULL;
      loops_state_set (LOOPS_NEED_FIXUP);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
	 to create a preheader for it, so that the headers of the two loops
	 do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
	{
	  tgt_bb = create_preheader (tgt_bb->loop_father, 0);
	  gcc_assert (tgt_bb != NULL);
	}
      else
	tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      basic_block *bblocks;
      unsigned nblocks, i;

      /* First handle the case latch edge is redirected.  We are copying
	 the loop header but not creating a multiple entry loop.  Make the
	 cfg manipulation code aware of that fact.  */
      set_loop_copy (loop, loop);
      loop->latch = thread_single_edge (latch);
      set_loop_copy (loop, NULL);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Remove the new pre-header blocks from our loop.  */
      bblocks = XCNEWVEC (basic_block, loop->num_nodes);
      nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
				    bblocks, loop->num_nodes, tgt_bb);
      for (i = 0; i < nblocks; i++)
	{
	  remove_bb_from_loops (bblocks[i]);
	  add_bb_to_loop (bblocks[i], loop_outer (loop));
	}
      free (bblocks);

      /* Cancel remaining threading requests that would make the
	 loop a multiple entry loop.  */
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  edge e2;

	  if (e->aux == NULL)
	    continue;

	  if (THREAD_TARGET2 (e))
	    e2 = THREAD_TARGET2 (e);
	  else
	    e2 = THREAD_TARGET (e);

	  if (e->src->loop_father != e2->dest->loop_father
	      && e2->dest != loop->header)
	    {
	      free (e->aux);
	      e->aux = NULL;
	    }
	}

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case entry edges are redirected to the new entry
	 block.  Remember one entry edge, so that we can find the new
	 preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (e->aux)
	    break;
	}

      /* The duplicate of the header is the new preheader of the loop.  Ensure
	 that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
	 must have only a single successor, but the original header had at
	 least two successors.  */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      free (e->aux);
      e->aux = NULL;
    }
  return false;
}
/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCK set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.  */

static void
mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  bitmap tmp = BITMAP_ALLOC (NULL);
  basic_block bb;
  edge e;
  edge_iterator ei;

  for (i = 0; i < VEC_length (edge, threaded_edges); i += 3)
    {
      edge e = VEC_index (edge, threaded_edges, i);
      edge *x = (edge *) XNEWVEC (edge, 2);

      e->aux = x;
      THREAD_TARGET (e) = VEC_index (edge, threaded_edges, i + 1);
      THREAD_TARGET2 (e) = VEC_index (edge, threaded_edges, i + 2);
      bitmap_set_bit (tmp, e->dest->index);
    }

  /* If optimizing for size, only thread through a block if we don't have
     to duplicate it or it's an otherwise empty redirection block.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
	{
	  bb = BASIC_BLOCK (i);
	  if (EDGE_COUNT (bb->preds) > 1
	      && !redirection_block_p (bb))
	    {
	      FOR_EACH_EDGE (e, ei, bb->preds)
		{
		  free (e->aux);
		  e->aux = NULL;
		}
	    }
	  else
	    bitmap_set_bit (threaded_blocks, i);
	}
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  BITMAP_FREE (tmp);
}
/* Walk through all blocks and thread incoming edges to the appropriate
   outgoing edge for each edge triple recorded in THREADED_EDGES.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
   loop headers if it does not simplify the loop.

   Returns true if one or more edges were threaded, false otherwise.  */

bool
thread_through_all_blocks (bool may_peel_loop_headers)
{
  bool retval = false;
  unsigned int i;
  bitmap_iterator bi;
  bitmap threaded_blocks;
  struct loop *loop;
  loop_iterator li;

  /* We must know about loops in order to preserve them.  */
  gcc_assert (current_loops != NULL);

  if (threaded_edges == NULL)
    return false;

  threaded_blocks = BITMAP_ALLOC (NULL);
  memset (&thread_stats, 0, sizeof (thread_stats));

  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* First perform the threading requests that do not affect
     loop structure.  */
  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK (i);

      if (EDGE_COUNT (bb->preds) > 0)
	retval |= thread_block (bb, true);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (!loop->header
	  || !bitmap_bit_p (threaded_blocks, loop->header->index))
	continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  statistics_counter_event (cfun, "Jumps threaded",
			    thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  BITMAP_FREE (threaded_blocks);
  threaded_blocks = NULL;
  VEC_free (edge, heap, threaded_edges);
  threaded_edges = NULL;

  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

  return retval;
}
/* Register a jump threading opportunity.  We queue up all the jump
   threading opportunities discovered by a pass and update the CFG
   and SSA form all at once.

   E is the edge we can thread, E2 is the new target edge, i.e., we
   are effectively recording that E->dest can be changed to E2->dest
   after fixing the SSA graph.  E3 is the second target edge for a
   thread that continues through a joiner block; it is NULL for a
   simple thread.  */

void
register_jump_thread (edge e, edge e2, edge e3)
{
  /* This can occur if we're jumping to a constant address or
     something similar.  Just get out now.  */
  if (e2 == NULL)
    return;

  if (threaded_edges == NULL)
    threaded_edges = VEC_alloc (edge, heap, 15);

  if (dump_file && (dump_flags & TDF_DETAILS)
      && e->dest != e2->src)
    fprintf (dump_file,
	     "  Registering jump thread around one or more intermediate blocks\n");

  VEC_safe_push (edge, heap, threaded_edges, e);
  VEC_safe_push (edge, heap, threaded_edges, e2);
  VEC_safe_push (edge, heap, threaded_edges, e3);
}
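
/* A hypothetical use by a client pass (a sketch, not code from the actual
   callers): having proven that traversing edge E forces the conditional in
   E->dest to take outgoing edge E2, the pass records

	register_jump_thread (e, e2, NULL);

   and, when E->dest is a joiner block whose successor's condition is
   determined by the path, it records the continuation edge as well:

	register_jump_thread (e, e2, e3);

   The queued requests are later applied in bulk by
   thread_through_all_blocks.  */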