gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tree.h"
  24 #include "flags.h"
  25 #include "basic-block.h"
  26 #include "function.h"
  27 #include "gimple.h"
  28 #include "gimple-iterator.h"
  29 #include "gimple-ssa.h"
  30 #include "tree-phinodes.h"
  31 #include "tree-ssa.h"
  32 #include "tree-ssa-threadupdate.h"
  33 #include "dumpfile.h"
  34 #include "cfgloop.h"
  35 #include "hash-table.h"
  36 #include "dbgcnt.h"
  37
  38 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  39    one or more in-edges to B to instead reach the destination of an
  40    out-edge from B while preserving any side effects in B.
  41
  42    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  43    side effects of executing B.
  44
  45      1. Make a copy of B (including its outgoing edges and statements).  Call
  46         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  47
  48      2. Remove the control statement at the end of B' and all outgoing edges
  49         except B'->C.
  50
  51      3. Add a new argument to each PHI in C with the same value as the existing
  52         argument associated with edge B->C.  Associate the new PHI arguments
  53         with the edge B'->C.
  54
  55      4. For each PHI in B, find or create a PHI in B' with an identical
  56         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  57         value as the PHI in B associated with the edge A->B.  Associate
  58         the new argument in the PHI in B' with the edge A->B.
  59
  60      5. Change the edge A->B to A->B'.
  61
  62         5a. This automatically deletes any PHI arguments associated with the
  63             edge A->B in B.
  64
  65         5b. This automatically associates each new argument added in step 4
  66             with the edge A->B'.
  67
  68      6. Repeat for other incoming edges into B.
  69
  70      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  71
  72    Note that block duplication can be minimized by first collecting the
  73    set of unique destination blocks that the incoming edges should
  74    be threaded to.
  75
  76    Block duplication can be further minimized by using B instead of
  77    creating B' for one destination if all edges into B are going to be
  78    threaded to a successor of B.  We had code to do this at one time, but
  79    I'm not convinced it is correct with the changes to avoid mucking up
  80    the loop structure (which may cancel threading requests, thus a block
  81    which we thought was going to become unreachable may still be reachable).
  82    This code was also going to get ugly with the introduction of the ability
  83    for a single jump thread request to bypass multiple blocks.
  84
  85    We further reduce the number of edges and statements we create by
  86    not copying all the outgoing edges and the control statement in
  87    step #1.  We instead create a template block without the outgoing
  88    edges and duplicate the template.  */
  89
  90
  91 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  92    all the incoming edges which thread to the same destination edge at
  93    the same time.  That avoids lots of table lookups to get information
  94    for the destination edge.
  95
  96    To realize that implementation we create a list of incoming edges
  97    which thread to the same outgoing edge.  Thus to implement steps
  98    #5 and #6 we traverse our hash table of outgoing edge information.
  99    For each entry we walk the list of incoming edges which thread to
 100    the current outgoing edge.  */
 101
 102 struct el
 103 {
 104   edge e;
 105   struct el *next;
 106 };
 107
 108 /* Main data structure recording information regarding B's duplicate
 109    blocks.  */
 110
 111 /* We need to efficiently record the unique thread destinations of this
 112    block and specific information associated with those destinations.  We
 113    may have many incoming edges threaded to the same outgoing edge.  This
 114    can be naturally implemented with a hash table.  */
 115
 116 struct redirection_data : typed_free_remove<redirection_data>
 117 {
 118   /* A duplicate of B with the trailing control statement removed and which
 119      targets a single successor of B.  */
 120   basic_block dup_block;
 121
 122   /* The jump threading path.  */
 123   vec<jump_thread_edge *> *path;
 124
 125   /* A list of incoming edges which we want to thread to the
 126      same path.  */
 127   struct el *incoming_edges;
 128
 129   /* hash_table support.  */
 130   typedef redirection_data value_type;
 131   typedef redirection_data compare_type;
 132   static inline hashval_t hash (const value_type *);
 133   static inline int equal (const value_type *, const compare_type *);
 134 };
 135
 136 /* Simple hashing function.  For any given incoming edge E, we're going
 137    to be most concerned with the final destination of its jump thread
 138    path.  So hash on the block index of the final edge in the path.  */
 139
 140 inline hashval_t
 141 redirection_data::hash (const value_type *p)
 142 {
 143   vec<jump_thread_edge *> *path = p->path;
 144   return path->last ()->e->dest->index;
 145 }
 146
 147 /* Given two hash table entries, return true if they have the same
 148    jump threading path.  */
 149 inline int
 150 redirection_data::equal (const value_type *p1, const compare_type *p2)
 151 {
 152   vec<jump_thread_edge *> *path1 = p1->path;
 153   vec<jump_thread_edge *> *path2 = p2->path;
 154
 155   if (path1->length () != path2->length ())
 156     return false;
 157
 158   for (unsigned int i = 1; i < path1->length (); i++)
 159     {
 160       if ((*path1)[i]->type != (*path2)[i]->type
 161           || (*path1)[i]->e != (*path2)[i]->e)
 162         return false;
 163     }
 164
 165   return true;
 166 }
 167
 168 /* Data structure of information to pass to hash table traversal routines.  */
 169 struct ssa_local_info_t
 170 {
 171   /* The current block we are working on.  */
 172   basic_block bb;
 173
 174   /* A template copy of BB with no outgoing edges or control statement that
 175      we use for creating copies.  */
 176   basic_block template_block;
 177
 178   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 179   bool jumps_threaded;
 180 };
 181
 182 /* Passes which use the jump threading code register jump threading
 183    opportunities as they are discovered.  We keep the registered
 184    jump threading opportunities in this vector as edge pairs
 185    (original_edge, target_edge).  */
 186 static vec<vec<jump_thread_edge *> *> paths;
 187
 188 /* When we start updating the CFG for threading, data necessary for jump
 189    threading is attached to the AUX field for the incoming edge.  Use these
 190    macros to access the underlying structure attached to the AUX field.  */
 191 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 192
 193 /* Jump threading statistics.  */
 194
 195 struct thread_stats_d
 196 {
 197   unsigned long num_threaded_edges;
 198 };
 199
 200 struct thread_stats_d thread_stats;
 201
 202
 203 /* Remove the last statement in block BB if it is a control statement
 204    Also remove all outgoing edges except the edge which reaches DEST_BB.
 205    If DEST_BB is NULL, then remove all outgoing edges.  */
 206
 207 static void
 208 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 209 {
 210   gimple_stmt_iterator gsi;
 211   edge e;
 212   edge_iterator ei;
 213
 214   gsi = gsi_last_bb (bb);
 215
 216   /* If the duplicate ends with a control statement, then remove it.
 217
 218      Note that if we are duplicating the template block rather than the
 219      original basic block, then the duplicate might not have any real
 220      statements in it.  */
 221   if (!gsi_end_p (gsi)
 222       && gsi_stmt (gsi)
 223       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 224           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 225           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 226     gsi_remove (&gsi, true);
 227
 228   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 229     {
 230       if (e->dest != dest_bb)
 231         remove_edge (e);
 232       else
 233         ei_next (&ei);
 234     }
 235 }
 236
 237 /* Create a duplicate of BB.  Record the duplicate block in RD.  */
 238
 239 static void
 240 create_block_for_threading (basic_block bb, struct redirection_data *rd)
 241 {
 242   edge_iterator ei;
 243   edge e;
 244
 245   /* We can use the generic block duplication code and simply remove
 246      the stuff we do not need.  */
 247   rd->dup_block = duplicate_block (bb, NULL, NULL);
 248
 249   FOR_EACH_EDGE (e, ei, rd->dup_block->succs)
 250     e->aux = NULL;
 251
 252   /* Zero out the profile, since the block is unreachable for now.  */
 253   rd->dup_block->frequency = 0;
 254   rd->dup_block->count = 0;
 255 }
 256
 257 /* Main data structure to hold information for duplicates of BB.  */
 258
 259 static hash_table <redirection_data> redirection_data;
 260
 261 /* Given an outgoing edge E lookup and return its entry in our hash table.
 262
 263    If INSERT is true, then we insert the entry into the hash table if
 264    it is not already present.  INCOMING_EDGE is added to the list of incoming
 265    edges associated with E in the hash table.  */
 266
 267 static struct redirection_data *
 268 lookup_redirection_data (edge e, enum insert_option insert)
 269 {
 270   struct redirection_data **slot;
 271   struct redirection_data *elt;
 272   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 273
 274  /* Build a hash table element so we can see if E is already
 275      in the table.  */
 276   elt = XNEW (struct redirection_data);
 277   elt->path = path;
 278   elt->dup_block = NULL;
 279   elt->incoming_edges = NULL;
 280
 281   slot = redirection_data.find_slot (elt, insert);
 282
 283   /* This will only happen if INSERT is false and the entry is not
 284      in the hash table.  */
 285   if (slot == NULL)
 286     {
 287       free (elt);
 288       return NULL;
 289     }
 290
 291   /* This will only happen if E was not in the hash table and
 292      INSERT is true.  */
 293   if (*slot == NULL)
 294     {
 295       *slot = elt;
 296       elt->incoming_edges = XNEW (struct el);
 297       elt->incoming_edges->e = e;
 298       elt->incoming_edges->next = NULL;
 299       return elt;
 300     }
 301   /* E was in the hash table.  */
 302   else
 303     {
 304       /* Free ELT as we do not need it anymore, we will extract the
 305          relevant entry from the hash table itself.  */
 306       free (elt);
 307
 308       /* Get the entry stored in the hash table.  */
 309       elt = *slot;
 310
 311       /* If insertion was requested, then we need to add INCOMING_EDGE
 312          to the list of incoming edges associated with E.  */
 313       if (insert)
 314         {
 315           struct el *el = XNEW (struct el);
 316           el->next = elt->incoming_edges;
 317           el->e = e;
 318           elt->incoming_edges = el;
 319         }
 320
 321       return elt;
 322     }
 323 }
 324
 325 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.  */
 326
 327 static void
 328 copy_phi_args (basic_block bb, edge src_e, edge tgt_e)
 329 {
 330   gimple_stmt_iterator gsi;
 331   int src_indx = src_e->dest_idx;
 332
 333   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 334     {
 335       gimple phi = gsi_stmt (gsi);
 336       source_location locus = gimple_phi_arg_location (phi, src_indx);
 337       add_phi_arg (phi, gimple_phi_arg_def (phi, src_indx), tgt_e, locus);
 338     }
 339 }
 340
 341 /* We have recently made a copy of ORIG_BB, including its outgoing
 342    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 343    ORIG_BB has a new argument associated with edge from NEW_BB to the
 344    successor.  Initialize the PHI argument so that it is equal to the PHI
 345    argument associated with the edge from ORIG_BB to the successor.  */
 346
 347 static void
 348 update_destination_phis (basic_block orig_bb, basic_block new_bb)
 349 {
 350   edge_iterator ei;
 351   edge e;
 352
 353   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 354     {
 355       edge e2 = find_edge (new_bb, e->dest);
 356       copy_phi_args (e->dest, e, e2);
 357     }
 358 }
 359
 360 /* Given a duplicate block and its single destination (both stored
 361    in RD).  Create an edge between the duplicate and its single
 362    destination.
 363
 364    Add an additional argument to any PHI nodes at the single
 365    destination.  */
 366
 367 static void
 368 create_edge_and_update_destination_phis (struct redirection_data *rd,
 369                                          basic_block bb)
 370 {
 371   edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 372
 373   rescan_loop_exit (e, true, false);
 374   e->probability = REG_BR_PROB_BASE;
 375   e->count = bb->count;
 376
 377   /* We have to copy path -- which means creating a new vector as well
 378      as all the jump_thread_edge entries.  */
 379   if (rd->path->last ()->e->aux)
 380     {
 381       vec<jump_thread_edge *> *path = THREAD_PATH (rd->path->last ()->e);
 382       vec<jump_thread_edge *> *copy = new vec<jump_thread_edge *> ();
 383
 384       /* Sadly, the elements of the vector are pointers and need to
 385          be copied as well.  */
 386       for (unsigned int i = 0; i < path->length (); i++)
 387         {
 388           jump_thread_edge *x
 389             = new jump_thread_edge ((*path)[i]->e, (*path)[i]->type);
 390           copy->safe_push (x);
 391         }
 392      e->aux = (void *)copy;
 393     }
 394   else
 395     {
 396       e->aux = NULL;
 397     }
 398
 399   /* If there are any PHI nodes at the destination of the outgoing edge
 400      from the duplicate block, then we will need to add a new argument
 401      to them.  The argument should have the same value as the argument
 402      associated with the outgoing edge stored in RD.  */
 403   copy_phi_args (e->dest, rd->path->last ()->e, e);
 404 }
 405
 406 /* Wire up the outgoing edges from the duplicate block and
 407    update any PHIs as needed.  */
 408 void
 409 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
 410                                ssa_local_info_t *local_info)
 411 {
 412   edge e = rd->incoming_edges->e;
 413   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 414
 415   /* If we were threading through an joiner block, then we want
 416      to keep its control statement and redirect an outgoing edge.
 417      Else we want to remove the control statement & edges, then create
 418      a new outgoing edge.  In both cases we may need to update PHIs.  */
 419   if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 420     {
 421       edge victim;
 422       edge e2;
 423
 424       /* This updates the PHIs at the destination of the duplicate
 425          block.  */
 426       update_destination_phis (local_info->bb, rd->dup_block);
 427
 428       /* Find the edge from the duplicate block to the block we're
 429          threading through.  That's the edge we want to redirect.  */
 430       victim = find_edge (rd->dup_block, (*path)[1]->e->dest);
 431       e2 = redirect_edge_and_branch (victim, path->last ()->e->dest);
 432       e2->count = path->last ()->e->count;
 433
 434       /* If we redirected the edge, then we need to copy PHI arguments
 435          at the target.  If the edge already existed (e2 != victim case),
 436          then the PHIs in the target already have the correct arguments.  */
 437       if (e2 == victim)
 438         copy_phi_args (e2->dest, path->last ()->e, e2);
 439     }
 440   else
 441     {
 442       remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL);
 443       create_edge_and_update_destination_phis (rd, rd->dup_block);
 444     }
 445 }
 446 /* Hash table traversal callback routine to create duplicate blocks.  */
 447
 448 int
 449 ssa_create_duplicates (struct redirection_data **slot,
 450                        ssa_local_info_t *local_info)
 451 {
 452   struct redirection_data *rd = *slot;
 453
 454   /* Create a template block if we have not done so already.  Otherwise
 455      use the template to create a new block.  */
 456   if (local_info->template_block == NULL)
 457     {
 458       create_block_for_threading (local_info->bb, rd);
 459       local_info->template_block = rd->dup_block;
 460
 461       /* We do not create any outgoing edges for the template.  We will
 462          take care of that in a later traversal.  That way we do not
 463          create edges that are going to just be deleted.  */
 464     }
 465   else
 466     {
 467       create_block_for_threading (local_info->template_block, rd);
 468
 469       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
 470          block.   */
 471       ssa_fix_duplicate_block_edges (rd, local_info);
 472     }
 473
 474   /* Keep walking the hash table.  */
 475   return 1;
 476 }
 477
 478 /* We did not create any outgoing edges for the template block during
 479    block creation.  This hash table traversal callback creates the
 480    outgoing edge for the template block.  */
 481
 482 inline int
 483 ssa_fixup_template_block (struct redirection_data **slot,
 484                           ssa_local_info_t *local_info)
 485 {
 486   struct redirection_data *rd = *slot;
 487
 488   /* If this is the template block halt the traversal after updating
 489      it appropriately.
 490
 491      If we were threading through an joiner block, then we want
 492      to keep its control statement and redirect an outgoing edge.
 493      Else we want to remove the control statement & edges, then create
 494      a new outgoing edge.  In both cases we may need to update PHIs.  */
 495   if (rd->dup_block && rd->dup_block == local_info->template_block)
 496     {
 497       ssa_fix_duplicate_block_edges (rd, local_info);
 498       return 0;
 499     }
 500
 501   return 1;
 502 }
 503
 504 /* Hash table traversal callback to redirect each incoming edge
 505    associated with this hash table element to its new destination.  */
 506
 507 int
 508 ssa_redirect_edges (struct redirection_data **slot,
 509                     ssa_local_info_t *local_info)
 510 {
 511   struct redirection_data *rd = *slot;
 512   struct el *next, *el;
 513
 514   /* Walk over all the incoming edges associated associated with this
 515      hash table entry.  */
 516   for (el = rd->incoming_edges; el; el = next)
 517     {
 518       edge e = el->e;
 519       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 520
 521       /* Go ahead and free this element from the list.  Doing this now
 522          avoids the need for another list walk when we destroy the hash
 523          table.  */
 524       next = el->next;
 525       free (el);
 526
 527       thread_stats.num_threaded_edges++;
 528
 529       if (rd->dup_block)
 530         {
 531           edge e2;
 532
 533           if (dump_file && (dump_flags & TDF_DETAILS))
 534             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 535                      e->src->index, e->dest->index, rd->dup_block->index);
 536
 537           rd->dup_block->count += e->count;
 538
 539           /* Excessive jump threading may make frequencies large enough so
 540              the computation overflows.  */
 541           if (rd->dup_block->frequency < BB_FREQ_MAX * 2)
 542             rd->dup_block->frequency += EDGE_FREQUENCY (e);
 543
 544           /* In the case of threading through a joiner block, the outgoing
 545              edges from the duplicate block were updated when they were
 546              redirected during ssa_fix_duplicate_block_edges.  */
 547           if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
 548             EDGE_SUCC (rd->dup_block, 0)->count += e->count;
 549
 550           /* Redirect the incoming edge (possibly to the joiner block) to the
 551              appropriate duplicate block.  */
 552           e2 = redirect_edge_and_branch (e, rd->dup_block);
 553           gcc_assert (e == e2);
 554           flush_pending_stmts (e2);
 555         }
 556
 557       /* Go ahead and clear E->aux.  It's not needed anymore and failure
 558          to clear it will cause all kinds of unpleasant problems later.  */
 559       delete_jump_thread_path (path);
 560       e->aux = NULL;
 561
 562     }
 563
 564   /* Indicate that we actually threaded one or more jumps.  */
 565   if (rd->incoming_edges)
 566     local_info->jumps_threaded = true;
 567
 568   return 1;
 569 }
 570
 571 /* Return true if this block has no executable statements other than
 572    a simple ctrl flow instruction.  When the number of outgoing edges
 573    is one, this is equivalent to a "forwarder" block.  */
 574
 575 static bool
 576 redirection_block_p (basic_block bb)
 577 {
 578   gimple_stmt_iterator gsi;
 579
 580   /* Advance to the first executable statement.  */
 581   gsi = gsi_start_bb (bb);
 582   while (!gsi_end_p (gsi)
 583          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
 584              || is_gimple_debug (gsi_stmt (gsi))
 585              || gimple_nop_p (gsi_stmt (gsi))))
 586     gsi_next (&gsi);
 587
 588   /* Check if this is an empty block.  */
 589   if (gsi_end_p (gsi))
 590     return true;
 591
 592   /* Test that we've reached the terminating control statement.  */
 593   return gsi_stmt (gsi)
 594          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 595              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 596              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
 597 }
 598
 599 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
 600    is reached via one or more specific incoming edges, we know which
 601    outgoing edge from BB will be traversed.
 602
 603    We want to redirect those incoming edges to the target of the
 604    appropriate outgoing edge.  Doing so avoids a conditional branch
 605    and may expose new optimization opportunities.  Note that we have
 606    to update dominator tree and SSA graph after such changes.
 607
 608    The key to keeping the SSA graph update manageable is to duplicate
 609    the side effects occurring in BB so that those side effects still
 610    occur on the paths which bypass BB after redirecting edges.
 611
 612    We accomplish this by creating duplicates of BB and arranging for
 613    the duplicates to unconditionally pass control to one specific
 614    successor of BB.  We then revector the incoming edges into BB to
 615    the appropriate duplicate of BB.
 616
 617    If NOLOOP_ONLY is true, we only perform the threading as long as it
 618    does not affect the structure of the loops in a nontrivial way.
 619
 620    If JOINERS is true, then thread through joiner blocks as well.  */
 621
 622 static bool
 623 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
 624 {
 625   /* E is an incoming edge into BB that we may or may not want to
 626      redirect to a duplicate of BB.  */
 627   edge e, e2;
 628   edge_iterator ei;
 629   ssa_local_info_t local_info;
 630   struct loop *loop = bb->loop_father;
 631
 632   /* To avoid scanning a linear array for the element we need we instead
 633      use a hash table.  For normal code there should be no noticeable
 634      difference.  However, if we have a block with a large number of
 635      incoming and outgoing edges such linear searches can get expensive.  */
 636   redirection_data.create (EDGE_COUNT (bb->succs));
 637
 638   /* If we thread the latch of the loop to its exit, the loop ceases to
 639      exist.  Make sure we do not restrict ourselves in order to preserve
 640      this loop.  */
 641   if (loop->header == bb)
 642     {
 643       e = loop_latch_edge (loop);
 644       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 645
 646       if (path
 647           && (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && joiners)
 648               || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && !joiners)))
 649         {
 650           for (unsigned int i = 1; i < path->length (); i++)
 651             {
 652               edge e2 = (*path)[i]->e;
 653
 654               if (loop_exit_edge_p (loop, e2))
 655                 {
 656                   loop->header = NULL;
 657                   loop->latch = NULL;
 658                   loops_state_set (LOOPS_NEED_FIXUP);
 659                 }
 660             }
 661         }
 662     }
 663
 664   /* Record each unique threaded destination into a hash table for
 665      efficient lookups.  */
 666   FOR_EACH_EDGE (e, ei, bb->preds)
 667     {
 668       if (e->aux == NULL)
 669         continue;
 670
 671       vec<jump_thread_edge *> *path = THREAD_PATH (e);
 672
 673       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
 674           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
 675         continue;
 676
 677       e2 = path->last ()->e;
 678       if (!e2 || noloop_only)
 679         {
 680           /* If NOLOOP_ONLY is true, we only allow threading through the
 681              header of a loop to exit edges.
 682
 683              There are two cases to consider.  The first when BB is the
 684              loop header.  We will attempt to thread this elsewhere, so
 685              we can just continue here.  */
 686
 687           if (bb == bb->loop_father->header
 688               && (!loop_exit_edge_p (bb->loop_father, e2)
 689                   || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
 690             continue;
 691
 692
 693           /* The second occurs when there was loop header buried in a jump
 694              threading path.  We do not try and thread this elsewhere, so
 695              just cancel the jump threading request by clearing the AUX
 696              field now.  */
 697           if ((bb->loop_father != e2->src->loop_father
 698                && !loop_exit_edge_p (e2->src->loop_father, e2))
 699               || (e2->src->loop_father != e2->dest->loop_father
 700                   && !loop_exit_edge_p (e2->src->loop_father, e2)))
 701             {
 702               /* Since this case is not handled by our special code
 703                  to thread through a loop header, we must explicitly
 704                  cancel the threading request here.  */
 705               delete_jump_thread_path (path);
 706               e->aux = NULL;
 707               continue;
 708             }
 709         }
 710
 711       if (e->dest == e2->src)
 712         update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e),
 713                                          e->count, (*THREAD_PATH (e))[1]->e);
 714
 715       /* Insert the outgoing edge into the hash table if it is not
 716          already in the hash table.  */
 717       lookup_redirection_data (e, INSERT);
 718     }
 719
 720   /* We do not update dominance info.  */
 721   free_dominance_info (CDI_DOMINATORS);
 722
 723   /* We know we only thread through the loop header to loop exits.
 724      Let the basic block duplication hook know we are not creating
 725      a multiple entry loop.  */
 726   if (noloop_only
 727       && bb == bb->loop_father->header)
 728     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
 729
 730   /* Now create duplicates of BB.
 731
 732      Note that for a block with a high outgoing degree we can waste
 733      a lot of time and memory creating and destroying useless edges.
 734
 735      So we first duplicate BB and remove the control structure at the
 736      tail of the duplicate as well as all outgoing edges from the
 737      duplicate.  We then use that duplicate block as a template for
 738      the rest of the duplicates.  */
 739   local_info.template_block = NULL;
 740   local_info.bb = bb;
 741   local_info.jumps_threaded = false;
 742   redirection_data.traverse <ssa_local_info_t *, ssa_create_duplicates>
 743                             (&local_info);
 744
 745   /* The template does not have an outgoing edge.  Create that outgoing
 746      edge and update PHI nodes as the edge's target as necessary.
 747
 748      We do this after creating all the duplicates to avoid creating
 749      unnecessary edges.  */
 750   redirection_data.traverse <ssa_local_info_t *, ssa_fixup_template_block>
 751                             (&local_info);
 752
 753   /* The hash table traversals above created the duplicate blocks (and the
 754      statements within the duplicate blocks).  This loop creates PHI nodes for
 755      the duplicated blocks and redirects the incoming edges into BB to reach
 756      the duplicates of BB.  */
 757   redirection_data.traverse <ssa_local_info_t *, ssa_redirect_edges>
 758                             (&local_info);
 759
 760   /* Done with this block.  Clear REDIRECTION_DATA.  */
 761   redirection_data.dispose ();
 762
 763   if (noloop_only
 764       && bb == bb->loop_father->header)
 765     set_loop_copy (bb->loop_father, NULL);
 766
 767   /* Indicate to our caller whether or not any jumps were threaded.  */
 768   return local_info.jumps_threaded;
 769 }
 770
 771 /* Wrapper for thread_block_1 so that we can first handle jump
 772    thread paths which do not involve copying joiner blocks, then
 773    handle jump thread paths which have joiner blocks.
 774
 775    By doing things this way we can be as aggressive as possible and
 776    not worry that copying a joiner block will create a jump threading
 777    opportunity.  */
 778
 779 static bool
 780 thread_block (basic_block bb, bool noloop_only)
 781 {
 782   bool retval;
 783   retval = thread_block_1 (bb, noloop_only, false);
 784   retval |= thread_block_1 (bb, noloop_only, true);
 785   return retval;
 786 }
 787
 788
 789 /* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
 790    copy of E->dest created during threading, or E->dest if it was not necessary
 791    to copy it (E is its single predecessor).  */
 792
 793 static basic_block
 794 thread_single_edge (edge e)
 795 {
 796   basic_block bb = e->dest;
 797   struct redirection_data rd;
 798   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 799   edge eto = (*path)[1]->e;
 800
 801   for (unsigned int i = 0; i < path->length (); i++)
 802     delete (*path)[i];
 803   delete path;
 804   e->aux = NULL;
 805
 806   thread_stats.num_threaded_edges++;
 807
 808   if (single_pred_p (bb))
 809     {
 810       /* If BB has just a single predecessor, we should only remove the
 811          control statements at its end, and successors except for ETO.  */
 812       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
 813
 814       /* And fixup the flags on the single remaining edge.  */
 815       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
 816       eto->flags |= EDGE_FALLTHRU;
 817
 818       return bb;
 819     }
 820
 821   /* Otherwise, we need to create a copy.  */
 822   if (e->dest == eto->src)
 823     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
 824
 825   vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
 826   jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
 827   npath->safe_push (x);
 828
 829   x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
 830   npath->safe_push (x);
 831   rd.path = npath;
 832
 833   create_block_for_threading (bb, &rd);
 834   remove_ctrl_stmt_and_useless_edges (rd.dup_block, NULL);
 835   create_edge_and_update_destination_phis (&rd, rd.dup_block);
 836
 837   if (dump_file && (dump_flags & TDF_DETAILS))
 838     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
 839              e->src->index, e->dest->index, rd.dup_block->index);
 840
 841   rd.dup_block->count = e->count;
 842   rd.dup_block->frequency = EDGE_FREQUENCY (e);
 843   single_succ_edge (rd.dup_block)->count = e->count;
 844   redirect_edge_and_branch (e, rd.dup_block);
 845   flush_pending_stmts (e);
 846
 847   return rd.dup_block;
 848 }
 849
 850 /* Callback for dfs_enumerate_from.  Returns true if BB is different
 851    from STOP and DBDS_CE_STOP.  */
 852
 853 static basic_block dbds_ce_stop;
 854 static bool
 855 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
 856 {
 857   return (bb != (const_basic_block) stop
 858           && bb != dbds_ce_stop);
 859 }
 860
 861 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
 862    returns the state.  */
 863
 864 enum bb_dom_status
 865 {
 866   /* BB does not dominate latch of the LOOP.  */
 867   DOMST_NONDOMINATING,
 868   /* The LOOP is broken (there is no path from the header to its latch.  */
 869   DOMST_LOOP_BROKEN,
 870   /* BB dominates the latch of the LOOP.  */
 871   DOMST_DOMINATING
 872 };
 873
 874 static enum bb_dom_status
 875 determine_bb_domination_status (struct loop *loop, basic_block bb)
 876 {
 877   basic_block *bblocks;
 878   unsigned nblocks, i;
 879   bool bb_reachable = false;
 880   edge_iterator ei;
 881   edge e;
 882
 883   /* This function assumes BB is a successor of LOOP->header.
 884      If that is not the case return DOMST_NONDOMINATING which
 885      is always safe.  */
 886     {
 887       bool ok = false;
 888
 889       FOR_EACH_EDGE (e, ei, bb->preds)
 890         {
 891           if (e->src == loop->header)
 892             {
 893               ok = true;
 894               break;
 895             }
 896         }
 897
 898       if (!ok)
 899         return DOMST_NONDOMINATING;
 900     }
 901
 902   if (bb == loop->latch)
 903     return DOMST_DOMINATING;
 904
 905   /* Check that BB dominates LOOP->latch, and that it is back-reachable
 906      from it.  */
 907
 908   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
 909   dbds_ce_stop = loop->header;
 910   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
 911                                 bblocks, loop->num_nodes, bb);
 912   for (i = 0; i < nblocks; i++)
 913     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
 914       {
 915         if (e->src == loop->header)
 916           {
 917             free (bblocks);
 918             return DOMST_NONDOMINATING;
 919           }
 920         if (e->src == bb)
 921           bb_reachable = true;
 922       }
 923
 924   free (bblocks);
 925   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
 926 }
 927
 928 /* Return true if BB is part of the new pre-header that is created
 929    when threading the latch to DATA.  */
 930
 931 static bool
 932 def_split_header_continue_p (const_basic_block bb, const void *data)
 933 {
 934   const_basic_block new_header = (const_basic_block) data;
 935   const struct loop *l;
 936
 937   if (bb == new_header
 938       || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
 939     return false;
 940   for (l = bb->loop_father; l; l = loop_outer (l))
 941     if (l == new_header->loop_father)
 942       return true;
 943   return false;
 944 }
 945
 946 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
 947    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
 948    to the inside of the loop.  */
 949
 950 static bool
 951 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
 952 {
 953   basic_block header = loop->header;
 954   edge e, tgt_edge, latch = loop_latch_edge (loop);
 955   edge_iterator ei;
 956   basic_block tgt_bb, atgt_bb;
 957   enum bb_dom_status domst;
 958
 959   /* We have already threaded through headers to exits, so all the threading
 960      requests now are to the inside of the loop.  We need to avoid creating
 961      irreducible regions (i.e., loops with more than one entry block), and
 962      also loop with several latch edges, or new subloops of the loop (although
 963      there are cases where it might be appropriate, it is difficult to decide,
 964      and doing it wrongly may confuse other optimizers).
 965
 966      We could handle more general cases here.  However, the intention is to
 967      preserve some information about the loop, which is impossible if its
 968      structure changes significantly, in a way that is not well understood.
 969      Thus we only handle few important special cases, in which also updating
 970      of the loop-carried information should be feasible:
 971
 972      1) Propagation of latch edge to a block that dominates the latch block
 973         of a loop.  This aims to handle the following idiom:
 974
 975         first = 1;
 976         while (1)
 977           {
 978             if (first)
 979               initialize;
 980             first = 0;
 981             body;
 982           }
 983
 984         After threading the latch edge, this becomes
 985
 986         first = 1;
 987         if (first)
 988           initialize;
 989         while (1)
 990           {
 991             first = 0;
 992             body;
 993           }
 994
 995         The original header of the loop is moved out of it, and we may thread
 996         the remaining edges through it without further constraints.
 997
 998      2) All entry edges are propagated to a single basic block that dominates
 999         the latch block of the loop.  This aims to handle the following idiom
1000         (normally created for "for" loops):
1001
1002         i = 0;
1003         while (1)
1004           {
1005             if (i >= 100)
1006               break;
1007             body;
1008             i++;
1009           }
1010
1011         This becomes
1012
1013         i = 0;
1014         while (1)
1015           {
1016             body;
1017             i++;
1018             if (i >= 100)
1019               break;
1020           }
1021      */
1022
1023   /* Threading through the header won't improve the code if the header has just
1024      one successor.  */
1025   if (single_succ_p (header))
1026     goto fail;
1027
1028   if (latch->aux)
1029     {
1030       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1031       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1032         goto fail;
1033       tgt_edge = (*path)[1]->e;
1034       tgt_bb = tgt_edge->dest;
1035     }
1036   else if (!may_peel_loop_headers
1037            && !redirection_block_p (loop->header))
1038     goto fail;
1039   else
1040     {
1041       tgt_bb = NULL;
1042       tgt_edge = NULL;
1043       FOR_EACH_EDGE (e, ei, header->preds)
1044         {
1045           if (!e->aux)
1046             {
1047               if (e == latch)
1048                 continue;
1049
1050               /* If latch is not threaded, and there is a header
1051                  edge that is not threaded, we would create loop
1052                  with multiple entries.  */
1053               goto fail;
1054             }
1055
1056           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1057
1058           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1059             goto fail;
1060           tgt_edge = (*path)[1]->e;
1061           atgt_bb = tgt_edge->dest;
1062           if (!tgt_bb)
1063             tgt_bb = atgt_bb;
1064           /* Two targets of threading would make us create loop
1065              with multiple entries.  */
1066           else if (tgt_bb != atgt_bb)
1067             goto fail;
1068         }
1069
1070       if (!tgt_bb)
1071         {
1072           /* There are no threading requests.  */
1073           return false;
1074         }
1075
1076       /* Redirecting to empty loop latch is useless.  */
1077       if (tgt_bb == loop->latch
1078           && empty_block_p (loop->latch))
1079         goto fail;
1080     }
1081
1082   /* The target block must dominate the loop latch, otherwise we would be
1083      creating a subloop.  */
1084   domst = determine_bb_domination_status (loop, tgt_bb);
1085   if (domst == DOMST_NONDOMINATING)
1086     goto fail;
1087   if (domst == DOMST_LOOP_BROKEN)
1088     {
1089       /* If the loop ceased to exist, mark it as such, and thread through its
1090          original header.  */
1091       loop->header = NULL;
1092       loop->latch = NULL;
1093       loops_state_set (LOOPS_NEED_FIXUP);
1094       return thread_block (header, false);
1095     }
1096
1097   if (tgt_bb->loop_father->header == tgt_bb)
1098     {
1099       /* If the target of the threading is a header of a subloop, we need
1100          to create a preheader for it, so that the headers of the two loops
1101          do not merge.  */
1102       if (EDGE_COUNT (tgt_bb->preds) > 2)
1103         {
1104           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1105           gcc_assert (tgt_bb != NULL);
1106         }
1107       else
1108         tgt_bb = split_edge (tgt_edge);
1109     }
1110
1111   if (latch->aux)
1112     {
1113       basic_block *bblocks;
1114       unsigned nblocks, i;
1115
1116       /* First handle the case latch edge is redirected.  We are copying
1117          the loop header but not creating a multiple entry loop.  Make the
1118          cfg manipulation code aware of that fact.  */
1119       set_loop_copy (loop, loop);
1120       loop->latch = thread_single_edge (latch);
1121       set_loop_copy (loop, NULL);
1122       gcc_assert (single_succ (loop->latch) == tgt_bb);
1123       loop->header = tgt_bb;
1124
1125       /* Remove the new pre-header blocks from our loop.  */
1126       bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1127       nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1128                                     bblocks, loop->num_nodes, tgt_bb);
1129       for (i = 0; i < nblocks; i++)
1130         if (bblocks[i]->loop_father == loop)
1131           {
1132             remove_bb_from_loops (bblocks[i]);
1133             add_bb_to_loop (bblocks[i], loop_outer (loop));
1134           }
1135       free (bblocks);
1136
1137       /* If the new header has multiple latches mark it so.  */
1138       FOR_EACH_EDGE (e, ei, loop->header->preds)
1139         if (e->src->loop_father == loop
1140             && e->src != loop->latch)
1141           {
1142             loop->latch = NULL;
1143             loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1144           }
1145
1146       /* Cancel remaining threading requests that would make the
1147          loop a multiple entry loop.  */
1148       FOR_EACH_EDGE (e, ei, header->preds)
1149         {
1150           edge e2;
1151
1152           if (e->aux == NULL)
1153             continue;
1154
1155           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1156           e2 = path->last ()->e;
1157
1158           if (e->src->loop_father != e2->dest->loop_father
1159               && e2->dest != loop->header)
1160             {
1161               delete_jump_thread_path (path);
1162               e->aux = NULL;
1163             }
1164         }
1165
1166       /* Thread the remaining edges through the former header.  */
1167       thread_block (header, false);
1168     }
1169   else
1170     {
1171       basic_block new_preheader;
1172
1173       /* Now consider the case entry edges are redirected to the new entry
1174          block.  Remember one entry edge, so that we can find the new
1175          preheader (its destination after threading).  */
1176       FOR_EACH_EDGE (e, ei, header->preds)
1177         {
1178           if (e->aux)
1179             break;
1180         }
1181
1182       /* The duplicate of the header is the new preheader of the loop.  Ensure
1183          that it is placed correctly in the loop hierarchy.  */
1184       set_loop_copy (loop, loop_outer (loop));
1185
1186       thread_block (header, false);
1187       set_loop_copy (loop, NULL);
1188       new_preheader = e->dest;
1189
1190       /* Create the new latch block.  This is always necessary, as the latch
1191          must have only a single successor, but the original header had at
1192          least two successors.  */
1193       loop->latch = NULL;
1194       mfb_kj_edge = single_succ_edge (new_preheader);
1195       loop->header = mfb_kj_edge->dest;
1196       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1197       loop->header = latch->dest;
1198       loop->latch = latch->src;
1199     }
1200
1201   return true;
1202
1203 fail:
1204   /* We failed to thread anything.  Cancel the requests.  */
1205   FOR_EACH_EDGE (e, ei, header->preds)
1206     {
1207       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1208
1209       if (path)
1210         {
1211           delete_jump_thread_path (path);
1212           e->aux = NULL;
1213         }
1214     }
1215   return false;
1216 }
1217
1218 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
1219    PHI arguments associated with those edges are equal or there are no
1220    PHI arguments, otherwise return FALSE.  */
1221
1222 static bool
1223 phi_args_equal_on_edges (edge e1, edge e2)
1224 {
1225   gimple_stmt_iterator gsi;
1226   int indx1 = e1->dest_idx;
1227   int indx2 = e2->dest_idx;
1228
1229   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1230     {
1231       gimple phi = gsi_stmt (gsi);
1232
1233       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1234                             gimple_phi_arg_def (phi, indx2), 0))
1235         return false;
1236     }
1237   return true;
1238 }
1239
1240 /* Walk through the registered jump threads and convert them into a
1241    form convenient for this pass.
1242
1243    Any block which has incoming edges threaded to outgoing edges
1244    will have its entry in THREADED_BLOCK set.
1245
1246    Any threaded edge will have its new outgoing edge stored in the
1247    original edge's AUX field.
1248
1249    This form avoids the need to walk all the edges in the CFG to
1250    discover blocks which need processing and avoids unnecessary
1251    hash table lookups to map from threaded edge to new target.  */
1252
1253 static void
1254 mark_threaded_blocks (bitmap threaded_blocks)
1255 {
1256   unsigned int i;
1257   bitmap_iterator bi;
1258   bitmap tmp = BITMAP_ALLOC (NULL);
1259   basic_block bb;
1260   edge e;
1261   edge_iterator ei;
1262
1263   /* Move the jump threading requests from PATHS to each edge
1264      which starts a jump thread path.  */
1265   for (i = 0; i < paths.length (); i++)
1266     {
1267       vec<jump_thread_edge *> *path = paths[i];
1268       edge e = (*path)[0]->e;
1269       e->aux = (void *)path;
1270       bitmap_set_bit (tmp, e->dest->index);
1271     }
1272
1273
1274
1275   /* If optimizing for size, only thread through block if we don't have
1276      to duplicate it or it's an otherwise empty redirection block.  */
1277   if (optimize_function_for_size_p (cfun))
1278     {
1279       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1280         {
1281           bb = BASIC_BLOCK (i);
1282           if (EDGE_COUNT (bb->preds) > 1
1283               && !redirection_block_p (bb))
1284             {
1285               FOR_EACH_EDGE (e, ei, bb->preds)
1286                 {
1287                   if (e->aux)
1288                     {
1289                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1290                       delete_jump_thread_path (path);
1291                       e->aux = NULL;
1292                     }
1293                 }
1294             }
1295           else
1296             bitmap_set_bit (threaded_blocks, i);
1297         }
1298     }
1299   else
1300     bitmap_copy (threaded_blocks, tmp);
1301
1302   /* Look for jump threading paths which cross multiple loop headers.
1303
1304      The code to thread through loop headers will change the CFG in ways
1305      that break assumptions made by the loop optimization code.
1306
1307      We don't want to blindly cancel the requests.  We can instead do better
1308      by trimming off the end of the jump thread path.  */
1309   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1310     {
1311       basic_block bb = BASIC_BLOCK (i);
1312       FOR_EACH_EDGE (e, ei, bb->preds)
1313         {
1314           if (e->aux)
1315             {
1316               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1317
1318               /* Basically we're looking for a situation where we can see
1319                  3 or more loop structures on a jump threading path.  */
1320
1321               struct loop *first_father = (*path)[0]->e->src->loop_father;
1322               struct loop *second_father = NULL;
1323               for (unsigned int i = 0; i < path->length (); i++)
1324                 {
1325                   /* See if this is a loop father we have not seen before.  */
1326                   if ((*path)[i]->e->dest->loop_father != first_father
1327                       && (*path)[i]->e->dest->loop_father != second_father)
1328                     {
1329                       /* We've already seen two loop fathers, so we
1330                          need to trim this jump threading path.  */
1331                       if (second_father != NULL)
1332                         {
1333                           /* Trim from entry I onwards.  */
1334                           for (unsigned int j = i; j < path->length (); j++)
1335                             delete (*path)[j];
1336                           path->truncate (i);
1337
1338                           /* Now that we've truncated the path, make sure
1339                              what's left is still valid.   We need at least
1340                              two edges on the path and the last edge can not
1341                              be a joiner.  This should never happen, but let's
1342                              be safe.  */
1343                           if (path->length () < 2
1344                               || (path->last ()->type
1345                                   == EDGE_COPY_SRC_JOINER_BLOCK))
1346                             {
1347                               delete_jump_thread_path (path);
1348                               e->aux = NULL;
1349                             }
1350                           break;
1351                         }
1352                       else
1353                         {
1354                           second_father = (*path)[i]->e->dest->loop_father;
1355                         }
1356                     }
1357                 }
1358             }
1359         }
1360     }
1361
1362   /* If we have a joiner block (J) which has two successors S1 and S2 and
1363      we are threading though S1 and the final destination of the thread
1364      is S2, then we must verify that any PHI nodes in S2 have the same
1365      PHI arguments for the edge J->S2 and J->S1->...->S2.
1366
1367      We used to detect this prior to registering the jump thread, but
1368      that prohibits propagation of edge equivalences into non-dominated
1369      PHI nodes as the equivalency test might occur before propagation.
1370
1371      This must also occur after we truncate any jump threading paths
1372      as this scenario may only show up after truncation.
1373
1374      This works for now, but will need improvement as part of the FSA
1375      optimization.
1376
1377      Note since we've moved the thread request data to the edges,
1378      we have to iterate on those rather than the threaded_edges vector.  */
1379   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1380     {
1381       bb = BASIC_BLOCK (i);
1382       FOR_EACH_EDGE (e, ei, bb->preds)
1383         {
1384           if (e->aux)
1385             {
1386               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1387               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
1388
1389               if (have_joiner)
1390                 {
1391                   basic_block joiner = e->dest;
1392                   edge final_edge = path->last ()->e;
1393                   basic_block final_dest = final_edge->dest;
1394                   edge e2 = find_edge (joiner, final_dest);
1395
1396                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
1397                     {
1398                       delete_jump_thread_path (path);
1399                       e->aux = NULL;
1400                     }
1401                 }
1402             }
1403         }
1404     }
1405
1406   BITMAP_FREE (tmp);
1407 }
1408
1409
1410 /* Walk through all blocks and thread incoming edges to the appropriate
1411    outgoing edge for each edge pair recorded in THREADED_EDGES.
1412
1413    It is the caller's responsibility to fix the dominance information
1414    and rewrite duplicated SSA_NAMEs back into SSA form.
1415
1416    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
1417    loop headers if it does not simplify the loop.
1418
1419    Returns true if one or more edges were threaded, false otherwise.  */
1420
1421 bool
1422 thread_through_all_blocks (bool may_peel_loop_headers)
1423 {
1424   bool retval = false;
1425   unsigned int i;
1426   bitmap_iterator bi;
1427   bitmap threaded_blocks;
1428   struct loop *loop;
1429   loop_iterator li;
1430
1431   /* We must know about loops in order to preserve them.  */
1432   gcc_assert (current_loops != NULL);
1433
1434   if (!paths.exists ())
1435     return false;
1436
1437   threaded_blocks = BITMAP_ALLOC (NULL);
1438   memset (&thread_stats, 0, sizeof (thread_stats));
1439
1440   mark_threaded_blocks (threaded_blocks);
1441
1442   initialize_original_copy_tables ();
1443
1444   /* First perform the threading requests that do not affect
1445      loop structure.  */
1446   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
1447     {
1448       basic_block bb = BASIC_BLOCK (i);
1449
1450       if (EDGE_COUNT (bb->preds) > 0)
1451         retval |= thread_block (bb, true);
1452     }
1453
1454   /* Then perform the threading through loop headers.  We start with the
1455      innermost loop, so that the changes in cfg we perform won't affect
1456      further threading.  */
1457   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1458     {
1459       if (!loop->header
1460           || !bitmap_bit_p (threaded_blocks, loop->header->index))
1461         continue;
1462
1463       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
1464     }
1465
1466   /* Assume we had a jump thread path which went from the latch to the exit
1467      and a path which goes from outside to inside the same loop.
1468
1469      If the latch to exit was handled first, we will thread it and clear
1470      loop->header.
1471
1472      The second path will be ignored by thread_block because we're going
1473      through a loop header.  It will also be ignored by the loop above
1474      because loop->header is NULL.
1475
1476      This results in the second path never being threaded.  The failure
1477      mode is a dangling AUX field.
1478
1479      This is inherently a bit of a pain to fix, so we just walk all the
1480      blocks and all the incoming edges to those blocks and clear their
1481      AUX fields.  */
1482   basic_block bb;
1483   edge_iterator ei;
1484   edge e;
1485   FOR_EACH_BB (bb)
1486     {
1487       FOR_EACH_EDGE (e, ei, bb->preds)
1488         if (e->aux)
1489           {
1490             vec<jump_thread_edge *> *path = THREAD_PATH (e);
1491
1492             delete_jump_thread_path (path);
1493             e->aux = NULL;
1494           }
1495     }
1496
1497   statistics_counter_event (cfun, "Jumps threaded",
1498                             thread_stats.num_threaded_edges);
1499
1500   free_original_copy_tables ();
1501
1502   BITMAP_FREE (threaded_blocks);
1503   threaded_blocks = NULL;
1504   paths.release ();
1505
1506   if (retval)
1507     loops_state_set (LOOPS_NEED_FIXUP);
1508
1509   return retval;
1510 }
1511
1512 /* Delete the jump threading path PATH.  We have to explcitly delete
1513    each entry in the vector, then the container.  */
1514
1515 void
1516 delete_jump_thread_path (vec<jump_thread_edge *> *path)
1517 {
1518   for (unsigned int i = 0; i < path->length (); i++)
1519     delete (*path)[i];
1520   path->release();
1521 }
1522
1523 /* Dump a jump threading path, including annotations about each
1524    edge in the path.  */
1525
1526 static void
1527 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path)
1528 {
1529   fprintf (dump_file,
1530            "  Registering jump thread: (%d, %d) incoming edge; ",
1531            path[0]->e->src->index, path[0]->e->dest->index);
1532
1533   for (unsigned int i = 1; i < path.length (); i++)
1534     {
1535       /* We can get paths with a NULL edge when the final destination
1536          of a jump thread turns out to be a constant address.  We dump
1537          those paths when debugging, so we have to be prepared for that
1538          possibility here.  */
1539       if (path[i]->e == NULL)
1540         continue;
1541
1542       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1543         fprintf (dump_file, " (%d, %d) joiner; ",
1544                  path[i]->e->src->index, path[i]->e->dest->index);
1545       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
1546        fprintf (dump_file, " (%d, %d) normal;",
1547                  path[i]->e->src->index, path[i]->e->dest->index);
1548       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
1549        fprintf (dump_file, " (%d, %d) nocopy;",
1550                  path[i]->e->src->index, path[i]->e->dest->index);
1551     }
1552   fputc ('\n', dump_file);
1553 }
1554
1555 /* Register a jump threading opportunity.  We queue up all the jump
1556    threading opportunities discovered by a pass and update the CFG
1557    and SSA form all at once.
1558
1559    E is the edge we can thread, E2 is the new target edge, i.e., we
1560    are effectively recording that E->dest can be changed to E2->dest
1561    after fixing the SSA graph.  */
1562
1563 void
1564 register_jump_thread (vec<jump_thread_edge *> *path)
1565 {
1566   if (!dbg_cnt (registered_jump_thread))
1567     {
1568       delete_jump_thread_path (path);
1569       return;
1570     }
1571
1572   /* First make sure there are no NULL outgoing edges on the jump threading
1573      path.  That can happen for jumping to a constant address.  */
1574   for (unsigned int i = 0; i < path->length (); i++)
1575     if ((*path)[i]->e == NULL)
1576       {
1577         if (dump_file && (dump_flags & TDF_DETAILS))
1578           {
1579             fprintf (dump_file,
1580                      "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
1581             dump_jump_thread_path (dump_file, *path);
1582           }
1583
1584         delete_jump_thread_path (path);
1585         return;
1586       }
1587
1588   if (dump_file && (dump_flags & TDF_DETAILS))
1589     dump_jump_thread_path (dump_file, *path);
1590
1591   if (!paths.exists ())
1592     paths.create (5);
1593
1594   paths.safe_push (path);
1595 }