gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "alias.h"
  24 #include "symtab.h"
  25 #include "options.h"
  26 #include "tree.h"
  27 #include "fold-const.h"
  28 #include "flags.h"
  29 #include "predict.h"
  30 #include "tm.h"
  31 #include "hard-reg-set.h"
  32 #include "function.h"
  33 #include "dominance.h"
  34 #include "cfg.h"
  35 #include "cfganal.h"
  36 #include "basic-block.h"
  37 #include "tree-ssa-alias.h"
  38 #include "internal-fn.h"
  39 #include "gimple-expr.h"
  40 #include "gimple.h"
  41 #include "gimple-iterator.h"
  42 #include "gimple-ssa.h"
  43 #include "tree-phinodes.h"
  44 #include "tree-ssa.h"
  45 #include "tree-ssa-threadupdate.h"
  46 #include "ssa-iterators.h"
  47 #include "dumpfile.h"
  48 #include "cfgloop.h"
  49 #include "dbgcnt.h"
  50 #include "tree-cfg.h"
  51 #include "tree-pass.h"
  52
  53 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  54    one or more in-edges to B to instead reach the destination of an
  55    out-edge from B while preserving any side effects in B.
  56
  57    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  58    side effects of executing B.
  59
  60      1. Make a copy of B (including its outgoing edges and statements).  Call
  61         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  62
  63      2. Remove the control statement at the end of B' and all outgoing edges
  64         except B'->C.
  65
  66      3. Add a new argument to each PHI in C with the same value as the existing
  67         argument associated with edge B->C.  Associate the new PHI arguments
  68         with the edge B'->C.
  69
  70      4. For each PHI in B, find or create a PHI in B' with an identical
  71         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  72         value as the PHI in B associated with the edge A->B.  Associate
  73         the new argument in the PHI in B' with the edge A->B.
  74
  75      5. Change the edge A->B to A->B'.
  76
  77         5a. This automatically deletes any PHI arguments associated with the
  78             edge A->B in B.
  79
  80         5b. This automatically associates each new argument added in step 4
  81             with the edge A->B'.
  82
  83      6. Repeat for other incoming edges into B.
  84
  85      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  86
  87    Note that block duplication can be minimized by first collecting the
  88    set of unique destination blocks that the incoming edges should
  89    be threaded to.
  90
  91    We reduce the number of edges and statements we create by not copying all
  92    the outgoing edges and the control statement in step #1.  We instead create
  93    a template block without the outgoing edges and duplicate the template.
  94
  95    Another case this code handles is threading through a "joiner" block.  In
  96    this case, we do not know the destination of the joiner block, but one
  97    of the outgoing edges from the joiner block leads to a threadable path.  This
  98    case largely works as outlined above, except the duplicate of the joiner
  99    block still contains a full set of outgoing edges and its control statement.
 100    We just redirect one of its outgoing edges to our jump threading path.  */
 101
 102
 103 /* Steps #5 and #6 of the above algorithm are best implemented by walking
 104    all the incoming edges which thread to the same destination edge at
 105    the same time.  That avoids lots of table lookups to get information
 106    for the destination edge.
 107
 108    To realize that implementation we create a list of incoming edges
 109    which thread to the same outgoing edge.  Thus to implement steps
 110    #5 and #6 we traverse our hash table of outgoing edge information.
 111    For each entry we walk the list of incoming edges which thread to
 112    the current outgoing edge.  */
 113
 114 struct el
 115 {
 116   edge e;
 117   struct el *next;
 118 };
 119
 120 /* Main data structure recording information regarding B's duplicate
 121    blocks.  */
 122
 123 /* We need to efficiently record the unique thread destinations of this
 124    block and specific information associated with those destinations.  We
 125    may have many incoming edges threaded to the same outgoing edge.  This
 126    can be naturally implemented with a hash table.  */
 127
 128 struct redirection_data : typed_free_remove<redirection_data>
 129 {
 130   /* We support wiring up two block duplicates in a jump threading path.
 131
 132      One is a normal block copy where we remove the control statement
 133      and wire up its single remaining outgoing edge to the thread path.
 134
 135      The other is a joiner block where we leave the control statement
 136      in place, but wire one of the outgoing edges to a thread path.
 137
 138      In theory we could have multiple block duplicates in a jump
 139      threading path, but I haven't tried that.
 140
 141      The duplicate blocks appear in this array in the same order in
 142      which they appear in the jump thread path.  */
 143   basic_block dup_blocks[2];
 144
 145   /* The jump threading path.  */
 146   vec<jump_thread_edge *> *path;
 147
 148   /* A list of incoming edges which we want to thread to the
 149      same path.  */
 150   struct el *incoming_edges;
 151
 152   /* hash_table support.  */
 153   typedef redirection_data *value_type;
 154   typedef redirection_data *compare_type;
 155   static inline hashval_t hash (const redirection_data *);
 156   static inline int equal (const redirection_data *, const redirection_data *);
 157 };
 158
 159 /* Dump a jump threading path, including annotations about each
 160    edge in the path.  */
 161
 162 static void
 163 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 164                        bool registering)
 165 {
 166   fprintf (dump_file,
 167            "  %s%s jump thread: (%d, %d) incoming edge; ",
 168            (registering ? "Registering" : "Cancelling"),
 169            (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 170            path[0]->e->src->index, path[0]->e->dest->index);
 171
 172   for (unsigned int i = 1; i < path.length (); i++)
 173     {
 174       /* We can get paths with a NULL edge when the final destination
 175          of a jump thread turns out to be a constant address.  We dump
 176          those paths when debugging, so we have to be prepared for that
 177          possibility here.  */
 178       if (path[i]->e == NULL)
 179         continue;
 180
 181       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 182         fprintf (dump_file, " (%d, %d) joiner; ",
 183                  path[i]->e->src->index, path[i]->e->dest->index);
 184       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
 185        fprintf (dump_file, " (%d, %d) normal;",
 186                  path[i]->e->src->index, path[i]->e->dest->index);
 187       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
 188        fprintf (dump_file, " (%d, %d) nocopy;",
 189                  path[i]->e->src->index, path[i]->e->dest->index);
 190       if (path[0]->type == EDGE_FSM_THREAD)
 191         fprintf (dump_file, " (%d, %d) ",
 192                  path[i]->e->src->index, path[i]->e->dest->index);
 193     }
 194   fputc ('\n', dump_file);
 195 }
 196
 197 /* Simple hashing function.  For any given incoming edge E, we're going
 198    to be most concerned with the final destination of its jump thread
 199    path.  So hash on the block index of the final edge in the path.  */
 200
 201 inline hashval_t
 202 redirection_data::hash (const redirection_data *p)
 203 {
 204   vec<jump_thread_edge *> *path = p->path;
 205   return path->last ()->e->dest->index;
 206 }
 207
 208 /* Given two hash table entries, return true if they have the same
 209    jump threading path.  */
 210 inline int
 211 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
 212 {
 213   vec<jump_thread_edge *> *path1 = p1->path;
 214   vec<jump_thread_edge *> *path2 = p2->path;
 215
 216   if (path1->length () != path2->length ())
 217     return false;
 218
 219   for (unsigned int i = 1; i < path1->length (); i++)
 220     {
 221       if ((*path1)[i]->type != (*path2)[i]->type
 222           || (*path1)[i]->e != (*path2)[i]->e)
 223         return false;
 224     }
 225
 226   return true;
 227 }
 228
 229 /* Data structure of information to pass to hash table traversal routines.  */
 230 struct ssa_local_info_t
 231 {
 232   /* The current block we are working on.  */
 233   basic_block bb;
 234
 235   /* We only create a template block for the first duplicated block in a
 236      jump threading path as we may need many duplicates of that block.
 237
 238      The second duplicate block in a path is specific to that path.  Creating
 239      and sharing a template for that block is considerably more difficult.  */
 240   basic_block template_block;
 241
 242   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 243   bool jumps_threaded;
 244
 245   /* Blocks duplicated for the thread.  */
 246   bitmap duplicate_blocks;
 247 };
 248
 249 /* Passes which use the jump threading code register jump threading
 250    opportunities as they are discovered.  We keep the registered
 251    jump threading opportunities in this vector as edge pairs
 252    (original_edge, target_edge).  */
 253 static vec<vec<jump_thread_edge *> *> paths;
 254
 255 /* When we start updating the CFG for threading, data necessary for jump
 256    threading is attached to the AUX field for the incoming edge.  Use these
 257    macros to access the underlying structure attached to the AUX field.  */
 258 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 259
 260 /* Jump threading statistics.  */
 261
 262 struct thread_stats_d
 263 {
 264   unsigned long num_threaded_edges;
 265 };
 266
 267 struct thread_stats_d thread_stats;
 268
 269
 270 /* Remove the last statement in block BB if it is a control statement
 271    Also remove all outgoing edges except the edge which reaches DEST_BB.
 272    If DEST_BB is NULL, then remove all outgoing edges.  */
 273
 274 static void
 275 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 276 {
 277   gimple_stmt_iterator gsi;
 278   edge e;
 279   edge_iterator ei;
 280
 281   gsi = gsi_last_bb (bb);
 282
 283   /* If the duplicate ends with a control statement, then remove it.
 284
 285      Note that if we are duplicating the template block rather than the
 286      original basic block, then the duplicate might not have any real
 287      statements in it.  */
 288   if (!gsi_end_p (gsi)
 289       && gsi_stmt (gsi)
 290       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 291           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 292           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 293     gsi_remove (&gsi, true);
 294
 295   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 296     {
 297       if (e->dest != dest_bb)
 298         remove_edge (e);
 299       else
 300         ei_next (&ei);
 301     }
 302 }
 303
 304 /* Create a duplicate of BB.  Record the duplicate block in an array
 305    indexed by COUNT stored in RD.  */
 306
 307 static void
 308 create_block_for_threading (basic_block bb,
 309                             struct redirection_data *rd,
 310                             unsigned int count,
 311                             bitmap *duplicate_blocks)
 312 {
 313   edge_iterator ei;
 314   edge e;
 315
 316   /* We can use the generic block duplication code and simply remove
 317      the stuff we do not need.  */
 318   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 319
 320   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 321     e->aux = NULL;
 322
 323   /* Zero out the profile, since the block is unreachable for now.  */
 324   rd->dup_blocks[count]->frequency = 0;
 325   rd->dup_blocks[count]->count = 0;
 326   if (duplicate_blocks)
 327     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
 328 }
 329
 330 /* Main data structure to hold information for duplicates of BB.  */
 331
 332 static hash_table<redirection_data> *redirection_data;
 333
 334 /* Given an outgoing edge E lookup and return its entry in our hash table.
 335
 336    If INSERT is true, then we insert the entry into the hash table if
 337    it is not already present.  INCOMING_EDGE is added to the list of incoming
 338    edges associated with E in the hash table.  */
 339
 340 static struct redirection_data *
 341 lookup_redirection_data (edge e, enum insert_option insert)
 342 {
 343   struct redirection_data **slot;
 344   struct redirection_data *elt;
 345   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 346
 347  /* Build a hash table element so we can see if E is already
 348      in the table.  */
 349   elt = XNEW (struct redirection_data);
 350   elt->path = path;
 351   elt->dup_blocks[0] = NULL;
 352   elt->dup_blocks[1] = NULL;
 353   elt->incoming_edges = NULL;
 354
 355   slot = redirection_data->find_slot (elt, insert);
 356
 357   /* This will only happen if INSERT is false and the entry is not
 358      in the hash table.  */
 359   if (slot == NULL)
 360     {
 361       free (elt);
 362       return NULL;
 363     }
 364
 365   /* This will only happen if E was not in the hash table and
 366      INSERT is true.  */
 367   if (*slot == NULL)
 368     {
 369       *slot = elt;
 370       elt->incoming_edges = XNEW (struct el);
 371       elt->incoming_edges->e = e;
 372       elt->incoming_edges->next = NULL;
 373       return elt;
 374     }
 375   /* E was in the hash table.  */
 376   else
 377     {
 378       /* Free ELT as we do not need it anymore, we will extract the
 379          relevant entry from the hash table itself.  */
 380       free (elt);
 381
 382       /* Get the entry stored in the hash table.  */
 383       elt = *slot;
 384
 385       /* If insertion was requested, then we need to add INCOMING_EDGE
 386          to the list of incoming edges associated with E.  */
 387       if (insert)
 388         {
 389           struct el *el = XNEW (struct el);
 390           el->next = elt->incoming_edges;
 391           el->e = e;
 392           elt->incoming_edges = el;
 393         }
 394
 395       return elt;
 396     }
 397 }
 398
 399 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 400    does not have a value associated with it.  */
 401
 402 static void
 403 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 404 {
 405   int src_idx = src_e->dest_idx;
 406   int tgt_idx = tgt_e->dest_idx;
 407
 408   /* Iterate over each PHI in e->dest.  */
 409   for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
 410                            gsi2 = gsi_start_phis (tgt_e->dest);
 411        !gsi_end_p (gsi);
 412        gsi_next (&gsi), gsi_next (&gsi2))
 413     {
 414       gphi *src_phi = gsi.phi ();
 415       gphi *dest_phi = gsi2.phi ();
 416       tree val = gimple_phi_arg_def (src_phi, src_idx);
 417       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
 418
 419       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 420       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 421     }
 422 }
 423
 424 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
 425    to see if it has constant value in a flow sensitive manner.  Set
 426    LOCUS to location of the constant phi arg and return the value.
 427    Return DEF directly if either PATH or idx is ZERO.  */
 428
 429 static tree
 430 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
 431                          basic_block bb, int idx, source_location *locus)
 432 {
 433   tree arg;
 434   gphi *def_phi;
 435   basic_block def_bb;
 436
 437   if (path == NULL || idx == 0)
 438     return def;
 439
 440   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
 441   if (!def_phi)
 442     return def;
 443
 444   def_bb = gimple_bb (def_phi);
 445   /* Don't propagate loop invariants into deeper loops.  */
 446   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
 447     return def;
 448
 449   /* Backtrack jump threading path from IDX to see if def has constant
 450      value.  */
 451   for (int j = idx - 1; j >= 0; j--)
 452     {
 453       edge e = (*path)[j]->e;
 454       if (e->dest == def_bb)
 455         {
 456           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
 457           if (is_gimple_min_invariant (arg))
 458             {
 459               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
 460               return arg;
 461             }
 462           break;
 463         }
 464     }
 465
 466   return def;
 467 }
 468
 469 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 470    Try to backtrack jump threading PATH from node IDX to see if the arg
 471    has constant value, copy constant value instead of argument itself
 472    if yes.  */
 473
 474 static void
 475 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
 476                vec<jump_thread_edge *> *path, int idx)
 477 {
 478   gphi_iterator gsi;
 479   int src_indx = src_e->dest_idx;
 480
 481   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 482     {
 483       gphi *phi = gsi.phi ();
 484       tree def = gimple_phi_arg_def (phi, src_indx);
 485       source_location locus = gimple_phi_arg_location (phi, src_indx);
 486
 487       if (TREE_CODE (def) == SSA_NAME
 488           && !virtual_operand_p (gimple_phi_result (phi)))
 489         def = get_value_locus_in_path (def, path, bb, idx, &locus);
 490
 491       add_phi_arg (phi, def, tgt_e, locus);
 492     }
 493 }
 494
 495 /* We have recently made a copy of ORIG_BB, including its outgoing
 496    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 497    ORIG_BB has a new argument associated with edge from NEW_BB to the
 498    successor.  Initialize the PHI argument so that it is equal to the PHI
 499    argument associated with the edge from ORIG_BB to the successor.
 500    PATH and IDX are used to check if the new PHI argument has constant
 501    value in a flow sensitive manner.  */
 502
 503 static void
 504 update_destination_phis (basic_block orig_bb, basic_block new_bb,
 505                          vec<jump_thread_edge *> *path, int idx)
 506 {
 507   edge_iterator ei;
 508   edge e;
 509
 510   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 511     {
 512       edge e2 = find_edge (new_bb, e->dest);
 513       copy_phi_args (e->dest, e, e2, path, idx);
 514     }
 515 }
 516
 517 /* Given a duplicate block and its single destination (both stored
 518    in RD).  Create an edge between the duplicate and its single
 519    destination.
 520
 521    Add an additional argument to any PHI nodes at the single
 522    destination.  IDX is the start node in jump threading path
 523    we start to check to see if the new PHI argument has constant
 524    value along the jump threading path.  */
 525
 526 static void
 527 create_edge_and_update_destination_phis (struct redirection_data *rd,
 528                                          basic_block bb, int idx)
 529 {
 530   edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 531
 532   rescan_loop_exit (e, true, false);
 533   e->probability = REG_BR_PROB_BASE;
 534   e->count = bb->count;
 535
 536   /* We used to copy the thread path here.  That was added in 2007
 537      and dutifully updated through the representation changes in 2013.
 538
 539      In 2013 we added code to thread from an interior node through
 540      the backedge to another interior node.  That runs after the code
 541      to thread through loop headers from outside the loop.
 542
 543      The latter may delete edges in the CFG, including those
 544      which appeared in the jump threading path we copied here.  Thus
 545      we'd end up using a dangling pointer.
 546
 547      After reviewing the 2007/2011 code, I can't see how anything
 548      depended on copying the AUX field and clearly copying the jump
 549      threading path is problematical due to embedded edge pointers.
 550      It has been removed.  */
 551   e->aux = NULL;
 552
 553   /* If there are any PHI nodes at the destination of the outgoing edge
 554      from the duplicate block, then we will need to add a new argument
 555      to them.  The argument should have the same value as the argument
 556      associated with the outgoing edge stored in RD.  */
 557   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
 558 }
 559
 560 /* Look through PATH beginning at START and return TRUE if there are
 561    any additional blocks that need to be duplicated.  Otherwise,
 562    return FALSE.  */
 563 static bool
 564 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 565                                  unsigned int start)
 566 {
 567   for (unsigned int i = start + 1; i < path->length (); i++)
 568     {
 569       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 570           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 571         return true;
 572     }
 573   return false;
 574 }
 575
 576
 577 /* Compute the amount of profile count/frequency coming into the jump threading
 578    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
 579    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
 580    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
 581    identify blocks duplicated for jump threading, which have duplicated
 582    edges that need to be ignored in the analysis.  Return true if path contains
 583    a joiner, false otherwise.
 584
 585    In the non-joiner case, this is straightforward - all the counts/frequency
 586    flowing into the jump threading path should flow through the duplicated
 587    block and out of the duplicated path.
 588
 589    In the joiner case, it is very tricky.  Some of the counts flowing into
 590    the original path go offpath at the joiner.  The problem is that while
 591    we know how much total count goes off-path in the original control flow,
 592    we don't know how many of the counts corresponding to just the jump
 593    threading path go offpath at the joiner.
 594
 595    For example, assume we have the following control flow and identified
 596    jump threading paths:
 597
 598                 A     B     C
 599                  \    |    /
 600                Ea \   |Eb / Ec
 601                    \  |  /
 602                     v v v
 603                       J       <-- Joiner
 604                      / \
 605                 Eoff/   \Eon
 606                    /     \
 607                   v       v
 608                 Soff     Son  <--- Normal
 609                          /\
 610                       Ed/  \ Ee
 611                        /    \
 612                       v     v
 613                       D      E
 614
 615             Jump threading paths: A -> J -> Son -> D (path 1)
 616                                   C -> J -> Son -> E (path 2)
 617
 618    Note that the control flow could be more complicated:
 619    - Each jump threading path may have more than one incoming edge.  I.e. A and
 620    Ea could represent multiple incoming blocks/edges that are included in
 621    path 1.
 622    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
 623    before or after the "normal" copy block).  These are not duplicated onto
 624    the jump threading path, as they are single-successor.
 625    - Any of the blocks along the path may have other incoming edges that
 626    are not part of any jump threading path, but add profile counts along
 627    the path.
 628
 629    In the above example, after all jump threading is complete, we will
 630    end up with the following control flow:
 631
 632                 A         B         C
 633                 |         |         |
 634               Ea|         |Eb     |Ec
 635                 |         |         |
 636                 v         v         v
 637                Ja         J        Jc
 638                / \      / \Eon'     / \
 639           Eona/   \   ---/---\--------   \Eonc
 640              /     \ /  /     \    \
 641             v       v  v       v          v
 642            Sona     Soff      Son       Sonc
 643              \           /\      /
 644               \___________    /  \  _____/
 645                           \  /    \/
 646                            vv      v
 647                             D      E
 648
 649    The main issue to notice here is that when we are processing path 1
 650    (A->J->Son->D) we need to figure out the outgoing edge weights to
 651    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
 652    sum of the incoming weights to D remain Ed.  The problem with simply
 653    assuming that Ja (and Jc when processing path 2) has the same outgoing
 654    probabilities to its successors as the original block J, is that after
 655    all paths are processed and other edges/counts removed (e.g. none
 656    of Ec will reach D after processing path 2), we may end up with not
 657    enough count flowing along duplicated edge Sona->D.
 658
 659    Therefore, in the case of a joiner, we keep track of all counts
 660    coming in along the current path, as well as from predecessors not
 661    on any jump threading path (Eb in the above example).  While we
 662    first assume that the duplicated Eona for Ja->Sona has the same
 663    probability as the original, we later compensate for other jump
 664    threading paths that may eliminate edges.  We do that by keeping track
 665    of all counts coming into the original path that are not in a jump
 666    thread (Eb in the above example, but as noted earlier, there could
 667    be other predecessors incoming to the path at various points, such
 668    as at Son).  Call this cumulative non-path count coming into the path
 669    before D as Enonpath.  We then ensure that the count from Sona->D is at
 670    least as big as (Ed - Enonpath), but no bigger than the minimum
 671    weight along the jump threading path.  The probabilities of both the
 672    original and duplicated joiner block J and Ja will be adjusted
 673    accordingly after the updates.  */
 674
 675 static bool
 676 compute_path_counts (struct redirection_data *rd,
 677                      ssa_local_info_t *local_info,
 678                      gcov_type *path_in_count_ptr,
 679                      gcov_type *path_out_count_ptr,
 680                      int *path_in_freq_ptr)
 681 {
 682   edge e = rd->incoming_edges->e;
 683   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 684   edge elast = path->last ()->e;
 685   gcov_type nonpath_count = 0;
 686   bool has_joiner = false;
 687   gcov_type path_in_count = 0;
 688   int path_in_freq = 0;
 689
 690   /* Start by accumulating incoming edge counts to the path's first bb
 691      into a couple buckets:
 692         path_in_count: total count of incoming edges that flow into the
 693                   current path.
 694         nonpath_count: total count of incoming edges that are not
 695                   flowing along *any* path.  These are the counts
 696                   that will still flow along the original path after
 697                   all path duplication is done by potentially multiple
 698                   calls to this routine.
 699      (any other incoming edge counts are for a different jump threading
 700      path that will be handled by a later call to this routine.)
 701      To make this easier, start by recording all incoming edges that flow into
 702      the current path in a bitmap.  We could add up the path's incoming edge
 703      counts here, but we still need to walk all the first bb's incoming edges
 704      below to add up the counts of the other edges not included in this jump
 705      threading path.  */
 706   struct el *next, *el;
 707   bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
 708   for (el = rd->incoming_edges; el; el = next)
 709     {
 710       next = el->next;
 711       bitmap_set_bit (in_edge_srcs, el->e->src->index);
 712     }
 713   edge ein;
 714   edge_iterator ei;
 715   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 716     {
 717       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
 718       /* Simply check the incoming edge src against the set captured above.  */
 719       if (ein_path
 720           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
 721         {
 722           /* It is necessary but not sufficient that the last path edges
 723              are identical.  There may be different paths that share the
 724              same last path edge in the case where the last edge has a nocopy
 725              source block.  */
 726           gcc_assert (ein_path->last ()->e == elast);
 727           path_in_count += ein->count;
 728           path_in_freq += EDGE_FREQUENCY (ein);
 729         }
 730       else if (!ein_path)
 731         {
 732           /* Keep track of the incoming edges that are not on any jump-threading
 733              path.  These counts will still flow out of original path after all
 734              jump threading is complete.  */
 735             nonpath_count += ein->count;
 736         }
 737     }
 738
 739   /* This is needed due to insane incoming frequencies.  */
 740   if (path_in_freq > BB_FREQ_MAX)
 741     path_in_freq = BB_FREQ_MAX;
 742
 743   BITMAP_FREE (in_edge_srcs);
 744
 745   /* Now compute the fraction of the total count coming into the first
 746      path bb that is from the current threading path.  */
 747   gcov_type total_count = e->dest->count;
 748   /* Handle incoming profile insanities.  */
 749   if (total_count < path_in_count)
 750     path_in_count = total_count;
 751   int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
 752
 753   /* Walk the entire path to do some more computation in order to estimate
 754      how much of the path_in_count will flow out of the duplicated threading
 755      path.  In the non-joiner case this is straightforward (it should be
 756      the same as path_in_count, although we will handle incoming profile
 757      insanities by setting it equal to the minimum count along the path).
 758
 759      In the joiner case, we need to estimate how much of the path_in_count
 760      will stay on the threading path after the joiner's conditional branch.
 761      We don't really know for sure how much of the counts
 762      associated with this path go to each successor of the joiner, but we'll
 763      estimate based on the fraction of the total count coming into the path
 764      bb was from the threading paths (computed above in onpath_scale).
 765      Afterwards, we will need to do some fixup to account for other threading
 766      paths and possible profile insanities.
 767
 768      In order to estimate the joiner case's counts we also need to update
 769      nonpath_count with any additional counts coming into the path.  Other
 770      blocks along the path may have additional predecessors from outside
 771      the path.  */
 772   gcov_type path_out_count = path_in_count;
 773   gcov_type min_path_count = path_in_count;
 774   for (unsigned int i = 1; i < path->length (); i++)
 775     {
 776       edge epath = (*path)[i]->e;
 777       gcov_type cur_count = epath->count;
 778       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 779         {
 780           has_joiner = true;
 781           cur_count = apply_probability (cur_count, onpath_scale);
 782         }
 783       /* In the joiner case we need to update nonpath_count for any edges
 784          coming into the path that will contribute to the count flowing
 785          into the path successor.  */
 786       if (has_joiner && epath != elast)
 787       {
 788         /* Look for other incoming edges after joiner.  */
 789         FOR_EACH_EDGE (ein, ei, epath->dest->preds)
 790           {
 791             if (ein != epath
 792                 /* Ignore in edges from blocks we have duplicated for a
 793                    threading path, which have duplicated edge counts until
 794                    they are redirected by an invocation of this routine.  */
 795                 && !bitmap_bit_p (local_info->duplicate_blocks,
 796                                   ein->src->index))
 797               nonpath_count += ein->count;
 798           }
 799       }
 800       if (cur_count < path_out_count)
 801         path_out_count = cur_count;
 802       if (epath->count < min_path_count)
 803         min_path_count = epath->count;
 804     }
 805
 806   /* We computed path_out_count above assuming that this path targeted
 807      the joiner's on-path successor with the same likelihood as it
 808      reached the joiner.  However, other thread paths through the joiner
 809      may take a different path through the normal copy source block
 810      (i.e. they have a different elast), meaning that they do not
 811      contribute any counts to this path's elast.  As a result, it may
 812      turn out that this path must have more count flowing to the on-path
 813      successor of the joiner.  Essentially, all of this path's elast
 814      count must be contributed by this path and any nonpath counts
 815      (since any path through the joiner with a different elast will not
 816      include a copy of this elast in its duplicated path).
 817      So ensure that this path's path_out_count is at least the
 818      difference between elast->count and nonpath_count.  Otherwise the edge
 819      counts after threading will not be sane.  */
 820   if (has_joiner && path_out_count < elast->count - nonpath_count)
 821   {
 822     path_out_count = elast->count - nonpath_count;
 823     /* But neither can we go above the minimum count along the path
 824        we are duplicating.  This can be an issue due to profile
 825        insanities coming in to this pass.  */
 826     if (path_out_count > min_path_count)
 827       path_out_count = min_path_count;
 828   }
 829
 830   *path_in_count_ptr = path_in_count;
 831   *path_out_count_ptr = path_out_count;
 832   *path_in_freq_ptr = path_in_freq;
 833   return has_joiner;
 834 }
 835
 836
 837 /* Update the counts and frequencies for both an original path
 838    edge EPATH and its duplicate EDUP.  The duplicate source block
 839    will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
 840    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
 841 static void
 842 update_profile (edge epath, edge edup, gcov_type path_in_count,
 843                 gcov_type path_out_count, int path_in_freq)
 844 {
 845
 846   /* First update the duplicated block's count / frequency.  */
 847   if (edup)
 848     {
 849       basic_block dup_block = edup->src;
 850       gcc_assert (dup_block->count == 0);
 851       gcc_assert (dup_block->frequency == 0);
 852       dup_block->count = path_in_count;
 853       dup_block->frequency = path_in_freq;
 854     }
 855
 856   /* Now update the original block's count and frequency in the
 857      opposite manner - remove the counts/freq that will flow
 858      into the duplicated block.  Handle underflow due to precision/
 859      rounding issues.  */
 860   epath->src->count -= path_in_count;
 861   if (epath->src->count < 0)
 862     epath->src->count = 0;
 863   epath->src->frequency -= path_in_freq;
 864   if (epath->src->frequency < 0)
 865     epath->src->frequency = 0;
 866
 867   /* Next update this path edge's original and duplicated counts.  We know
 868      that the duplicated path will have path_out_count flowing
 869      out of it (in the joiner case this is the count along the duplicated path
 870      out of the duplicated joiner).  This count can then be removed from the
 871      original path edge.  */
 872   if (edup)
 873     edup->count = path_out_count;
 874   epath->count -= path_out_count;
 875   gcc_assert (epath->count >= 0);
 876 }
 877
 878
 879 /* The duplicate and original joiner blocks may end up with different
 880    probabilities (different from both the original and from each other).
 881    Recompute the probabilities here once we have updated the edge
 882    counts and frequencies.  */
 883
 884 static void
 885 recompute_probabilities (basic_block bb)
 886 {
 887   edge esucc;
 888   edge_iterator ei;
 889   FOR_EACH_EDGE (esucc, ei, bb->succs)
 890     {
 891       if (!bb->count)
 892         continue;
 893
 894       /* Prevent overflow computation due to insane profiles.  */
 895       if (esucc->count < bb->count)
 896         esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
 897                                                  bb->count);
 898       else
 899         /* Can happen with missing/guessed probabilities, since we
 900            may determine that more is flowing along duplicated
 901            path than joiner succ probabilities allowed.
 902            Counts and freqs will be insane after jump threading,
 903            at least make sure probability is sane or we will
 904            get a flow verification error.
 905            Not much we can do to make counts/freqs sane without
 906            redoing the profile estimation.  */
 907         esucc->probability = REG_BR_PROB_BASE;
 908     }
 909 }
 910
 911
 912 /* Update the counts of the original and duplicated edges from a joiner
 913    that go off path, given that we have already determined that the
 914    duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
 915    outgoing count along the path PATH_OUT_COUNT.  The original (on-)path
 916    edge from joiner is EPATH.  */
 917
 918 static void
 919 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
 920                               gcov_type path_in_count,
 921                               gcov_type path_out_count)
 922 {
 923   /* Compute the count that currently flows off path from the joiner.
 924      In other words, the total count of joiner's out edges other than
 925      epath.  Compute this by walking the successors instead of
 926      subtracting epath's count from the joiner bb count, since there
 927      are sometimes slight insanities where the total out edge count is
 928      larger than the bb count (possibly due to rounding/truncation
 929      errors).  */
 930   gcov_type total_orig_off_path_count = 0;
 931   edge enonpath;
 932   edge_iterator ei;
 933   FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
 934     {
 935       if (enonpath == epath)
 936         continue;
 937       total_orig_off_path_count += enonpath->count;
 938     }
 939
 940   /* For the path that we are duplicating, the amount that will flow
 941      off path from the duplicated joiner is the delta between the
 942      path's cumulative in count and the portion of that count we
 943      estimated above as flowing from the joiner along the duplicated
 944      path.  */
 945   gcov_type total_dup_off_path_count = path_in_count - path_out_count;
 946
 947   /* Now do the actual updates of the off-path edges.  */
 948   FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
 949     {
 950       /* Look for edges going off of the threading path.  */
 951       if (enonpath == epath)
 952         continue;
 953
 954       /* Find the corresponding edge out of the duplicated joiner.  */
 955       edge enonpathdup = find_edge (dup_bb, enonpath->dest);
 956       gcc_assert (enonpathdup);
 957
 958       /* We can't use the original probability of the joiner's out
 959          edges, since the probabilities of the original branch
 960          and the duplicated branches may vary after all threading is
 961          complete.  But apportion the duplicated joiner's off-path
 962          total edge count computed earlier (total_dup_off_path_count)
 963          among the duplicated off-path edges based on their original
 964          ratio to the full off-path count (total_orig_off_path_count).
 965          */
 966       int scale = GCOV_COMPUTE_SCALE (enonpath->count,
 967                                       total_orig_off_path_count);
 968       /* Give the duplicated offpath edge a portion of the duplicated
 969          total.  */
 970       enonpathdup->count = apply_scale (scale,
 971                                         total_dup_off_path_count);
 972       /* Now update the original offpath edge count, handling underflow
 973          due to rounding errors.  */
 974       enonpath->count -= enonpathdup->count;
 975       if (enonpath->count < 0)
 976         enonpath->count = 0;
 977     }
 978 }
 979
 980
 981 /* Check if the paths through RD all have estimated frequencies but zero
 982    profile counts.  This is more accurate than checking the entry block
 983    for a zero profile count, since profile insanities sometimes creep in.  */
 984
 985 static bool
 986 estimated_freqs_path (struct redirection_data *rd)
 987 {
 988   edge e = rd->incoming_edges->e;
 989   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 990   edge ein;
 991   edge_iterator ei;
 992   bool non_zero_freq = false;
 993   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 994     {
 995       if (ein->count)
 996         return false;
 997       non_zero_freq |= ein->src->frequency != 0;
 998     }
 999
1000   for (unsigned int i = 1; i < path->length (); i++)
1001     {
1002       edge epath = (*path)[i]->e;
1003       if (epath->src->count)
1004         return false;
1005       non_zero_freq |= epath->src->frequency != 0;
1006       edge esucc;
1007       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1008         {
1009           if (esucc->count)
1010             return false;
1011           non_zero_freq |= esucc->src->frequency != 0;
1012         }
1013     }
1014   return non_zero_freq;
1015 }
1016
1017
1018 /* Invoked for routines that have guessed frequencies and no profile
1019    counts to record the block and edge frequencies for paths through RD
1020    in the profile count fields of those blocks and edges.  This is because
1021    ssa_fix_duplicate_block_edges incrementally updates the block and
1022    edge counts as edges are redirected, and it is difficult to do that
1023    for edge frequencies which are computed on the fly from the source
1024    block frequency and probability.  When a block frequency is updated
1025    its outgoing edge frequencies are affected and become difficult to
1026    adjust.  */
1027
1028 static void
1029 freqs_to_counts_path (struct redirection_data *rd)
1030 {
1031   edge e = rd->incoming_edges->e;
1032   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1033   edge ein;
1034   edge_iterator ei;
1035   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1036     {
1037       /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1038          errors applying the probability when the frequencies are very
1039          small.  */
1040       ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1041                                       ein->probability);
1042     }
1043
1044   for (unsigned int i = 1; i < path->length (); i++)
1045     {
1046       edge epath = (*path)[i]->e;
1047       edge esucc;
1048       /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1049          errors applying the edge probability when the frequencies are very
1050          small.  */
1051       epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1052       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1053         esucc->count = apply_probability (esucc->src->count,
1054                                           esucc->probability);
1055     }
1056 }
1057
1058
1059 /* For routines that have guessed frequencies and no profile counts, where we
1060    used freqs_to_counts_path to record block and edge frequencies for paths
1061    through RD, we clear the counts after completing all updates for RD.
1062    The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1063    but the block frequencies and edge probabilities were updated as well,
1064    so we can simply clear the count fields.  */
1065
1066 static void
1067 clear_counts_path (struct redirection_data *rd)
1068 {
1069   edge e = rd->incoming_edges->e;
1070   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1071   edge ein, esucc;
1072   edge_iterator ei;
1073   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1074     ein->count = 0;
1075
1076   /* First clear counts along original path.  */
1077   for (unsigned int i = 1; i < path->length (); i++)
1078     {
1079       edge epath = (*path)[i]->e;
1080       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1081         esucc->count = 0;
1082       epath->src->count = 0;
1083     }
1084   /* Also need to clear the counts along duplicated path.  */
1085   for (unsigned int i = 0; i < 2; i++)
1086     {
1087       basic_block dup = rd->dup_blocks[i];
1088       if (!dup)
1089         continue;
1090       FOR_EACH_EDGE (esucc, ei, dup->succs)
1091         esucc->count = 0;
1092       dup->count = 0;
1093     }
1094 }
1095
1096 /* Wire up the outgoing edges from the duplicate blocks and
1097    update any PHIs as needed.  Also update the profile counts
1098    on the original and duplicate blocks and edges.  */
1099 void
1100 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1101                                ssa_local_info_t *local_info)
1102 {
1103   bool multi_incomings = (rd->incoming_edges->next != NULL);
1104   edge e = rd->incoming_edges->e;
1105   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1106   edge elast = path->last ()->e;
1107   gcov_type path_in_count = 0;
1108   gcov_type path_out_count = 0;
1109   int path_in_freq = 0;
1110
1111   /* This routine updates profile counts, frequencies, and probabilities
1112      incrementally. Since it is difficult to do the incremental updates
1113      using frequencies/probabilities alone, for routines without profile
1114      data we first take a snapshot of the existing block and edge frequencies
1115      by copying them into the empty profile count fields.  These counts are
1116      then used to do the incremental updates, and cleared at the end of this
1117      routine.  If the function is marked as having a profile, we still check
1118      to see if the paths through RD are using estimated frequencies because
1119      the routine had zero profile counts.  */
1120   bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1121                              || estimated_freqs_path (rd));
1122   if (do_freqs_to_counts)
1123     freqs_to_counts_path (rd);
1124
1125   /* First determine how much profile count to move from original
1126      path to the duplicate path.  This is tricky in the presence of
1127      a joiner (see comments for compute_path_counts), where some portion
1128      of the path's counts will flow off-path from the joiner.  In the
1129      non-joiner case the path_in_count and path_out_count should be the
1130      same.  */
1131   bool has_joiner = compute_path_counts (rd, local_info,
1132                                          &path_in_count, &path_out_count,
1133                                          &path_in_freq);
1134
1135   int cur_path_freq = path_in_freq;
1136   for (unsigned int count = 0, i = 1; i < path->length (); i++)
1137     {
1138       edge epath = (*path)[i]->e;
1139
1140       /* If we were threading through an joiner block, then we want
1141          to keep its control statement and redirect an outgoing edge.
1142          Else we want to remove the control statement & edges, then create
1143          a new outgoing edge.  In both cases we may need to update PHIs.  */
1144       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1145         {
1146           edge victim;
1147           edge e2;
1148
1149           gcc_assert (has_joiner);
1150
1151           /* This updates the PHIs at the destination of the duplicate
1152              block.  Pass 0 instead of i if we are threading a path which
1153              has multiple incoming edges.  */
1154           update_destination_phis (local_info->bb, rd->dup_blocks[count],
1155                                    path, multi_incomings ? 0 : i);
1156
1157           /* Find the edge from the duplicate block to the block we're
1158              threading through.  That's the edge we want to redirect.  */
1159           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1160
1161           /* If there are no remaining blocks on the path to duplicate,
1162              then redirect VICTIM to the final destination of the jump
1163              threading path.  */
1164           if (!any_remaining_duplicated_blocks (path, i))
1165             {
1166               e2 = redirect_edge_and_branch (victim, elast->dest);
1167               /* If we redirected the edge, then we need to copy PHI arguments
1168                  at the target.  If the edge already existed (e2 != victim
1169                  case), then the PHIs in the target already have the correct
1170                  arguments.  */
1171               if (e2 == victim)
1172                 copy_phi_args (e2->dest, elast, e2,
1173                                path, multi_incomings ? 0 : i);
1174             }
1175           else
1176             {
1177               /* Redirect VICTIM to the next duplicated block in the path.  */
1178               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1179
1180               /* We need to update the PHIs in the next duplicated block.  We
1181                  want the new PHI args to have the same value as they had
1182                  in the source of the next duplicate block.
1183
1184                  Thus, we need to know which edge we traversed into the
1185                  source of the duplicate.  Furthermore, we may have
1186                  traversed many edges to reach the source of the duplicate.
1187
1188                  Walk through the path starting at element I until we
1189                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1190                  the edge from the prior element.  */
1191               for (unsigned int j = i + 1; j < path->length (); j++)
1192                 {
1193                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1194                     {
1195                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1196                       break;
1197                     }
1198                 }
1199             }
1200
1201           /* Update the counts and frequency of both the original block
1202              and path edge, and the duplicates.  The path duplicate's
1203              incoming count and frequency are the totals for all edges
1204              incoming to this jump threading path computed earlier.
1205              And we know that the duplicated path will have path_out_count
1206              flowing out of it (i.e. along the duplicated path out of the
1207              duplicated joiner).  */
1208           update_profile (epath, e2, path_in_count, path_out_count,
1209                           path_in_freq);
1210
1211           /* Next we need to update the counts of the original and duplicated
1212              edges from the joiner that go off path.  */
1213           update_joiner_offpath_counts (epath, e2->src, path_in_count,
1214                                         path_out_count);
1215
1216           /* Finally, we need to set the probabilities on the duplicated
1217              edges out of the duplicated joiner (e2->src).  The probabilities
1218              along the original path will all be updated below after we finish
1219              processing the whole path.  */
1220           recompute_probabilities (e2->src);
1221
1222           /* Record the frequency flowing to the downstream duplicated
1223              path blocks.  */
1224           cur_path_freq = EDGE_FREQUENCY (e2);
1225         }
1226       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1227         {
1228           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1229           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1230                                                    multi_incomings ? 0 : i);
1231           if (count == 1)
1232             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1233
1234           /* Update the counts and frequency of both the original block
1235              and path edge, and the duplicates.  Since we are now after
1236              any joiner that may have existed on the path, the count
1237              flowing along the duplicated threaded path is path_out_count.
1238              If we didn't have a joiner, then cur_path_freq was the sum
1239              of the total frequencies along all incoming edges to the
1240              thread path (path_in_freq).  If we had a joiner, it would have
1241              been updated at the end of that handling to the edge frequency
1242              along the duplicated joiner path edge.  */
1243           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1244                           path_out_count, path_out_count,
1245                           cur_path_freq);
1246         }
1247       else
1248         {
1249           /* No copy case.  In this case we don't have an equivalent block
1250              on the duplicated thread path to update, but we do need
1251              to remove the portion of the counts/freqs that were moved
1252              to the duplicated path from the counts/freqs flowing through
1253              this block on the original path.  Since all the no-copy edges
1254              are after any joiner, the removed count is the same as
1255              path_out_count.
1256
1257              If we didn't have a joiner, then cur_path_freq was the sum
1258              of the total frequencies along all incoming edges to the
1259              thread path (path_in_freq).  If we had a joiner, it would have
1260              been updated at the end of that handling to the edge frequency
1261              along the duplicated joiner path edge.  */
1262              update_profile (epath, NULL, path_out_count, path_out_count,
1263                              cur_path_freq);
1264         }
1265
1266       /* Increment the index into the duplicated path when we processed
1267          a duplicated block.  */
1268       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1269           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1270       {
1271           count++;
1272       }
1273     }
1274
1275   /* Now walk orig blocks and update their probabilities, since the
1276      counts and freqs should be updated properly by above loop.  */
1277   for (unsigned int i = 1; i < path->length (); i++)
1278     {
1279       edge epath = (*path)[i]->e;
1280       recompute_probabilities (epath->src);
1281     }
1282
1283   /* Done with all profile and frequency updates, clear counts if they
1284      were copied.  */
1285   if (do_freqs_to_counts)
1286     clear_counts_path (rd);
1287 }
1288
1289 /* Hash table traversal callback routine to create duplicate blocks.  */
1290
1291 int
1292 ssa_create_duplicates (struct redirection_data **slot,
1293                        ssa_local_info_t *local_info)
1294 {
1295   struct redirection_data *rd = *slot;
1296
1297   /* The second duplicated block in a jump threading path is specific
1298      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
1299
1300      Each time we're called, we have to look through the path and see
1301      if a second block needs to be duplicated.
1302
1303      Note the search starts with the third edge on the path.  The first
1304      edge is the incoming edge, the second edge always has its source
1305      duplicated.  Thus we start our search with the third edge.  */
1306   vec<jump_thread_edge *> *path = rd->path;
1307   for (unsigned int i = 2; i < path->length (); i++)
1308     {
1309       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1310           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1311         {
1312           create_block_for_threading ((*path)[i]->e->src, rd, 1,
1313                                       &local_info->duplicate_blocks);
1314           break;
1315         }
1316     }
1317
1318   /* Create a template block if we have not done so already.  Otherwise
1319      use the template to create a new block.  */
1320   if (local_info->template_block == NULL)
1321     {
1322       create_block_for_threading ((*path)[1]->e->src, rd, 0,
1323                                   &local_info->duplicate_blocks);
1324       local_info->template_block = rd->dup_blocks[0];
1325
1326       /* We do not create any outgoing edges for the template.  We will
1327          take care of that in a later traversal.  That way we do not
1328          create edges that are going to just be deleted.  */
1329     }
1330   else
1331     {
1332       create_block_for_threading (local_info->template_block, rd, 0,
1333                                   &local_info->duplicate_blocks);
1334
1335       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1336          block.   */
1337       ssa_fix_duplicate_block_edges (rd, local_info);
1338     }
1339
1340   /* Keep walking the hash table.  */
1341   return 1;
1342 }
1343
1344 /* We did not create any outgoing edges for the template block during
1345    block creation.  This hash table traversal callback creates the
1346    outgoing edge for the template block.  */
1347
1348 inline int
1349 ssa_fixup_template_block (struct redirection_data **slot,
1350                           ssa_local_info_t *local_info)
1351 {
1352   struct redirection_data *rd = *slot;
1353
1354   /* If this is the template block halt the traversal after updating
1355      it appropriately.
1356
1357      If we were threading through an joiner block, then we want
1358      to keep its control statement and redirect an outgoing edge.
1359      Else we want to remove the control statement & edges, then create
1360      a new outgoing edge.  In both cases we may need to update PHIs.  */
1361   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1362     {
1363       ssa_fix_duplicate_block_edges (rd, local_info);
1364       return 0;
1365     }
1366
1367   return 1;
1368 }
1369
1370 /* Hash table traversal callback to redirect each incoming edge
1371    associated with this hash table element to its new destination.  */
1372
1373 int
1374 ssa_redirect_edges (struct redirection_data **slot,
1375                     ssa_local_info_t *local_info)
1376 {
1377   struct redirection_data *rd = *slot;
1378   struct el *next, *el;
1379
1380   /* Walk over all the incoming edges associated associated with this
1381      hash table entry.  */
1382   for (el = rd->incoming_edges; el; el = next)
1383     {
1384       edge e = el->e;
1385       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1386
1387       /* Go ahead and free this element from the list.  Doing this now
1388          avoids the need for another list walk when we destroy the hash
1389          table.  */
1390       next = el->next;
1391       free (el);
1392
1393       thread_stats.num_threaded_edges++;
1394
1395       if (rd->dup_blocks[0])
1396         {
1397           edge e2;
1398
1399           if (dump_file && (dump_flags & TDF_DETAILS))
1400             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1401                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1402
1403           /* If we redirect a loop latch edge cancel its loop.  */
1404           if (e->src == e->src->loop_father->latch)
1405             mark_loop_for_removal (e->src->loop_father);
1406
1407           /* Redirect the incoming edge (possibly to the joiner block) to the
1408              appropriate duplicate block.  */
1409           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1410           gcc_assert (e == e2);
1411           flush_pending_stmts (e2);
1412         }
1413
1414       /* Go ahead and clear E->aux.  It's not needed anymore and failure
1415          to clear it will cause all kinds of unpleasant problems later.  */
1416       delete_jump_thread_path (path);
1417       e->aux = NULL;
1418
1419     }
1420
1421   /* Indicate that we actually threaded one or more jumps.  */
1422   if (rd->incoming_edges)
1423     local_info->jumps_threaded = true;
1424
1425   return 1;
1426 }
1427
1428 /* Return true if this block has no executable statements other than
1429    a simple ctrl flow instruction.  When the number of outgoing edges
1430    is one, this is equivalent to a "forwarder" block.  */
1431
1432 static bool
1433 redirection_block_p (basic_block bb)
1434 {
1435   gimple_stmt_iterator gsi;
1436
1437   /* Advance to the first executable statement.  */
1438   gsi = gsi_start_bb (bb);
1439   while (!gsi_end_p (gsi)
1440          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1441              || is_gimple_debug (gsi_stmt (gsi))
1442              || gimple_nop_p (gsi_stmt (gsi))
1443              || gimple_clobber_p (gsi_stmt (gsi))))
1444     gsi_next (&gsi);
1445
1446   /* Check if this is an empty block.  */
1447   if (gsi_end_p (gsi))
1448     return true;
1449
1450   /* Test that we've reached the terminating control statement.  */
1451   return gsi_stmt (gsi)
1452          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1453              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1454              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1455 }
1456
1457 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1458    is reached via one or more specific incoming edges, we know which
1459    outgoing edge from BB will be traversed.
1460
1461    We want to redirect those incoming edges to the target of the
1462    appropriate outgoing edge.  Doing so avoids a conditional branch
1463    and may expose new optimization opportunities.  Note that we have
1464    to update dominator tree and SSA graph after such changes.
1465
1466    The key to keeping the SSA graph update manageable is to duplicate
1467    the side effects occurring in BB so that those side effects still
1468    occur on the paths which bypass BB after redirecting edges.
1469
1470    We accomplish this by creating duplicates of BB and arranging for
1471    the duplicates to unconditionally pass control to one specific
1472    successor of BB.  We then revector the incoming edges into BB to
1473    the appropriate duplicate of BB.
1474
1475    If NOLOOP_ONLY is true, we only perform the threading as long as it
1476    does not affect the structure of the loops in a nontrivial way.
1477
1478    If JOINERS is true, then thread through joiner blocks as well.  */
1479
static bool
thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  E2 is the last edge recorded on
     E's jump thread path.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;

  local_info.duplicate_blocks = BITMAP_ALLOC (NULL);

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  redirection_data
    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      /* An edge with nothing in AUX carries no threading request.  */
      if (e->aux == NULL)
        continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Each invocation handles one flavour of path only: skip joiner
         paths when JOINERS is false and plain block-copy paths when
         JOINERS is true.  */
      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
          || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
        continue;

      e2 = path->last ()->e;
      /* NOTE(review): the "!e2" arm enters code that dereferences E2
         unconditionally; presumably every path ends in a real edge so
         E2 is never NULL here -- confirm.  */
      if (!e2 || noloop_only)
        {
          /* If NOLOOP_ONLY is true, we only allow threading through the
             header of a loop to exit edges.  */

          /* One case occurs when there was loop header buried in a jump
             threading path that crosses loop boundaries.  We do not try
             and thread this elsewhere, so just cancel the jump threading
             request by clearing the AUX field now.  */
          if ((bb->loop_father != e2->src->loop_father
               && !loop_exit_edge_p (e2->src->loop_father, e2))
              || (e2->src->loop_father != e2->dest->loop_father
                  && !loop_exit_edge_p (e2->src->loop_father, e2)))
            {
              /* Since this case is not handled by our special code
                 to thread through a loop header, we must explicitly
                 cancel the threading request here.  */
              delete_jump_thread_path (path);
              e->aux = NULL;
              continue;
            }

          /* Another case occurs when trying to thread through our
             own loop header, possibly from inside the loop.  We will
             thread these later.  */
          unsigned int i;
          for (i = 1; i < path->length (); i++)
            {
              if ((*path)[i]->e->src == bb->loop_father->header
                  && (!loop_exit_edge_p (bb->loop_father, e2)
                      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
                break;
            }

          /* The scan stopped early, so this request is deferred: it is
             not registered below, but AUX is left untouched.  */
          if (i != path->length ())
            continue;
        }

      /* Insert the outgoing edge into the hash table if it is not
         already in the hash table.  */
      lookup_redirection_data (e, INSERT);
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
                            (&local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes as the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
                            (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
                            (&local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  delete redirection_data;
  redirection_data = NULL;

  /* Undo the loop-copy mapping installed before the duplication.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  BITMAP_FREE (local_info.duplicate_blocks);
  local_info.duplicate_blocks = NULL;

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}
1609
1610 /* Wrapper for thread_block_1 so that we can first handle jump
1611    thread paths which do not involve copying joiner blocks, then
1612    handle jump thread paths which have joiner blocks.
1613
1614    By doing things this way we can be as aggressive as possible and
1615    not worry that copying a joiner block will create a jump threading
1616    opportunity.  */
1617
1618 static bool
1619 thread_block (basic_block bb, bool noloop_only)
1620 {
1621   bool retval;
1622   retval = thread_block_1 (bb, noloop_only, false);
1623   retval |= thread_block_1 (bb, noloop_only, true);
1624   return retval;
1625 }
1626
1627
/* Threads edge E through E->dest to the second edge recorded on its jump
   thread path, THREAD_PATH (E)[1].  Returns the copy of E->dest created
   during threading, or E->dest if it was not necessary to copy it (E is
   its single predecessor).  */
1631
1632 static basic_block
1633 thread_single_edge (edge e)
1634 {
1635   basic_block bb = e->dest;
1636   struct redirection_data rd;
1637   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1638   edge eto = (*path)[1]->e;
1639
1640   delete_jump_thread_path (path);
1641   e->aux = NULL;
1642
1643   thread_stats.num_threaded_edges++;
1644
1645   if (single_pred_p (bb))
1646     {
1647       /* If BB has just a single predecessor, we should only remove the
1648          control statements at its end, and successors except for ETO.  */
1649       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1650
1651       /* And fixup the flags on the single remaining edge.  */
1652       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1653       eto->flags |= EDGE_FALLTHRU;
1654
1655       return bb;
1656     }
1657
1658   /* Otherwise, we need to create a copy.  */
1659   if (e->dest == eto->src)
1660     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1661
1662   vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1663   jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1664   npath->safe_push (x);
1665
1666   x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1667   npath->safe_push (x);
1668   rd.path = npath;
1669
1670   create_block_for_threading (bb, &rd, 0, NULL);
1671   remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1672   create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1673
1674   if (dump_file && (dump_flags & TDF_DETAILS))
1675     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1676              e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1677
1678   rd.dup_blocks[0]->count = e->count;
1679   rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1680   single_succ_edge (rd.dup_blocks[0])->count = e->count;
1681   redirect_edge_and_branch (e, rd.dup_blocks[0]);
1682   flush_pending_stmts (e);
1683
1684   delete_jump_thread_path (npath);
1685   return rd.dup_blocks[0];
1686 }
1687
1688 /* Callback for dfs_enumerate_from.  Returns true if BB is different
1689    from STOP and DBDS_CE_STOP.  */
1690
1691 static basic_block dbds_ce_stop;
1692 static bool
1693 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1694 {
1695   return (bb != (const_basic_block) stop
1696           && bb != dbds_ce_stop);
1697 }
1698
1699 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1700    returns the state.  */
1701
enum bb_dom_status
{
  /* BB does not dominate latch of the LOOP.  */
  DOMST_NONDOMINATING,
  /* The LOOP is broken (there is no path from the header to its latch).  */
  DOMST_LOOP_BROKEN,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING
};
1711
1712 static enum bb_dom_status
1713 determine_bb_domination_status (struct loop *loop, basic_block bb)
1714 {
1715   basic_block *bblocks;
1716   unsigned nblocks, i;
1717   bool bb_reachable = false;
1718   edge_iterator ei;
1719   edge e;
1720
1721   /* This function assumes BB is a successor of LOOP->header.
1722      If that is not the case return DOMST_NONDOMINATING which
1723      is always safe.  */
1724     {
1725       bool ok = false;
1726
1727       FOR_EACH_EDGE (e, ei, bb->preds)
1728         {
1729           if (e->src == loop->header)
1730             {
1731               ok = true;
1732               break;
1733             }
1734         }
1735
1736       if (!ok)
1737         return DOMST_NONDOMINATING;
1738     }
1739
1740   if (bb == loop->latch)
1741     return DOMST_DOMINATING;
1742
1743   /* Check that BB dominates LOOP->latch, and that it is back-reachable
1744      from it.  */
1745
1746   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1747   dbds_ce_stop = loop->header;
1748   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1749                                 bblocks, loop->num_nodes, bb);
1750   for (i = 0; i < nblocks; i++)
1751     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1752       {
1753         if (e->src == loop->header)
1754           {
1755             free (bblocks);
1756             return DOMST_NONDOMINATING;
1757           }
1758         if (e->src == bb)
1759           bb_reachable = true;
1760       }
1761
1762   free (bblocks);
1763   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1764 }
1765
1766 /* Return true if BB is part of the new pre-header that is created
1767    when threading the latch to DATA.  */
1768
1769 static bool
1770 def_split_header_continue_p (const_basic_block bb, const void *data)
1771 {
1772   const_basic_block new_header = (const_basic_block) data;
1773   const struct loop *l;
1774
1775   if (bb == new_header
1776       || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1777     return false;
1778   for (l = bb->loop_father; l; l = loop_outer (l))
1779     if (l == new_header->loop_father)
1780       return true;
1781   return false;
1782 }
1783
1784 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
1785    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1786    to the inside of the loop.  */
1787
static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  /* HEADER caches the loop's original header, since LOOP->header is
     reassigned below.  LATCH is the loop's latch edge.  TGT_EDGE and
     TGT_BB are the edge and block the header is threaded to; ATGT_BB is
     a scratch copy used to check that all entry edges agree on TGT_BB.  */
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loop with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
        of a loop.  This aims to handle the following idiom:

        first = 1;
        while (1)
          {
            if (first)
              initialize;
            first = 0;
            body;
          }

        After threading the latch edge, this becomes

        first = 1;
        if (first)
          initialize;
        while (1)
          {
            first = 0;
            body;
          }

        The original header of the loop is moved out of it, and we may thread
        the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
        the latch block of the loop.  This aims to handle the following idiom
        (normally created for "for" loops):

        i = 0;
        while (1)
          {
            if (i >= 100)
              break;
            body;
            i++;
          }

        This becomes

        i = 0;
        while (1)
          {
            body;
            i++;
            if (i >= 100)
              break;
          }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  /* If we threaded the latch using a joiner block, we cancel the
     threading opportunity out of an abundance of caution.  However,
     still allow threading from outside to inside the loop.  */
  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          delete_jump_thread_path (path);
          latch->aux = NULL;
        }
    }

  /* Case 1: the latch edge still carries a request, so its target
     decides where the header is threaded to.  */
  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      tgt_edge = (*path)[1]->e;
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
           && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      /* Case 2: every entry edge must be threaded, and all of them to
         the same block.  */
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (!e->aux)
            {
              if (e == latch)
                continue;

              /* If latch is not threaded, and there is a header
                 edge that is not threaded, we would create loop
                 with multiple entries.  */
              goto fail;
            }

          vec<jump_thread_edge *> *path = THREAD_PATH (e);

          if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
            goto fail;
          tgt_edge = (*path)[1]->e;
          atgt_bb = tgt_edge->dest;
          if (!tgt_bb)
            tgt_bb = atgt_bb;
          /* Two targets of threading would make us create loop
             with multiple entries.  */
          else if (tgt_bb != atgt_bb)
            goto fail;
        }

      if (!tgt_bb)
        {
          /* There are no threading requests.  */
          return false;
        }

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
          && empty_block_p (loop->latch))
        goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header.  */
      mark_loop_for_removal (loop);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      basic_block *bblocks;
      unsigned nblocks, i;

      /* First handle the case latch edge is redirected.  We are copying
         the loop header but not creating a multiple entry loop.  Make the
         cfg manipulation code aware of that fact.  */
      set_loop_copy (loop, loop);
      loop->latch = thread_single_edge (latch);
      set_loop_copy (loop, NULL);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Remove the new pre-header blocks from our loop.  */
      bblocks = XCNEWVEC (basic_block, loop->num_nodes);
      nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
                                    bblocks, loop->num_nodes, tgt_bb);
      for (i = 0; i < nblocks; i++)
        if (bblocks[i]->loop_father == loop)
          {
            remove_bb_from_loops (bblocks[i]);
            add_bb_to_loop (bblocks[i], loop_outer (loop));
          }
      free (bblocks);

      /* If the new header has multiple latches mark it so.  */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src->loop_father == loop
            && e->src != loop->latch)
          {
            loop->latch = NULL;
            loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
          }

      /* Cancel remaining threading requests that would make the
         loop a multiple entry loop.  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          edge e2;

          if (e->aux == NULL)
            continue;

          vec<jump_thread_edge *> *path = THREAD_PATH (e);
          e2 = path->last ()->e;

          if (e->src->loop_father != e2->dest->loop_father
              && e2->dest != loop->header)
            {
              delete_jump_thread_path (path);
              e->aux = NULL;
            }
        }

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case entry edges are redirected to the new entry
         block.  Remember one entry edge, so that we can find the new
         preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (e->aux)
            break;
        }

      /* The duplicate of the header is the new preheader of the loop.  Ensure
         that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      /* E is the entry edge remembered by the loop above; it is read
         again only now, after threading, to locate the new preheader.  */
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
         must have only a single successor, but the original header had at
         least two successors.  */
      loop->latch = NULL;
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      /* NOTE(review): THREAD_PATH (e) presumably just reads E->aux, so
         PATH is null for edges without a request -- confirm against the
         macro definition.  */
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
        {
          delete_jump_thread_path (path);
          e->aux = NULL;
        }
    }
  return false;
}
2064
2065 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
2066    PHI arguments associated with those edges are equal or there are no
2067    PHI arguments, otherwise return FALSE.  */
2068
2069 static bool
2070 phi_args_equal_on_edges (edge e1, edge e2)
2071 {
2072   gphi_iterator gsi;
2073   int indx1 = e1->dest_idx;
2074   int indx2 = e2->dest_idx;
2075
2076   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2077     {
2078       gphi *phi = gsi.phi ();
2079
2080       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2081                             gimple_phi_arg_def (phi, indx2), 0))
2082         return false;
2083     }
2084   return true;
2085 }
2086
2087 /* Walk through the registered jump threads and convert them into a
2088    form convenient for this pass.
2089
   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCKS set.

   Any threaded edge will have its jump thread path stored in the
   original edge's AUX field.
2095
2096    This form avoids the need to walk all the edges in the CFG to
2097    discover blocks which need processing and avoids unnecessary
2098    hash table lookups to map from threaded edge to new target.  */
2099
2100 static void
2101 mark_threaded_blocks (bitmap threaded_blocks)
2102 {
2103   unsigned int i;
2104   bitmap_iterator bi;
2105   bitmap tmp = BITMAP_ALLOC (NULL);
2106   basic_block bb;
2107   edge e;
2108   edge_iterator ei;
2109
2110   /* It is possible to have jump threads in which one is a subpath
2111      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
2112      block and (B, C), (C, D) where no joiner block exists.
2113
2114      When this occurs ignore the jump thread request with the joiner
2115      block.  It's totally subsumed by the simpler jump thread request.
2116
2117      This results in less block copying, simpler CFGs.  More importantly,
2118      when we duplicate the joiner block, B, in this case we will create
2119      a new threading opportunity that we wouldn't be able to optimize
2120      until the next jump threading iteration.
2121
2122      So first convert the jump thread requests which do not require a
2123      joiner block.  */
2124   for (i = 0; i < paths.length (); i++)
2125     {
2126       vec<jump_thread_edge *> *path = paths[i];
2127
2128       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2129         {
2130           edge e = (*path)[0]->e;
2131           e->aux = (void *)path;
2132           bitmap_set_bit (tmp, e->dest->index);
2133         }
2134     }
2135
2136   /* Now iterate again, converting cases where we want to thread
2137      through a joiner block, but only if no other edge on the path
2138      already has a jump thread attached to it.  We do this in two passes,
2139      to avoid situations where the order in the paths vec can hide overlapping
2140      threads (the path is recorded on the incoming edge, so we would miss
2141      cases where the second path starts at a downstream edge on the same
2142      path).  First record all joiner paths, deleting any in the unexpected
2143      case where there is already a path for that incoming edge.  */
2144   for (i = 0; i < paths.length (); i++)
2145     {
2146       vec<jump_thread_edge *> *path = paths[i];
2147
2148       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2149         {
2150           /* Attach the path to the starting edge if none is yet recorded.  */
2151           if ((*path)[0]->e->aux == NULL)
2152             {
2153               (*path)[0]->e->aux = path;
2154             }
2155           else
2156             {
2157               paths.unordered_remove (i);
2158               if (dump_file && (dump_flags & TDF_DETAILS))
2159                 dump_jump_thread_path (dump_file, *path, false);
2160               delete_jump_thread_path (path);
2161             }
2162         }
2163     }
2164   /* Second, look for paths that have any other jump thread attached to
2165      them, and either finish converting them or cancel them.  */
2166   for (i = 0; i < paths.length (); i++)
2167     {
2168       vec<jump_thread_edge *> *path = paths[i];
2169       edge e = (*path)[0]->e;
2170
2171       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2172         {
2173           unsigned int j;
2174           for (j = 1; j < path->length (); j++)
2175             if ((*path)[j]->e->aux != NULL)
2176               break;
2177
2178           /* If we iterated through the entire path without exiting the loop,
2179              then we are good to go, record it.  */
2180           if (j == path->length ())
2181             bitmap_set_bit (tmp, e->dest->index);
2182           else
2183             {
2184               e->aux = NULL;
2185               paths.unordered_remove (i);
2186               if (dump_file && (dump_flags & TDF_DETAILS))
2187                 dump_jump_thread_path (dump_file, *path, false);
2188               delete_jump_thread_path (path);
2189             }
2190         }
2191     }
2192
2193   /* If optimizing for size, only thread through block if we don't have
2194      to duplicate it or it's an otherwise empty redirection block.  */
2195   if (optimize_function_for_size_p (cfun))
2196     {
2197       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2198         {
2199           bb = BASIC_BLOCK_FOR_FN (cfun, i);
2200           if (EDGE_COUNT (bb->preds) > 1
2201               && !redirection_block_p (bb))
2202             {
2203               FOR_EACH_EDGE (e, ei, bb->preds)
2204                 {
2205                   if (e->aux)
2206                     {
2207                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
2208                       delete_jump_thread_path (path);
2209                       e->aux = NULL;
2210                     }
2211                 }
2212             }
2213           else
2214             bitmap_set_bit (threaded_blocks, i);
2215         }
2216     }
2217   else
2218     bitmap_copy (threaded_blocks, tmp);
2219
2220   /* Look for jump threading paths which cross multiple loop headers.
2221
2222      The code to thread through loop headers will change the CFG in ways
2223      that break assumptions made by the loop optimization code.
2224
2225      We don't want to blindly cancel the requests.  We can instead do better
2226      by trimming off the end of the jump thread path.  */
2227   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2228     {
2229       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2230       FOR_EACH_EDGE (e, ei, bb->preds)
2231         {
2232           if (e->aux)
2233             {
2234               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2235
2236               for (unsigned int i = 0, crossed_headers = 0;
2237                    i < path->length ();
2238                    i++)
2239                 {
2240                   basic_block dest = (*path)[i]->e->dest;
2241                   crossed_headers += (dest == dest->loop_father->header);
2242                   if (crossed_headers > 1)
2243                     {
2244                       /* Trim from entry I onwards.  */
2245                       for (unsigned int j = i; j < path->length (); j++)
2246                         delete (*path)[j];
2247                       path->truncate (i);
2248
2249                       /* Now that we've truncated the path, make sure
2250                          what's left is still valid.   We need at least
2251                          two edges on the path and the last edge can not
2252                          be a joiner.  This should never happen, but let's
2253                          be safe.  */
2254                       if (path->length () < 2
2255                           || (path->last ()->type
2256                               == EDGE_COPY_SRC_JOINER_BLOCK))
2257                         {
2258                           delete_jump_thread_path (path);
2259                           e->aux = NULL;
2260                         }
2261                       break;
2262                     }
2263                 }
2264             }
2265         }
2266     }
2267
2268   /* If we have a joiner block (J) which has two successors S1 and S2 and
2269      we are threading though S1 and the final destination of the thread
2270      is S2, then we must verify that any PHI nodes in S2 have the same
2271      PHI arguments for the edge J->S2 and J->S1->...->S2.
2272
2273      We used to detect this prior to registering the jump thread, but
2274      that prohibits propagation of edge equivalences into non-dominated
2275      PHI nodes as the equivalency test might occur before propagation.
2276
2277      This must also occur after we truncate any jump threading paths
2278      as this scenario may only show up after truncation.
2279
2280      This works for now, but will need improvement as part of the FSA
2281      optimization.
2282
2283      Note since we've moved the thread request data to the edges,
2284      we have to iterate on those rather than the threaded_edges vector.  */
2285   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2286     {
2287       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2288       FOR_EACH_EDGE (e, ei, bb->preds)
2289         {
2290           if (e->aux)
2291             {
2292               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2293               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2294
2295               if (have_joiner)
2296                 {
2297                   basic_block joiner = e->dest;
2298                   edge final_edge = path->last ()->e;
2299                   basic_block final_dest = final_edge->dest;
2300                   edge e2 = find_edge (joiner, final_dest);
2301
2302                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2303                     {
2304                       delete_jump_thread_path (path);
2305                       e->aux = NULL;
2306                     }
2307                 }
2308             }
2309         }
2310     }
2311
2312   BITMAP_FREE (tmp);
2313 }
2314
2315
2316 /* Return TRUE if BB ends with a switch statement or a computed goto.
2317    Otherwise return false.  */
2318 static bool
2319 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2320 {
2321   gimple stmt = last_stmt (bb);
2322   if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2323     return true;
2324   if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2325       && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2326     return true;
2327   return false;
2328 }
2329
2330 /* Verify that the REGION is a valid jump thread.  A jump thread is a special
2331    case of SEME Single Entry Multiple Exits region in which all nodes in the
2332    REGION have exactly one incoming edge.  The only exception is the first block
2333    that may not have been connected to the rest of the cfg yet.  */
2334
2335 DEBUG_FUNCTION void
2336 verify_jump_thread (basic_block *region, unsigned n_region)
2337 {
2338   for (unsigned i = 0; i < n_region; i++)
2339     gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2340 }
2341
2342 /* Return true when BB is one of the first N items in BBS.  */
2343
2344 static inline bool
2345 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2346 {
2347   for (int i = 0; i < n; i++)
2348     if (bb == bbs[i])
2349       return true;
2350
2351   return false;
2352 }
2353
2354 /* Duplicates a jump-thread path of N_REGION basic blocks.
2355    The ENTRY edge is redirected to the duplicate of the region.
2356
2357    Remove the last conditional statement in the last basic block in the REGION,
2358    and create a single fallthru edge pointing to the same destination as the
2359    EXIT edge.
2360
2361    The new basic blocks are stored to REGION_COPY in the same order as they had
2362    in REGION, provided that REGION_COPY is not NULL.
2363
2364    Returns false if it is unable to copy the region, true otherwise.  */
2365
2366 static bool
2367 duplicate_thread_path (edge entry, edge exit,
2368                        basic_block *region, unsigned n_region,
2369                        basic_block *region_copy)
2370 {
2371   unsigned i;
2372   bool free_region_copy = false;
2373   struct loop *loop = entry->dest->loop_father;
2374   edge exit_copy;
2375   edge redirected;
2376   int total_freq = 0, entry_freq = 0;
2377   gcov_type total_count = 0, entry_count = 0;
2378
2379   if (!can_copy_bbs_p (region, n_region))
2380     return false;
2381
2382   /* Some sanity checking.  Note that we do not check for all possible
2383      missuses of the functions.  I.e. if you ask to copy something weird,
2384      it will work, but the state of structures probably will not be
2385      correct.  */
2386   for (i = 0; i < n_region; i++)
2387     {
2388       /* We do not handle subloops, i.e. all the blocks must belong to the
2389          same loop.  */
2390       if (region[i]->loop_father != loop)
2391         return false;
2392     }
2393
2394   initialize_original_copy_tables ();
2395
2396   set_loop_copy (loop, loop);
2397
2398   if (!region_copy)
2399     {
2400       region_copy = XNEWVEC (basic_block, n_region);
2401       free_region_copy = true;
2402     }
2403
2404   if (entry->dest->count)
2405     {
2406       total_count = entry->dest->count;
2407       entry_count = entry->count;
2408       /* Fix up corner cases, to avoid division by zero or creation of negative
2409          frequencies.  */
2410       if (entry_count > total_count)
2411         entry_count = total_count;
2412     }
2413   else
2414     {
2415       total_freq = entry->dest->frequency;
2416       entry_freq = EDGE_FREQUENCY (entry);
2417       /* Fix up corner cases, to avoid division by zero or creation of negative
2418          frequencies.  */
2419       if (total_freq == 0)
2420         total_freq = 1;
2421       else if (entry_freq > total_freq)
2422         entry_freq = total_freq;
2423     }
2424
2425   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2426             split_edge_bb_loc (entry), false);
2427
2428   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
2429      following code ensures that all the edges exiting the jump-thread path are
2430      redirected back to the original code: these edges are exceptions
2431      invalidating the property that is propagated by executing all the blocks of
2432      the jump-thread path in order.  */
2433
2434   for (i = 0; i < n_region; i++)
2435     {
2436       edge e;
2437       edge_iterator ei;
2438       basic_block bb = region_copy[i];
2439
2440       if (single_succ_p (bb))
2441         {
2442           /* Make sure the successor is the next node in the path.  */
2443           gcc_assert (i + 1 == n_region
2444                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
2445           continue;
2446         }
2447
2448       /* Special case the last block on the path: make sure that it does not
2449          jump back on the copied path.  */
2450       if (i + 1 == n_region)
2451         {
2452           FOR_EACH_EDGE (e, ei, bb->succs)
2453             if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2454               {
2455                 basic_block orig = get_bb_original (e->dest);
2456                 if (orig)
2457                   redirect_edge_and_branch_force (e, orig);
2458               }
2459           continue;
2460         }
2461
2462       /* Redirect all other edges jumping to non-adjacent blocks back to the
2463          original code.  */
2464       FOR_EACH_EDGE (e, ei, bb->succs)
2465         if (region_copy[i + 1] != e->dest)
2466           {
2467             basic_block orig = get_bb_original (e->dest);
2468             if (orig)
2469               redirect_edge_and_branch_force (e, orig);
2470           }
2471     }
2472
2473   if (total_count)
2474     {
2475       scale_bbs_frequencies_gcov_type (region, n_region,
2476                                        total_count - entry_count,
2477                                        total_count);
2478       scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2479                                        total_count);
2480     }
2481   else
2482     {
2483       scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2484                                  total_freq);
2485       scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2486     }
2487
2488 #ifdef ENABLE_CHECKING
2489   verify_jump_thread (region_copy, n_region);
2490 #endif
2491
2492   /* Remove the last branch in the jump thread path.  */
2493   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2494   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2495
2496   if (e) {
2497     rescan_loop_exit (e, true, false);
2498     e->probability = REG_BR_PROB_BASE;
2499     e->count = region_copy[n_region - 1]->count;
2500   }
2501
2502   /* Redirect the entry and add the phi node arguments.  */
2503   if (entry->dest == loop->header)
2504     mark_loop_for_removal (loop);
2505   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2506   gcc_assert (redirected != NULL);
2507   flush_pending_stmts (entry);
2508
2509   /* Add the other PHI node arguments.  */
2510   add_phi_args_after_copy (region_copy, n_region, NULL);
2511
2512   if (free_region_copy)
2513     free (region_copy);
2514
2515   free_original_copy_tables ();
2516   return true;
2517 }
2518
2519 /* Return true when PATH is a valid jump-thread path.  */
2520
2521 static bool
2522 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2523 {
2524   unsigned len = path->length ();
2525
2526   /* Check that the path is connected.  */
2527   for (unsigned int j = 0; j < len - 1; j++)
2528     if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2529       return false;
2530
2531   return true;
2532 }
2533
2534 /* Walk through all blocks and thread incoming edges to the appropriate
2535    outgoing edge for each edge pair recorded in THREADED_EDGES.
2536
2537    It is the caller's responsibility to fix the dominance information
2538    and rewrite duplicated SSA_NAMEs back into SSA form.
2539
2540    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2541    loop headers if it does not simplify the loop.
2542
2543    Returns true if one or more edges were threaded, false otherwise.  */
2544
2545 bool
2546 thread_through_all_blocks (bool may_peel_loop_headers)
2547 {
2548   bool retval = false;
2549   unsigned int i;
2550   bitmap_iterator bi;
2551   bitmap threaded_blocks;
2552   struct loop *loop;
2553
2554   if (!paths.exists ())
2555     return false;
2556
2557   threaded_blocks = BITMAP_ALLOC (NULL);
2558   memset (&thread_stats, 0, sizeof (thread_stats));
2559
2560   /* Jump-thread all FSM threads before other jump-threads.  */
2561   for (i = 0; i < paths.length ();)
2562     {
2563       vec<jump_thread_edge *> *path = paths[i];
2564       edge entry = (*path)[0]->e;
2565
2566       /* Only code-generate FSM jump-threads in this loop.  */
2567       if ((*path)[0]->type != EDGE_FSM_THREAD)
2568         {
2569           i++;
2570           continue;
2571         }
2572
2573       /* Do not jump-thread twice from the same block.  */
2574       if (bitmap_bit_p (threaded_blocks, entry->src->index)
2575           /* Verify that the jump thread path is still valid: a
2576              previous jump-thread may have changed the CFG, and
2577              invalidated the current path.  */
2578           || !valid_jump_thread_path (path))
2579         {
2580           /* Remove invalid FSM jump-thread paths.  */
2581           delete_jump_thread_path (path);
2582           paths.unordered_remove (i);
2583           continue;
2584         }
2585
2586       unsigned len = path->length ();
2587       edge exit = (*path)[len - 1]->e;
2588       basic_block *region = XNEWVEC (basic_block, len - 1);
2589
2590       for (unsigned int j = 0; j < len - 1; j++)
2591         region[j] = (*path)[j]->e->dest;
2592
2593       if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2594         {
2595           /* We do not update dominance info.  */
2596           free_dominance_info (CDI_DOMINATORS);
2597           bitmap_set_bit (threaded_blocks, entry->src->index);
2598           retval = true;
2599         }
2600
2601       delete_jump_thread_path (path);
2602       paths.unordered_remove (i);
2603     }
2604
2605   /* Remove from PATHS all the jump-threads starting with an edge already
2606      jump-threaded.  */
2607   for (i = 0; i < paths.length ();)
2608     {
2609       vec<jump_thread_edge *> *path = paths[i];
2610       edge entry = (*path)[0]->e;
2611
2612       /* Do not jump-thread twice from the same block.  */
2613       if (bitmap_bit_p (threaded_blocks, entry->src->index))
2614         {
2615           delete_jump_thread_path (path);
2616           paths.unordered_remove (i);
2617         }
2618       else
2619         i++;
2620     }
2621
2622   bitmap_clear (threaded_blocks);
2623
2624   mark_threaded_blocks (threaded_blocks);
2625
2626   initialize_original_copy_tables ();
2627
2628   /* First perform the threading requests that do not affect
2629      loop structure.  */
2630   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2631     {
2632       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2633
2634       if (EDGE_COUNT (bb->preds) > 0)
2635         retval |= thread_block (bb, true);
2636     }
2637
2638   /* Then perform the threading through loop headers.  We start with the
2639      innermost loop, so that the changes in cfg we perform won't affect
2640      further threading.  */
2641   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2642     {
2643       if (!loop->header
2644           || !bitmap_bit_p (threaded_blocks, loop->header->index))
2645         continue;
2646
2647       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2648     }
2649
2650   /* Any jump threading paths that are still attached to edges at this
2651      point must be one of two cases.
2652
2653      First, we could have a jump threading path which went from outside
2654      a loop to inside a loop that was ignored because a prior jump thread
2655      across a backedge was realized (which indirectly causes the loop
2656      above to ignore the latter thread).  We can detect these because the
2657      loop structures will be different and we do not currently try to
2658      optimize this case.
2659
2660      Second, we could be threading across a backedge to a point within the
2661      same loop.  This occurrs for the FSA/FSM optimization and we would
2662      like to optimize it.  However, we have to be very careful as this
2663      may completely scramble the loop structures, with the result being
2664      irreducible loops causing us to throw away our loop structure.
2665
2666      As a compromise for the latter case, if the thread path ends in
2667      a block where the last statement is a multiway branch, then go
2668      ahead and thread it, else ignore it.  */
2669   basic_block bb;
2670   edge e;
2671   FOR_EACH_BB_FN (bb, cfun)
2672     {
2673       /* If we do end up threading here, we can remove elements from
2674          BB->preds.  Thus we can not use the FOR_EACH_EDGE iterator.  */
2675       for (edge_iterator ei = ei_start (bb->preds);
2676            (e = ei_safe_edge (ei));)
2677         if (e->aux)
2678           {
2679             vec<jump_thread_edge *> *path = THREAD_PATH (e);
2680
2681             /* Case 1, threading from outside to inside the loop
2682                after we'd already threaded through the header.  */
2683             if ((*path)[0]->e->dest->loop_father
2684                 != path->last ()->e->src->loop_father)
2685               {
2686                 delete_jump_thread_path (path);
2687                 e->aux = NULL;
2688                 ei_next (&ei);
2689               }
2690            else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2691               {
2692                 /* The code to thread through loop headers may have
2693                    split a block with jump threads attached to it.
2694
2695                    We can identify this with a disjoint jump threading
2696                    path.  If found, just remove it.  */
2697                 for (unsigned int i = 0; i < path->length () - 1; i++)
2698                   if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2699                     {
2700                       delete_jump_thread_path (path);
2701                       e->aux = NULL;
2702                       ei_next (&ei);
2703                       break;
2704                     }
2705
2706                 /* Our path is still valid, thread it.  */
2707                 if (e->aux)
2708                   {
2709                     if (thread_block ((*path)[0]->e->dest, false))
2710                       e->aux = NULL;
2711                     else
2712                       {
2713                         delete_jump_thread_path (path);
2714                         e->aux = NULL;
2715                         ei_next (&ei);
2716                       }
2717                   }
2718               }
2719            else
2720               {
2721                 delete_jump_thread_path (path);
2722                 e->aux = NULL;
2723                 ei_next (&ei);
2724               }
2725           }
2726         else
2727           ei_next (&ei);
2728     }
2729
2730   statistics_counter_event (cfun, "Jumps threaded",
2731                             thread_stats.num_threaded_edges);
2732
2733   free_original_copy_tables ();
2734
2735   BITMAP_FREE (threaded_blocks);
2736   threaded_blocks = NULL;
2737   paths.release ();
2738
2739   if (retval)
2740     loops_state_set (LOOPS_NEED_FIXUP);
2741
2742   return retval;
2743 }
2744
2745 /* Delete the jump threading path PATH.  We have to explcitly delete
2746    each entry in the vector, then the container.  */
2747
2748 void
2749 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2750 {
2751   for (unsigned int i = 0; i < path->length (); i++)
2752     delete (*path)[i];
2753   path->release();
2754   delete path;
2755 }
2756
2757 /* Register a jump threading opportunity.  We queue up all the jump
2758    threading opportunities discovered by a pass and update the CFG
2759    and SSA form all at once.
2760
2761    E is the edge we can thread, E2 is the new target edge, i.e., we
2762    are effectively recording that E->dest can be changed to E2->dest
2763    after fixing the SSA graph.  */
2764
2765 void
2766 register_jump_thread (vec<jump_thread_edge *> *path)
2767 {
2768   if (!dbg_cnt (registered_jump_thread))
2769     {
2770       delete_jump_thread_path (path);
2771       return;
2772     }
2773
2774   /* First make sure there are no NULL outgoing edges on the jump threading
2775      path.  That can happen for jumping to a constant address.  */
2776   for (unsigned int i = 0; i < path->length (); i++)
2777     if ((*path)[i]->e == NULL)
2778       {
2779         if (dump_file && (dump_flags & TDF_DETAILS))
2780           {
2781             fprintf (dump_file,
2782                      "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
2783             dump_jump_thread_path (dump_file, *path, false);
2784           }
2785
2786         delete_jump_thread_path (path);
2787         return;
2788       }
2789
2790   if (dump_file && (dump_flags & TDF_DETAILS))
2791     dump_jump_thread_path (dump_file, *path, true);
2792
2793   if (!paths.exists ())
2794     paths.create (5);
2795
2796   paths.safe_push (path);
2797 }