gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tree.h"
  24 #include "flags.h"
  25 #include "predict.h"
  26 #include "vec.h"
  27 #include "hashtab.h"
  28 #include "hash-set.h"
  29 #include "machmode.h"
  30 #include "tm.h"
  31 #include "hard-reg-set.h"
  32 #include "input.h"
  33 #include "function.h"
  34 #include "dominance.h"
  35 #include "cfg.h"
  36 #include "cfganal.h"
  37 #include "basic-block.h"
  38 #include "hash-table.h"
  39 #include "tree-ssa-alias.h"
  40 #include "internal-fn.h"
  41 #include "gimple-expr.h"
  42 #include "is-a.h"
  43 #include "gimple.h"
  44 #include "gimple-iterator.h"
  45 #include "gimple-ssa.h"
  46 #include "tree-phinodes.h"
  47 #include "tree-ssa.h"
  48 #include "tree-ssa-threadupdate.h"
  49 #include "ssa-iterators.h"
  50 #include "dumpfile.h"
  51 #include "cfgloop.h"
  52 #include "dbgcnt.h"
  53 #include "tree-cfg.h"
  54 #include "tree-pass.h"
  55
  56 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  57    one or more in-edges to B to instead reach the destination of an
  58    out-edge from B while preserving any side effects in B.
  59
  60    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  61    side effects of executing B.
  62
  63      1. Make a copy of B (including its outgoing edges and statements).  Call
  64         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  65
  66      2. Remove the control statement at the end of B' and all outgoing edges
  67         except B'->C.
  68
  69      3. Add a new argument to each PHI in C with the same value as the existing
  70         argument associated with edge B->C.  Associate the new PHI arguments
  71         with the edge B'->C.
  72
  73      4. For each PHI in B, find or create a PHI in B' with an identical
  74         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  75         value as the PHI in B associated with the edge A->B.  Associate
  76         the new argument in the PHI in B' with the edge A->B.
  77
  78      5. Change the edge A->B to A->B'.
  79
  80         5a. This automatically deletes any PHI arguments associated with the
  81             edge A->B in B.
  82
  83         5b. This automatically associates each new argument added in step 4
  84             with the edge A->B'.
  85
  86      6. Repeat for other incoming edges into B.
  87
  88      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  89
  90    Note that block duplication can be minimized by first collecting the
  91    set of unique destination blocks that the incoming edges should
  92    be threaded to.
  93
  94    We reduce the number of edges and statements we create by not copying all
  95    the outgoing edges and the control statement in step #1.  We instead create
  96    a template block without the outgoing edges and duplicate the template.
  97
  98    Another case this code handles is threading through a "joiner" block.  In
  99    this case, we do not know the destination of the joiner block, but one
 100    of the outgoing edges from the joiner block leads to a threadable path.  This
 101    case largely works as outlined above, except the duplicate of the joiner
 102    block still contains a full set of outgoing edges and its control statement.
 103    We just redirect one of its outgoing edges to our jump threading path.  */
 104
 105
 106 /* Steps #5 and #6 of the above algorithm are best implemented by walking
 107    all the incoming edges which thread to the same destination edge at
 108    the same time.  That avoids lots of table lookups to get information
 109    for the destination edge.
 110
 111    To realize that implementation we create a list of incoming edges
 112    which thread to the same outgoing edge.  Thus to implement steps
 113    #5 and #6 we traverse our hash table of outgoing edge information.
 114    For each entry we walk the list of incoming edges which thread to
 115    the current outgoing edge.  */
 116
 117 struct el
 118 {
 119   edge e;
 120   struct el *next;
 121 };
 122
 123 /* Main data structure recording information regarding B's duplicate
 124    blocks.  */
 125
 126 /* We need to efficiently record the unique thread destinations of this
 127    block and specific information associated with those destinations.  We
 128    may have many incoming edges threaded to the same outgoing edge.  This
 129    can be naturally implemented with a hash table.  */
 130
 131 struct redirection_data : typed_free_remove<redirection_data>
 132 {
 133   /* We support wiring up two block duplicates in a jump threading path.
 134
 135      One is a normal block copy where we remove the control statement
 136      and wire up its single remaining outgoing edge to the thread path.
 137
 138      The other is a joiner block where we leave the control statement
 139      in place, but wire one of the outgoing edges to a thread path.
 140
 141      In theory we could have multiple block duplicates in a jump
 142      threading path, but I haven't tried that.
 143
 144      The duplicate blocks appear in this array in the same order in
 145      which they appear in the jump thread path.  */
 146   basic_block dup_blocks[2];
 147
 148   /* The jump threading path.  */
 149   vec<jump_thread_edge *> *path;
 150
 151   /* A list of incoming edges which we want to thread to the
 152      same path.  */
 153   struct el *incoming_edges;
 154
 155   /* hash_table support.  */
 156   typedef redirection_data value_type;
 157   typedef redirection_data compare_type;
 158   static inline hashval_t hash (const value_type *);
 159   static inline int equal (const value_type *, const compare_type *);
 160 };
 161
 162 /* Dump a jump threading path, including annotations about each
 163    edge in the path.  */
 164
 165 static void
 166 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 167                        bool registering)
 168 {
 169   fprintf (dump_file,
 170            "  %s%s jump thread: (%d, %d) incoming edge; ",
 171            (registering ? "Registering" : "Cancelling"),
 172            (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 173            path[0]->e->src->index, path[0]->e->dest->index);
 174
 175   for (unsigned int i = 1; i < path.length (); i++)
 176     {
 177       /* We can get paths with a NULL edge when the final destination
 178          of a jump thread turns out to be a constant address.  We dump
 179          those paths when debugging, so we have to be prepared for that
 180          possibility here.  */
 181       if (path[i]->e == NULL)
 182         continue;
 183
 184       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 185         fprintf (dump_file, " (%d, %d) joiner; ",
 186                  path[i]->e->src->index, path[i]->e->dest->index);
 187       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
 188        fprintf (dump_file, " (%d, %d) normal;",
 189                  path[i]->e->src->index, path[i]->e->dest->index);
 190       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
 191        fprintf (dump_file, " (%d, %d) nocopy;",
 192                  path[i]->e->src->index, path[i]->e->dest->index);
 193     }
 194   fputc ('\n', dump_file);
 195 }
 196
 197 /* Simple hashing function.  For any given incoming edge E, we're going
 198    to be most concerned with the final destination of its jump thread
 199    path.  So hash on the block index of the final edge in the path.  */
 200
 201 inline hashval_t
 202 redirection_data::hash (const value_type *p)
 203 {
 204   vec<jump_thread_edge *> *path = p->path;
 205   return path->last ()->e->dest->index;
 206 }
 207
 208 /* Given two hash table entries, return true if they have the same
 209    jump threading path.  */
 210 inline int
 211 redirection_data::equal (const value_type *p1, const compare_type *p2)
 212 {
 213   vec<jump_thread_edge *> *path1 = p1->path;
 214   vec<jump_thread_edge *> *path2 = p2->path;
 215
 216   if (path1->length () != path2->length ())
 217     return false;
 218
 219   for (unsigned int i = 1; i < path1->length (); i++)
 220     {
 221       if ((*path1)[i]->type != (*path2)[i]->type
 222           || (*path1)[i]->e != (*path2)[i]->e)
 223         return false;
 224     }
 225
 226   return true;
 227 }
 228
 229 /* Data structure of information to pass to hash table traversal routines.  */
 230 struct ssa_local_info_t
 231 {
 232   /* The current block we are working on.  */
 233   basic_block bb;
 234
 235   /* We only create a template block for the first duplicated block in a
 236      jump threading path as we may need many duplicates of that block.
 237
 238      The second duplicate block in a path is specific to that path.  Creating
 239      and sharing a template for that block is considerably more difficult.  */
 240   basic_block template_block;
 241
 242   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 243   bool jumps_threaded;
 244
 245   /* Blocks duplicated for the thread.  */
 246   bitmap duplicate_blocks;
 247 };
 248
 249 /* Passes which use the jump threading code register jump threading
 250    opportunities as they are discovered.  We keep the registered
 251    jump threading opportunities in this vector as edge pairs
 252    (original_edge, target_edge).  */
 253 static vec<vec<jump_thread_edge *> *> paths;
 254
 255 /* When we start updating the CFG for threading, data necessary for jump
 256    threading is attached to the AUX field for the incoming edge.  Use these
 257    macros to access the underlying structure attached to the AUX field.  */
 258 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 259
 260 /* Jump threading statistics.  */
 261
 262 struct thread_stats_d
 263 {
 264   unsigned long num_threaded_edges;
 265 };
 266
 267 struct thread_stats_d thread_stats;
 268
 269
 270 /* Remove the last statement in block BB if it is a control statement
 271    Also remove all outgoing edges except the edge which reaches DEST_BB.
 272    If DEST_BB is NULL, then remove all outgoing edges.  */
 273
 274 static void
 275 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 276 {
 277   gimple_stmt_iterator gsi;
 278   edge e;
 279   edge_iterator ei;
 280
 281   gsi = gsi_last_bb (bb);
 282
 283   /* If the duplicate ends with a control statement, then remove it.
 284
 285      Note that if we are duplicating the template block rather than the
 286      original basic block, then the duplicate might not have any real
 287      statements in it.  */
 288   if (!gsi_end_p (gsi)
 289       && gsi_stmt (gsi)
 290       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 291           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 292           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 293     gsi_remove (&gsi, true);
 294
 295   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 296     {
 297       if (e->dest != dest_bb)
 298         remove_edge (e);
 299       else
 300         ei_next (&ei);
 301     }
 302 }
 303
 304 /* Create a duplicate of BB.  Record the duplicate block in an array
 305    indexed by COUNT stored in RD.  */
 306
 307 static void
 308 create_block_for_threading (basic_block bb,
 309                             struct redirection_data *rd,
 310                             unsigned int count,
 311                             bitmap *duplicate_blocks)
 312 {
 313   edge_iterator ei;
 314   edge e;
 315
 316   /* We can use the generic block duplication code and simply remove
 317      the stuff we do not need.  */
 318   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 319
 320   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 321     e->aux = NULL;
 322
 323   /* Zero out the profile, since the block is unreachable for now.  */
 324   rd->dup_blocks[count]->frequency = 0;
 325   rd->dup_blocks[count]->count = 0;
 326   if (duplicate_blocks)
 327     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
 328 }
 329
 330 /* Main data structure to hold information for duplicates of BB.  */
 331
 332 static hash_table<redirection_data> *redirection_data;
 333
 334 /* Given an outgoing edge E lookup and return its entry in our hash table.
 335
 336    If INSERT is true, then we insert the entry into the hash table if
 337    it is not already present.  INCOMING_EDGE is added to the list of incoming
 338    edges associated with E in the hash table.  */
 339
 340 static struct redirection_data *
 341 lookup_redirection_data (edge e, enum insert_option insert)
 342 {
 343   struct redirection_data **slot;
 344   struct redirection_data *elt;
 345   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 346
 347  /* Build a hash table element so we can see if E is already
 348      in the table.  */
 349   elt = XNEW (struct redirection_data);
 350   elt->path = path;
 351   elt->dup_blocks[0] = NULL;
 352   elt->dup_blocks[1] = NULL;
 353   elt->incoming_edges = NULL;
 354
 355   slot = redirection_data->find_slot (elt, insert);
 356
 357   /* This will only happen if INSERT is false and the entry is not
 358      in the hash table.  */
 359   if (slot == NULL)
 360     {
 361       free (elt);
 362       return NULL;
 363     }
 364
 365   /* This will only happen if E was not in the hash table and
 366      INSERT is true.  */
 367   if (*slot == NULL)
 368     {
 369       *slot = elt;
 370       elt->incoming_edges = XNEW (struct el);
 371       elt->incoming_edges->e = e;
 372       elt->incoming_edges->next = NULL;
 373       return elt;
 374     }
 375   /* E was in the hash table.  */
 376   else
 377     {
 378       /* Free ELT as we do not need it anymore, we will extract the
 379          relevant entry from the hash table itself.  */
 380       free (elt);
 381
 382       /* Get the entry stored in the hash table.  */
 383       elt = *slot;
 384
 385       /* If insertion was requested, then we need to add INCOMING_EDGE
 386          to the list of incoming edges associated with E.  */
 387       if (insert)
 388         {
 389           struct el *el = XNEW (struct el);
 390           el->next = elt->incoming_edges;
 391           el->e = e;
 392           elt->incoming_edges = el;
 393         }
 394
 395       return elt;
 396     }
 397 }
 398
 399 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 400    does not have a value associated with it.  */
 401
 402 static void
 403 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 404 {
 405   int src_idx = src_e->dest_idx;
 406   int tgt_idx = tgt_e->dest_idx;
 407
 408   /* Iterate over each PHI in e->dest.  */
 409   for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
 410                            gsi2 = gsi_start_phis (tgt_e->dest);
 411        !gsi_end_p (gsi);
 412        gsi_next (&gsi), gsi_next (&gsi2))
 413     {
 414       gphi *src_phi = gsi.phi ();
 415       gphi *dest_phi = gsi2.phi ();
 416       tree val = gimple_phi_arg_def (src_phi, src_idx);
 417       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
 418
 419       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 420       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 421     }
 422 }
 423
 424 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
 425    to see if it has constant value in a flow sensitive manner.  Set
 426    LOCUS to location of the constant phi arg and return the value.
 427    Return DEF directly if either PATH or idx is ZERO.  */
 428
 429 static tree
 430 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
 431                          basic_block bb, int idx, source_location *locus)
 432 {
 433   tree arg;
 434   gphi *def_phi;
 435   basic_block def_bb;
 436
 437   if (path == NULL || idx == 0)
 438     return def;
 439
 440   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
 441   if (!def_phi)
 442     return def;
 443
 444   def_bb = gimple_bb (def_phi);
 445   /* Don't propagate loop invariants into deeper loops.  */
 446   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
 447     return def;
 448
 449   /* Backtrack jump threading path from IDX to see if def has constant
 450      value.  */
 451   for (int j = idx - 1; j >= 0; j--)
 452     {
 453       edge e = (*path)[j]->e;
 454       if (e->dest == def_bb)
 455         {
 456           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
 457           if (is_gimple_min_invariant (arg))
 458             {
 459               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
 460               return arg;
 461             }
 462           break;
 463         }
 464     }
 465
 466   return def;
 467 }
 468
 469 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 470    Try to backtrack jump threading PATH from node IDX to see if the arg
 471    has constant value, copy constant value instead of argument itself
 472    if yes.  */
 473
 474 static void
 475 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
 476                vec<jump_thread_edge *> *path, int idx)
 477 {
 478   gphi_iterator gsi;
 479   int src_indx = src_e->dest_idx;
 480
 481   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 482     {
 483       gphi *phi = gsi.phi ();
 484       tree def = gimple_phi_arg_def (phi, src_indx);
 485       source_location locus = gimple_phi_arg_location (phi, src_indx);
 486
 487       if (TREE_CODE (def) == SSA_NAME
 488           && !virtual_operand_p (gimple_phi_result (phi)))
 489         def = get_value_locus_in_path (def, path, bb, idx, &locus);
 490
 491       add_phi_arg (phi, def, tgt_e, locus);
 492     }
 493 }
 494
 495 /* We have recently made a copy of ORIG_BB, including its outgoing
 496    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 497    ORIG_BB has a new argument associated with edge from NEW_BB to the
 498    successor.  Initialize the PHI argument so that it is equal to the PHI
 499    argument associated with the edge from ORIG_BB to the successor.
 500    PATH and IDX are used to check if the new PHI argument has constant
 501    value in a flow sensitive manner.  */
 502
 503 static void
 504 update_destination_phis (basic_block orig_bb, basic_block new_bb,
 505                          vec<jump_thread_edge *> *path, int idx)
 506 {
 507   edge_iterator ei;
 508   edge e;
 509
 510   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 511     {
 512       edge e2 = find_edge (new_bb, e->dest);
 513       copy_phi_args (e->dest, e, e2, path, idx);
 514     }
 515 }
 516
 517 /* Given a duplicate block and its single destination (both stored
 518    in RD).  Create an edge between the duplicate and its single
 519    destination.
 520
 521    Add an additional argument to any PHI nodes at the single
 522    destination.  IDX is the start node in jump threading path
 523    we start to check to see if the new PHI argument has constant
 524    value along the jump threading path.  */
 525
 526 static void
 527 create_edge_and_update_destination_phis (struct redirection_data *rd,
 528                                          basic_block bb, int idx)
 529 {
 530   edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 531
 532   rescan_loop_exit (e, true, false);
 533   e->probability = REG_BR_PROB_BASE;
 534   e->count = bb->count;
 535
 536   /* We used to copy the thread path here.  That was added in 2007
 537      and dutifully updated through the representation changes in 2013.
 538
 539      In 2013 we added code to thread from an interior node through
 540      the backedge to another interior node.  That runs after the code
 541      to thread through loop headers from outside the loop.
 542
 543      The latter may delete edges in the CFG, including those
 544      which appeared in the jump threading path we copied here.  Thus
 545      we'd end up using a dangling pointer.
 546
 547      After reviewing the 2007/2011 code, I can't see how anything
 548      depended on copying the AUX field and clearly copying the jump
 549      threading path is problematical due to embedded edge pointers.
 550      It has been removed.  */
 551   e->aux = NULL;
 552
 553   /* If there are any PHI nodes at the destination of the outgoing edge
 554      from the duplicate block, then we will need to add a new argument
 555      to them.  The argument should have the same value as the argument
 556      associated with the outgoing edge stored in RD.  */
 557   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
 558 }
 559
 560 /* Look through PATH beginning at START and return TRUE if there are
 561    any additional blocks that need to be duplicated.  Otherwise,
 562    return FALSE.  */
 563 static bool
 564 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 565                                  unsigned int start)
 566 {
 567   for (unsigned int i = start + 1; i < path->length (); i++)
 568     {
 569       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 570           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 571         return true;
 572     }
 573   return false;
 574 }
 575
 576
 577 /* Compute the amount of profile count/frequency coming into the jump threading
 578    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
 579    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
 580    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
 581    identify blocks duplicated for jump threading, which have duplicated
 582    edges that need to be ignored in the analysis.  Return true if path contains
 583    a joiner, false otherwise.
 584
 585    In the non-joiner case, this is straightforward - all the counts/frequency
 586    flowing into the jump threading path should flow through the duplicated
 587    block and out of the duplicated path.
 588
 589    In the joiner case, it is very tricky.  Some of the counts flowing into
 590    the original path go offpath at the joiner.  The problem is that while
 591    we know how much total count goes off-path in the original control flow,
 592    we don't know how many of the counts corresponding to just the jump
 593    threading path go offpath at the joiner.
 594
 595    For example, assume we have the following control flow and identified
 596    jump threading paths:
 597
 598                 A     B     C
 599                  \    |    /
 600                Ea \   |Eb / Ec
 601                    \  |  /
 602                     v v v
 603                       J       <-- Joiner
 604                      / \
 605                 Eoff/   \Eon
 606                    /     \
 607                   v       v
 608                 Soff     Son  <--- Normal
 609                          /\
 610                       Ed/  \ Ee
 611                        /    \
 612                       v     v
 613                       D      E
 614
 615             Jump threading paths: A -> J -> Son -> D (path 1)
 616                                   C -> J -> Son -> E (path 2)
 617
 618    Note that the control flow could be more complicated:
 619    - Each jump threading path may have more than one incoming edge.  I.e. A and
 620    Ea could represent multiple incoming blocks/edges that are included in
 621    path 1.
 622    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
 623    before or after the "normal" copy block).  These are not duplicated onto
 624    the jump threading path, as they are single-successor.
 625    - Any of the blocks along the path may have other incoming edges that
 626    are not part of any jump threading path, but add profile counts along
 627    the path.
 628
  629    In the above example, after all jump threading is complete, we will
 630    end up with the following control flow:
 631
 632                 A          B            C
 633                 |          |            |
 634               Ea|          |Eb          |Ec
 635                 |          |            |
 636                 v          v            v
 637                Ja          J           Jc
 638                / \        / \Eon'     / \
 639           Eona/   \   ---/---\--------   \Eonc
 640              /     \ /  /     \           \
 641             v       v  v       v          v
 642            Sona     Soff      Son        Sonc
 643              \                 /\         /
 644               \___________    /  \  _____/
 645                           \  /    \/
 646                            vv      v
 647                             D      E
 648
 649    The main issue to notice here is that when we are processing path 1
 650    (A->J->Son->D) we need to figure out the outgoing edge weights to
 651    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
 652    sum of the incoming weights to D remain Ed.  The problem with simply
 653    assuming that Ja (and Jc when processing path 2) has the same outgoing
 654    probabilities to its successors as the original block J, is that after
 655    all paths are processed and other edges/counts removed (e.g. none
 656    of Ec will reach D after processing path 2), we may end up with not
 657    enough count flowing along duplicated edge Sona->D.
 658
 659    Therefore, in the case of a joiner, we keep track of all counts
 660    coming in along the current path, as well as from predecessors not
 661    on any jump threading path (Eb in the above example).  While we
 662    first assume that the duplicated Eona for Ja->Sona has the same
 663    probability as the original, we later compensate for other jump
  664    threading paths that may eliminate edges.  We do that by keeping track
 665    of all counts coming into the original path that are not in a jump
 666    thread (Eb in the above example, but as noted earlier, there could
 667    be other predecessors incoming to the path at various points, such
 668    as at Son).  Call this cumulative non-path count coming into the path
  669    before D as Enonpath.  We then ensure that the count from Sona->D is at
 670    least as big as (Ed - Enonpath), but no bigger than the minimum
 671    weight along the jump threading path.  The probabilities of both the
 672    original and duplicated joiner block J and Ja will be adjusted
 673    accordingly after the updates.  */
 674
 675 static bool
 676 compute_path_counts (struct redirection_data *rd,
 677                      ssa_local_info_t *local_info,
 678                      gcov_type *path_in_count_ptr,
 679                      gcov_type *path_out_count_ptr,
 680                      int *path_in_freq_ptr)
 681 {
 682   edge e = rd->incoming_edges->e;
 683   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 684   edge elast = path->last ()->e;
 685   gcov_type nonpath_count = 0;
 686   bool has_joiner = false;
 687   gcov_type path_in_count = 0;
 688   int path_in_freq = 0;
 689
 690   /* Start by accumulating incoming edge counts to the path's first bb
 691      into a couple buckets:
 692         path_in_count: total count of incoming edges that flow into the
 693                   current path.
 694         nonpath_count: total count of incoming edges that are not
 695                   flowing along *any* path.  These are the counts
 696                   that will still flow along the original path after
 697                   all path duplication is done by potentially multiple
 698                   calls to this routine.
 699      (any other incoming edge counts are for a different jump threading
 700      path that will be handled by a later call to this routine.)
 701      To make this easier, start by recording all incoming edges that flow into
 702      the current path in a bitmap.  We could add up the path's incoming edge
 703      counts here, but we still need to walk all the first bb's incoming edges
 704      below to add up the counts of the other edges not included in this jump
 705      threading path.  */
 706   struct el *next, *el;
 707   bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
 708   for (el = rd->incoming_edges; el; el = next)
 709     {
 710       next = el->next;
 711       bitmap_set_bit (in_edge_srcs, el->e->src->index);
 712     }
 713   edge ein;
 714   edge_iterator ei;
 715   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 716     {
 717       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
 718       /* Simply check the incoming edge src against the set captured above.  */
 719       if (ein_path
 720           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
 721         {
 722           /* It is necessary but not sufficient that the last path edges
 723              are identical.  There may be different paths that share the
 724              same last path edge in the case where the last edge has a nocopy
 725              source block.  */
 726           gcc_assert (ein_path->last ()->e == elast);
 727           path_in_count += ein->count;
 728           path_in_freq += EDGE_FREQUENCY (ein);
 729         }
 730       else if (!ein_path)
 731         {
 732           /* Keep track of the incoming edges that are not on any jump-threading
 733              path.  These counts will still flow out of original path after all
 734              jump threading is complete.  */
 735             nonpath_count += ein->count;
 736         }
 737     }
 738
 739   /* This is needed due to insane incoming frequencies.  */
 740   if (path_in_freq > BB_FREQ_MAX)
 741     path_in_freq = BB_FREQ_MAX;
 742
 743   BITMAP_FREE (in_edge_srcs);
 744
 745   /* Now compute the fraction of the total count coming into the first
 746      path bb that is from the current threading path.  */
 747   gcov_type total_count = e->dest->count;
 748   /* Handle incoming profile insanities.  */
 749   if (total_count < path_in_count)
 750     path_in_count = total_count;
 751   int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
 752
 753   /* Walk the entire path to do some more computation in order to estimate
 754      how much of the path_in_count will flow out of the duplicated threading
 755      path.  In the non-joiner case this is straightforward (it should be
 756      the same as path_in_count, although we will handle incoming profile
 757      insanities by setting it equal to the minimum count along the path).
 758
 759      In the joiner case, we need to estimate how much of the path_in_count
 760      will stay on the threading path after the joiner's conditional branch.
 761      We don't really know for sure how much of the counts
 762      associated with this path go to each successor of the joiner, but we'll
 763      estimate based on the fraction of the total count coming into the path
 764      bb was from the threading paths (computed above in onpath_scale).
 765      Afterwards, we will need to do some fixup to account for other threading
 766      paths and possible profile insanities.
 767
 768      In order to estimate the joiner case's counts we also need to update
 769      nonpath_count with any additional counts coming into the path.  Other
 770      blocks along the path may have additional predecessors from outside
 771      the path.  */
 772   gcov_type path_out_count = path_in_count;
 773   gcov_type min_path_count = path_in_count;
 774   for (unsigned int i = 1; i < path->length (); i++)
 775     {
 776       edge epath = (*path)[i]->e;
 777       gcov_type cur_count = epath->count;
 778       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 779         {
 780           has_joiner = true;
 781           cur_count = apply_probability (cur_count, onpath_scale);
 782         }
 783       /* In the joiner case we need to update nonpath_count for any edges
 784          coming into the path that will contribute to the count flowing
 785          into the path successor.  */
 786       if (has_joiner && epath != elast)
 787       {
 788         /* Look for other incoming edges after joiner.  */
 789         FOR_EACH_EDGE (ein, ei, epath->dest->preds)
 790           {
 791             if (ein != epath
 792                 /* Ignore in edges from blocks we have duplicated for a
 793                    threading path, which have duplicated edge counts until
 794                    they are redirected by an invocation of this routine.  */
 795                 && !bitmap_bit_p (local_info->duplicate_blocks,
 796                                   ein->src->index))
 797               nonpath_count += ein->count;
 798           }
 799       }
 800       if (cur_count < path_out_count)
 801         path_out_count = cur_count;
 802       if (epath->count < min_path_count)
 803         min_path_count = epath->count;
 804     }
 805
 806   /* We computed path_out_count above assuming that this path targeted
 807      the joiner's on-path successor with the same likelihood as it
 808      reached the joiner.  However, other thread paths through the joiner
 809      may take a different path through the normal copy source block
 810      (i.e. they have a different elast), meaning that they do not
 811      contribute any counts to this path's elast.  As a result, it may
 812      turn out that this path must have more count flowing to the on-path
 813      successor of the joiner.  Essentially, all of this path's elast
 814      count must be contributed by this path and any nonpath counts
 815      (since any path through the joiner with a different elast will not
 816      include a copy of this elast in its duplicated path).
 817      So ensure that this path's path_out_count is at least the
 818      difference between elast->count and nonpath_count.  Otherwise the edge
 819      counts after threading will not be sane.  */
 820   if (has_joiner && path_out_count < elast->count - nonpath_count)
 821   {
 822     path_out_count = elast->count - nonpath_count;
 823     /* But neither can we go above the minimum count along the path
 824        we are duplicating.  This can be an issue due to profile
 825        insanities coming in to this pass.  */
 826     if (path_out_count > min_path_count)
 827       path_out_count = min_path_count;
 828   }
 829
 830   *path_in_count_ptr = path_in_count;
 831   *path_out_count_ptr = path_out_count;
 832   *path_in_freq_ptr = path_in_freq;
 833   return has_joiner;
 834 }
 835
 836
 837 /* Update the counts and frequencies for both an original path
 838    edge EPATH and its duplicate EDUP.  The duplicate source block
 839    will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
 840    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
 841 static void
 842 update_profile (edge epath, edge edup, gcov_type path_in_count,
 843                 gcov_type path_out_count, int path_in_freq)
 844 {
 845
 846   /* First update the duplicated block's count / frequency.  */
 847   if (edup)
 848     {
 849       basic_block dup_block = edup->src;
 850       gcc_assert (dup_block->count == 0);
 851       gcc_assert (dup_block->frequency == 0);
 852       dup_block->count = path_in_count;
 853       dup_block->frequency = path_in_freq;
 854     }
 855
 856   /* Now update the original block's count and frequency in the
 857      opposite manner - remove the counts/freq that will flow
 858      into the duplicated block.  Handle underflow due to precision/
 859      rounding issues.  */
 860   epath->src->count -= path_in_count;
 861   if (epath->src->count < 0)
 862     epath->src->count = 0;
 863   epath->src->frequency -= path_in_freq;
 864   if (epath->src->frequency < 0)
 865     epath->src->frequency = 0;
 866
 867   /* Next update this path edge's original and duplicated counts.  We know
 868      that the duplicated path will have path_out_count flowing
 869      out of it (in the joiner case this is the count along the duplicated path
 870      out of the duplicated joiner).  This count can then be removed from the
 871      original path edge.  */
 872   if (edup)
 873     edup->count = path_out_count;
 874   epath->count -= path_out_count;
 875   gcc_assert (epath->count >= 0);
 876 }
 877
 878
 879 /* The duplicate and original joiner blocks may end up with different
 880    probabilities (different from both the original and from each other).
 881    Recompute the probabilities here once we have updated the edge
 882    counts and frequencies.  */
 883
 884 static void
 885 recompute_probabilities (basic_block bb)
 886 {
 887   edge esucc;
 888   edge_iterator ei;
 889   FOR_EACH_EDGE (esucc, ei, bb->succs)
 890     {
 891       if (!bb->count)
 892         continue;
 893
 894       /* Prevent overflow computation due to insane profiles.  */
 895       if (esucc->count < bb->count)
 896         esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
 897                                                  bb->count);
 898       else
 899         /* Can happen with missing/guessed probabilities, since we
 900            may determine that more is flowing along duplicated
 901            path than joiner succ probabilities allowed.
 902            Counts and freqs will be insane after jump threading,
 903            at least make sure probability is sane or we will
 904            get a flow verification error.
 905            Not much we can do to make counts/freqs sane without
 906            redoing the profile estimation.  */
 907         esucc->probability = REG_BR_PROB_BASE;
 908     }
 909 }
 910
 911
 912 /* Update the counts of the original and duplicated edges from a joiner
 913    that go off path, given that we have already determined that the
 914    duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
 915    outgoing count along the path PATH_OUT_COUNT.  The original (on-)path
 916    edge from joiner is EPATH.  */
 917
 918 static void
 919 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
 920                               gcov_type path_in_count,
 921                               gcov_type path_out_count)
 922 {
 923   /* Compute the count that currently flows off path from the joiner.
 924      In other words, the total count of joiner's out edges other than
 925      epath.  Compute this by walking the successors instead of
 926      subtracting epath's count from the joiner bb count, since there
 927      are sometimes slight insanities where the total out edge count is
 928      larger than the bb count (possibly due to rounding/truncation
 929      errors).  */
 930   gcov_type total_orig_off_path_count = 0;
 931   edge enonpath;
 932   edge_iterator ei;
 933   FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
 934     {
 935       if (enonpath == epath)
 936         continue;
 937       total_orig_off_path_count += enonpath->count;
 938     }
 939
 940   /* For the path that we are duplicating, the amount that will flow
 941      off path from the duplicated joiner is the delta between the
 942      path's cumulative in count and the portion of that count we
 943      estimated above as flowing from the joiner along the duplicated
 944      path.  */
 945   gcov_type total_dup_off_path_count = path_in_count - path_out_count;
 946
 947   /* Now do the actual updates of the off-path edges.  */
 948   FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
 949     {
 950       /* Look for edges going off of the threading path.  */
 951       if (enonpath == epath)
 952         continue;
 953
 954       /* Find the corresponding edge out of the duplicated joiner.  */
 955       edge enonpathdup = find_edge (dup_bb, enonpath->dest);
 956       gcc_assert (enonpathdup);
 957
 958       /* We can't use the original probability of the joiner's out
 959          edges, since the probabilities of the original branch
 960          and the duplicated branches may vary after all threading is
 961          complete.  But apportion the duplicated joiner's off-path
 962          total edge count computed earlier (total_dup_off_path_count)
 963          among the duplicated off-path edges based on their original
 964          ratio to the full off-path count (total_orig_off_path_count).
 965          */
 966       int scale = GCOV_COMPUTE_SCALE (enonpath->count,
 967                                       total_orig_off_path_count);
 968       /* Give the duplicated offpath edge a portion of the duplicated
 969          total.  */
 970       enonpathdup->count = apply_scale (scale,
 971                                         total_dup_off_path_count);
 972       /* Now update the original offpath edge count, handling underflow
 973          due to rounding errors.  */
 974       enonpath->count -= enonpathdup->count;
 975       if (enonpath->count < 0)
 976         enonpath->count = 0;
 977     }
 978 }
 979
 980
 981 /* Check if the paths through RD all have estimated frequencies but zero
 982    profile counts.  This is more accurate than checking the entry block
 983    for a zero profile count, since profile insanities sometimes creep in.  */
 984
 985 static bool
 986 estimated_freqs_path (struct redirection_data *rd)
 987 {
 988   edge e = rd->incoming_edges->e;
 989   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 990   edge ein;
 991   edge_iterator ei;
 992   bool non_zero_freq = false;
 993   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 994     {
 995       if (ein->count)
 996         return false;
 997       non_zero_freq |= ein->src->frequency != 0;
 998     }
 999
1000   for (unsigned int i = 1; i < path->length (); i++)
1001     {
1002       edge epath = (*path)[i]->e;
1003       if (epath->src->count)
1004         return false;
1005       non_zero_freq |= epath->src->frequency != 0;
1006       edge esucc;
1007       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1008         {
1009           if (esucc->count)
1010             return false;
1011           non_zero_freq |= esucc->src->frequency != 0;
1012         }
1013     }
1014   return non_zero_freq;
1015 }
1016
1017
1018 /* Invoked for routines that have guessed frequencies and no profile
1019    counts to record the block and edge frequencies for paths through RD
1020    in the profile count fields of those blocks and edges.  This is because
1021    ssa_fix_duplicate_block_edges incrementally updates the block and
1022    edge counts as edges are redirected, and it is difficult to do that
1023    for edge frequencies which are computed on the fly from the source
1024    block frequency and probability.  When a block frequency is updated
1025    its outgoing edge frequencies are affected and become difficult to
1026    adjust.  */
1027
1028 static void
1029 freqs_to_counts_path (struct redirection_data *rd)
1030 {
1031   edge e = rd->incoming_edges->e;
1032   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1033   edge ein;
1034   edge_iterator ei;
1035   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1036     {
1037       /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1038          errors applying the probability when the frequencies are very
1039          small.  */
1040       ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1041                                       ein->probability);
1042     }
1043
1044   for (unsigned int i = 1; i < path->length (); i++)
1045     {
1046       edge epath = (*path)[i]->e;
1047       edge esucc;
1048       /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1049          errors applying the edge probability when the frequencies are very
1050          small.  */
1051       epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1052       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1053         esucc->count = apply_probability (esucc->src->count,
1054                                           esucc->probability);
1055     }
1056 }
1057
1058
1059 /* For routines that have guessed frequencies and no profile counts, where we
1060    used freqs_to_counts_path to record block and edge frequencies for paths
1061    through RD, we clear the counts after completing all updates for RD.
1062    The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1063    but the block frequencies and edge probabilities were updated as well,
1064    so we can simply clear the count fields.  */
1065
1066 static void
1067 clear_counts_path (struct redirection_data *rd)
1068 {
1069   edge e = rd->incoming_edges->e;
1070   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1071   edge ein, esucc;
1072   edge_iterator ei;
1073   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1074     ein->count = 0;
1075
1076   /* First clear counts along original path.  */
1077   for (unsigned int i = 1; i < path->length (); i++)
1078     {
1079       edge epath = (*path)[i]->e;
1080       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1081         esucc->count = 0;
1082       epath->src->count = 0;
1083     }
1084   /* Also need to clear the counts along duplicated path.  */
1085   for (unsigned int i = 0; i < 2; i++)
1086     {
1087       basic_block dup = rd->dup_blocks[i];
1088       if (!dup)
1089         continue;
1090       FOR_EACH_EDGE (esucc, ei, dup->succs)
1091         esucc->count = 0;
1092       dup->count = 0;
1093     }
1094 }
1095
1096 /* Wire up the outgoing edges from the duplicate blocks and
1097    update any PHIs as needed.  Also update the profile counts
1098    on the original and duplicate blocks and edges.  */
1099 void
1100 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1101                                ssa_local_info_t *local_info)
1102 {
1103   bool multi_incomings = (rd->incoming_edges->next != NULL);
1104   edge e = rd->incoming_edges->e;
1105   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1106   edge elast = path->last ()->e;
1107   gcov_type path_in_count = 0;
1108   gcov_type path_out_count = 0;
1109   int path_in_freq = 0;
1110
1111   /* This routine updates profile counts, frequencies, and probabilities
1112      incrementally. Since it is difficult to do the incremental updates
1113      using frequencies/probabilities alone, for routines without profile
1114      data we first take a snapshot of the existing block and edge frequencies
1115      by copying them into the empty profile count fields.  These counts are
1116      then used to do the incremental updates, and cleared at the end of this
1117      routine.  If the function is marked as having a profile, we still check
1118      to see if the paths through RD are using estimated frequencies because
1119      the routine had zero profile counts.  */
1120   bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1121                              || estimated_freqs_path (rd));
1122   if (do_freqs_to_counts)
1123     freqs_to_counts_path (rd);
1124
1125   /* First determine how much profile count to move from original
1126      path to the duplicate path.  This is tricky in the presence of
1127      a joiner (see comments for compute_path_counts), where some portion
1128      of the path's counts will flow off-path from the joiner.  In the
1129      non-joiner case the path_in_count and path_out_count should be the
1130      same.  */
1131   bool has_joiner = compute_path_counts (rd, local_info,
1132                                          &path_in_count, &path_out_count,
1133                                          &path_in_freq);
1134
1135   int cur_path_freq = path_in_freq;
1136   for (unsigned int count = 0, i = 1; i < path->length (); i++)
1137     {
1138       edge epath = (*path)[i]->e;
1139
1140       /* If we were threading through an joiner block, then we want
1141          to keep its control statement and redirect an outgoing edge.
1142          Else we want to remove the control statement & edges, then create
1143          a new outgoing edge.  In both cases we may need to update PHIs.  */
1144       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1145         {
1146           edge victim;
1147           edge e2;
1148
1149           gcc_assert (has_joiner);
1150
1151           /* This updates the PHIs at the destination of the duplicate
1152              block.  Pass 0 instead of i if we are threading a path which
1153              has multiple incoming edges.  */
1154           update_destination_phis (local_info->bb, rd->dup_blocks[count],
1155                                    path, multi_incomings ? 0 : i);
1156
1157           /* Find the edge from the duplicate block to the block we're
1158              threading through.  That's the edge we want to redirect.  */
1159           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1160
1161           /* If there are no remaining blocks on the path to duplicate,
1162              then redirect VICTIM to the final destination of the jump
1163              threading path.  */
1164           if (!any_remaining_duplicated_blocks (path, i))
1165             {
1166               e2 = redirect_edge_and_branch (victim, elast->dest);
1167               /* If we redirected the edge, then we need to copy PHI arguments
1168                  at the target.  If the edge already existed (e2 != victim
1169                  case), then the PHIs in the target already have the correct
1170                  arguments.  */
1171               if (e2 == victim)
1172                 copy_phi_args (e2->dest, elast, e2,
1173                                path, multi_incomings ? 0 : i);
1174             }
1175           else
1176             {
1177               /* Redirect VICTIM to the next duplicated block in the path.  */
1178               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1179
1180               /* We need to update the PHIs in the next duplicated block.  We
1181                  want the new PHI args to have the same value as they had
1182                  in the source of the next duplicate block.
1183
1184                  Thus, we need to know which edge we traversed into the
1185                  source of the duplicate.  Furthermore, we may have
1186                  traversed many edges to reach the source of the duplicate.
1187
1188                  Walk through the path starting at element I until we
1189                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1190                  the edge from the prior element.  */
1191               for (unsigned int j = i + 1; j < path->length (); j++)
1192                 {
1193                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1194                     {
1195                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1196                       break;
1197                     }
1198                 }
1199             }
1200
1201           /* Update the counts and frequency of both the original block
1202              and path edge, and the duplicates.  The path duplicate's
1203              incoming count and frequency are the totals for all edges
1204              incoming to this jump threading path computed earlier.
1205              And we know that the duplicated path will have path_out_count
1206              flowing out of it (i.e. along the duplicated path out of the
1207              duplicated joiner).  */
1208           update_profile (epath, e2, path_in_count, path_out_count,
1209                           path_in_freq);
1210
1211           /* Next we need to update the counts of the original and duplicated
1212              edges from the joiner that go off path.  */
1213           update_joiner_offpath_counts (epath, e2->src, path_in_count,
1214                                         path_out_count);
1215
1216           /* Finally, we need to set the probabilities on the duplicated
1217              edges out of the duplicated joiner (e2->src).  The probabilities
1218              along the original path will all be updated below after we finish
1219              processing the whole path.  */
1220           recompute_probabilities (e2->src);
1221
1222           /* Record the frequency flowing to the downstream duplicated
1223              path blocks.  */
1224           cur_path_freq = EDGE_FREQUENCY (e2);
1225         }
1226       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1227         {
1228           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1229           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1230                                                    multi_incomings ? 0 : i);
1231           if (count == 1)
1232             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1233
1234           /* Update the counts and frequency of both the original block
1235              and path edge, and the duplicates.  Since we are now after
1236              any joiner that may have existed on the path, the count
1237              flowing along the duplicated threaded path is path_out_count.
1238              If we didn't have a joiner, then cur_path_freq was the sum
1239              of the total frequencies along all incoming edges to the
1240              thread path (path_in_freq).  If we had a joiner, it would have
1241              been updated at the end of that handling to the edge frequency
1242              along the duplicated joiner path edge.  */
1243           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1244                           path_out_count, path_out_count,
1245                           cur_path_freq);
1246         }
1247       else
1248         {
1249           /* No copy case.  In this case we don't have an equivalent block
1250              on the duplicated thread path to update, but we do need
1251              to remove the portion of the counts/freqs that were moved
1252              to the duplicated path from the counts/freqs flowing through
1253              this block on the original path.  Since all the no-copy edges
1254              are after any joiner, the removed count is the same as
1255              path_out_count.
1256
1257              If we didn't have a joiner, then cur_path_freq was the sum
1258              of the total frequencies along all incoming edges to the
1259              thread path (path_in_freq).  If we had a joiner, it would have
1260              been updated at the end of that handling to the edge frequency
1261              along the duplicated joiner path edge.  */
1262              update_profile (epath, NULL, path_out_count, path_out_count,
1263                              cur_path_freq);
1264         }
1265
1266       /* Increment the index into the duplicated path when we processed
1267          a duplicated block.  */
1268       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1269           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1270       {
1271           count++;
1272       }
1273     }
1274
1275   /* Now walk orig blocks and update their probabilities, since the
1276      counts and freqs should be updated properly by above loop.  */
1277   for (unsigned int i = 1; i < path->length (); i++)
1278     {
1279       edge epath = (*path)[i]->e;
1280       recompute_probabilities (epath->src);
1281     }
1282
1283   /* Done with all profile and frequency updates, clear counts if they
1284      were copied.  */
1285   if (do_freqs_to_counts)
1286     clear_counts_path (rd);
1287 }
1288
1289 /* Hash table traversal callback routine to create duplicate blocks.  */
1290
1291 int
1292 ssa_create_duplicates (struct redirection_data **slot,
1293                        ssa_local_info_t *local_info)
1294 {
1295   struct redirection_data *rd = *slot;
1296
1297   /* The second duplicated block in a jump threading path is specific
1298      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
1299
1300      Each time we're called, we have to look through the path and see
1301      if a second block needs to be duplicated.
1302
1303      Note the search starts with the third edge on the path.  The first
1304      edge is the incoming edge, the second edge always has its source
1305      duplicated.  Thus we start our search with the third edge.  */
1306   vec<jump_thread_edge *> *path = rd->path;
1307   for (unsigned int i = 2; i < path->length (); i++)
1308     {
1309       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1310           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1311         {
1312           create_block_for_threading ((*path)[i]->e->src, rd, 1,
1313                                       &local_info->duplicate_blocks);
1314           break;
1315         }
1316     }
1317
1318   /* Create a template block if we have not done so already.  Otherwise
1319      use the template to create a new block.  */
1320   if (local_info->template_block == NULL)
1321     {
1322       create_block_for_threading ((*path)[1]->e->src, rd, 0,
1323                                   &local_info->duplicate_blocks);
1324       local_info->template_block = rd->dup_blocks[0];
1325
1326       /* We do not create any outgoing edges for the template.  We will
1327          take care of that in a later traversal.  That way we do not
1328          create edges that are going to just be deleted.  */
1329     }
1330   else
1331     {
1332       create_block_for_threading (local_info->template_block, rd, 0,
1333                                   &local_info->duplicate_blocks);
1334
1335       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1336          block.   */
1337       ssa_fix_duplicate_block_edges (rd, local_info);
1338     }
1339
1340   /* Keep walking the hash table.  */
1341   return 1;
1342 }
1343
1344 /* We did not create any outgoing edges for the template block during
1345    block creation.  This hash table traversal callback creates the
1346    outgoing edge for the template block.  */
1347
1348 inline int
1349 ssa_fixup_template_block (struct redirection_data **slot,
1350                           ssa_local_info_t *local_info)
1351 {
1352   struct redirection_data *rd = *slot;
1353
1354   /* If this is the template block halt the traversal after updating
1355      it appropriately.
1356
1357      If we were threading through an joiner block, then we want
1358      to keep its control statement and redirect an outgoing edge.
1359      Else we want to remove the control statement & edges, then create
1360      a new outgoing edge.  In both cases we may need to update PHIs.  */
1361   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1362     {
1363       ssa_fix_duplicate_block_edges (rd, local_info);
1364       return 0;
1365     }
1366
1367   return 1;
1368 }
1369
1370 /* Hash table traversal callback to redirect each incoming edge
1371    associated with this hash table element to its new destination.  */
1372
1373 int
1374 ssa_redirect_edges (struct redirection_data **slot,
1375                     ssa_local_info_t *local_info)
1376 {
1377   struct redirection_data *rd = *slot;
1378   struct el *next, *el;
1379
1380   /* Walk over all the incoming edges associated associated with this
1381      hash table entry.  */
1382   for (el = rd->incoming_edges; el; el = next)
1383     {
1384       edge e = el->e;
1385       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1386
1387       /* Go ahead and free this element from the list.  Doing this now
1388          avoids the need for another list walk when we destroy the hash
1389          table.  */
1390       next = el->next;
1391       free (el);
1392
1393       thread_stats.num_threaded_edges++;
1394
1395       if (rd->dup_blocks[0])
1396         {
1397           edge e2;
1398
1399           if (dump_file && (dump_flags & TDF_DETAILS))
1400             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1401                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1402
1403           /* If we redirect a loop latch edge cancel its loop.  */
1404           if (e->src == e->src->loop_father->latch)
1405             mark_loop_for_removal (e->src->loop_father);
1406
1407           /* Redirect the incoming edge (possibly to the joiner block) to the
1408              appropriate duplicate block.  */
1409           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1410           gcc_assert (e == e2);
1411           flush_pending_stmts (e2);
1412         }
1413
1414       /* Go ahead and clear E->aux.  It's not needed anymore and failure
1415          to clear it will cause all kinds of unpleasant problems later.  */
1416       delete_jump_thread_path (path);
1417       e->aux = NULL;
1418
1419     }
1420
1421   /* Indicate that we actually threaded one or more jumps.  */
1422   if (rd->incoming_edges)
1423     local_info->jumps_threaded = true;
1424
1425   return 1;
1426 }
1427
1428 /* Return true if this block has no executable statements other than
1429    a simple ctrl flow instruction.  When the number of outgoing edges
1430    is one, this is equivalent to a "forwarder" block.  */
1431
1432 static bool
1433 redirection_block_p (basic_block bb)
1434 {
1435   gimple_stmt_iterator gsi;
1436
1437   /* Advance to the first executable statement.  */
1438   gsi = gsi_start_bb (bb);
1439   while (!gsi_end_p (gsi)
1440          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1441              || is_gimple_debug (gsi_stmt (gsi))
1442              || gimple_nop_p (gsi_stmt (gsi))))
1443     gsi_next (&gsi);
1444
1445   /* Check if this is an empty block.  */
1446   if (gsi_end_p (gsi))
1447     return true;
1448
1449   /* Test that we've reached the terminating control statement.  */
1450   return gsi_stmt (gsi)
1451          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1452              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1453              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1454 }
1455
1456 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1457    is reached via one or more specific incoming edges, we know which
1458    outgoing edge from BB will be traversed.
1459
1460    We want to redirect those incoming edges to the target of the
1461    appropriate outgoing edge.  Doing so avoids a conditional branch
1462    and may expose new optimization opportunities.  Note that we have
1463    to update dominator tree and SSA graph after such changes.
1464
1465    The key to keeping the SSA graph update manageable is to duplicate
1466    the side effects occurring in BB so that those side effects still
1467    occur on the paths which bypass BB after redirecting edges.
1468
1469    We accomplish this by creating duplicates of BB and arranging for
1470    the duplicates to unconditionally pass control to one specific
1471    successor of BB.  We then revector the incoming edges into BB to
1472    the appropriate duplicate of BB.
1473
1474    If NOLOOP_ONLY is true, we only perform the threading as long as it
1475    does not affect the structure of the loops in a nontrivial way.
1476
1477    If JOINERS is true, then thread through joiner blocks as well.  */
1478
1479 static bool
1480 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1481 {
1482   /* E is an incoming edge into BB that we may or may not want to
1483      redirect to a duplicate of BB.  */
1484   edge e, e2;
1485   edge_iterator ei;
1486   ssa_local_info_t local_info;
1487
1488   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1489
1490   /* To avoid scanning a linear array for the element we need we instead
1491      use a hash table.  For normal code there should be no noticeable
1492      difference.  However, if we have a block with a large number of
1493      incoming and outgoing edges such linear searches can get expensive.  */
1494   redirection_data
1495     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1496
1497   /* Record each unique threaded destination into a hash table for
1498      efficient lookups.  */
1499   FOR_EACH_EDGE (e, ei, bb->preds)
1500     {
1501       if (e->aux == NULL)
1502         continue;
1503
1504       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1505
1506       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1507           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1508         continue;
1509
1510       e2 = path->last ()->e;
1511       if (!e2 || noloop_only)
1512         {
1513           /* If NOLOOP_ONLY is true, we only allow threading through the
1514              header of a loop to exit edges.  */
1515
1516           /* One case occurs when there was loop header buried in a jump
1517              threading path that crosses loop boundaries.  We do not try
1518              and thread this elsewhere, so just cancel the jump threading
1519              request by clearing the AUX field now.  */
1520           if ((bb->loop_father != e2->src->loop_father
1521                && !loop_exit_edge_p (e2->src->loop_father, e2))
1522               || (e2->src->loop_father != e2->dest->loop_father
1523                   && !loop_exit_edge_p (e2->src->loop_father, e2)))
1524             {
1525               /* Since this case is not handled by our special code
1526                  to thread through a loop header, we must explicitly
1527                  cancel the threading request here.  */
1528               delete_jump_thread_path (path);
1529               e->aux = NULL;
1530               continue;
1531             }
1532
1533           /* Another case occurs when trying to thread through our
1534              own loop header, possibly from inside the loop.  We will
1535              thread these later.  */
1536           unsigned int i;
1537           for (i = 1; i < path->length (); i++)
1538             {
1539               if ((*path)[i]->e->src == bb->loop_father->header
1540                   && (!loop_exit_edge_p (bb->loop_father, e2)
1541                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1542                 break;
1543             }
1544
1545           if (i != path->length ())
1546             continue;
1547         }
1548
1549       /* Insert the outgoing edge into the hash table if it is not
1550          already in the hash table.  */
1551       lookup_redirection_data (e, INSERT);
1552     }
1553
1554   /* We do not update dominance info.  */
1555   free_dominance_info (CDI_DOMINATORS);
1556
1557   /* We know we only thread through the loop header to loop exits.
1558      Let the basic block duplication hook know we are not creating
1559      a multiple entry loop.  */
1560   if (noloop_only
1561       && bb == bb->loop_father->header)
1562     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1563
1564   /* Now create duplicates of BB.
1565
1566      Note that for a block with a high outgoing degree we can waste
1567      a lot of time and memory creating and destroying useless edges.
1568
1569      So we first duplicate BB and remove the control structure at the
1570      tail of the duplicate as well as all outgoing edges from the
1571      duplicate.  We then use that duplicate block as a template for
1572      the rest of the duplicates.  */
1573   local_info.template_block = NULL;
1574   local_info.bb = bb;
1575   local_info.jumps_threaded = false;
1576   redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1577                             (&local_info);
1578
1579   /* The template does not have an outgoing edge.  Create that outgoing
1580      edge and update PHI nodes as the edge's target as necessary.
1581
1582      We do this after creating all the duplicates to avoid creating
1583      unnecessary edges.  */
1584   redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1585                             (&local_info);
1586
1587   /* The hash table traversals above created the duplicate blocks (and the
1588      statements within the duplicate blocks).  This loop creates PHI nodes for
1589      the duplicated blocks and redirects the incoming edges into BB to reach
1590      the duplicates of BB.  */
1591   redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1592                             (&local_info);
1593
1594   /* Done with this block.  Clear REDIRECTION_DATA.  */
1595   delete redirection_data;
1596   redirection_data = NULL;
1597
1598   if (noloop_only
1599       && bb == bb->loop_father->header)
1600     set_loop_copy (bb->loop_father, NULL);
1601
1602   BITMAP_FREE (local_info.duplicate_blocks);
1603   local_info.duplicate_blocks = NULL;
1604
1605   /* Indicate to our caller whether or not any jumps were threaded.  */
1606   return local_info.jumps_threaded;
1607 }
1608
1609 /* Wrapper for thread_block_1 so that we can first handle jump
1610    thread paths which do not involve copying joiner blocks, then
1611    handle jump thread paths which have joiner blocks.
1612
1613    By doing things this way we can be as aggressive as possible and
1614    not worry that copying a joiner block will create a jump threading
1615    opportunity.  */
1616
1617 static bool
1618 thread_block (basic_block bb, bool noloop_only)
1619 {
1620   bool retval;
1621   retval = thread_block_1 (bb, noloop_only, false);
1622   retval |= thread_block_1 (bb, noloop_only, true);
1623   return retval;
1624 }
1625
1626
1627 /* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
1628    copy of E->dest created during threading, or E->dest if it was not necessary
1629    to copy it (E is its single predecessor).  */
1630
1631 static basic_block
1632 thread_single_edge (edge e)
1633 {
1634   basic_block bb = e->dest;
1635   struct redirection_data rd;
1636   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1637   edge eto = (*path)[1]->e;
1638
1639   for (unsigned int i = 0; i < path->length (); i++)
1640     delete (*path)[i];
1641   delete path;
1642   e->aux = NULL;
1643
1644   thread_stats.num_threaded_edges++;
1645
1646   if (single_pred_p (bb))
1647     {
1648       /* If BB has just a single predecessor, we should only remove the
1649          control statements at its end, and successors except for ETO.  */
1650       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1651
1652       /* And fixup the flags on the single remaining edge.  */
1653       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1654       eto->flags |= EDGE_FALLTHRU;
1655
1656       return bb;
1657     }
1658
1659   /* Otherwise, we need to create a copy.  */
1660   if (e->dest == eto->src)
1661     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1662
1663   vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1664   jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1665   npath->safe_push (x);
1666
1667   x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1668   npath->safe_push (x);
1669   rd.path = npath;
1670
1671   create_block_for_threading (bb, &rd, 0, NULL);
1672   remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1673   create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1674
1675   if (dump_file && (dump_flags & TDF_DETAILS))
1676     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1677              e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1678
1679   rd.dup_blocks[0]->count = e->count;
1680   rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1681   single_succ_edge (rd.dup_blocks[0])->count = e->count;
1682   redirect_edge_and_branch (e, rd.dup_blocks[0]);
1683   flush_pending_stmts (e);
1684
1685   return rd.dup_blocks[0];
1686 }
1687
1688 /* Callback for dfs_enumerate_from.  Returns true if BB is different
1689    from STOP and DBDS_CE_STOP.  */
1690
1691 static basic_block dbds_ce_stop;
1692 static bool
1693 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1694 {
1695   return (bb != (const_basic_block) stop
1696           && bb != dbds_ce_stop);
1697 }
1698
/* The possible dominance relationships between a basic block BB and the
   latch of a LOOP.  */

enum bb_dom_status
{
  /* BB does not dominate the latch of the LOOP.  */
  DOMST_NONDOMINATING = 0,
  /* The LOOP is broken (there is no path from the header to its
     latch).  */
  DOMST_LOOP_BROKEN = 1,
  /* BB dominates the latch of the LOOP.  */
  DOMST_DOMINATING = 2
};
1711
1712 static enum bb_dom_status
1713 determine_bb_domination_status (struct loop *loop, basic_block bb)
1714 {
1715   basic_block *bblocks;
1716   unsigned nblocks, i;
1717   bool bb_reachable = false;
1718   edge_iterator ei;
1719   edge e;
1720
1721   /* This function assumes BB is a successor of LOOP->header.
1722      If that is not the case return DOMST_NONDOMINATING which
1723      is always safe.  */
1724     {
1725       bool ok = false;
1726
1727       FOR_EACH_EDGE (e, ei, bb->preds)
1728         {
1729           if (e->src == loop->header)
1730             {
1731               ok = true;
1732               break;
1733             }
1734         }
1735
1736       if (!ok)
1737         return DOMST_NONDOMINATING;
1738     }
1739
1740   if (bb == loop->latch)
1741     return DOMST_DOMINATING;
1742
1743   /* Check that BB dominates LOOP->latch, and that it is back-reachable
1744      from it.  */
1745
1746   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1747   dbds_ce_stop = loop->header;
1748   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1749                                 bblocks, loop->num_nodes, bb);
1750   for (i = 0; i < nblocks; i++)
1751     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1752       {
1753         if (e->src == loop->header)
1754           {
1755             free (bblocks);
1756             return DOMST_NONDOMINATING;
1757           }
1758         if (e->src == bb)
1759           bb_reachable = true;
1760       }
1761
1762   free (bblocks);
1763   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1764 }
1765
1766 /* Return true if BB is part of the new pre-header that is created
1767    when threading the latch to DATA.  */
1768
1769 static bool
1770 def_split_header_continue_p (const_basic_block bb, const void *data)
1771 {
1772   const_basic_block new_header = (const_basic_block) data;
1773   const struct loop *l;
1774
1775   if (bb == new_header
1776       || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1777     return false;
1778   for (l = bb->loop_father; l; l = loop_outer (l))
1779     if (l == new_header->loop_father)
1780       return true;
1781   return false;
1782 }
1783
/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.

   The threading requests are read from the AUX fields of the edges
   entering LOOP's header.  Whenever the requests cannot be honored
   (the FAIL path below), every request on those edges is cancelled and
   false is returned.  When MAY_PEEL_LOOP_HEADERS is false and the latch
   edge carries no request, threading is only attempted if the header
   satisfies redirection_block_p.  */

static bool
thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loop with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
        of a loop.  This aims to handle the following idiom:

        first = 1;
        while (1)
          {
            if (first)
              initialize;
            first = 0;
            body;
          }

        After threading the latch edge, this becomes

        first = 1;
        if (first)
          initialize;
        while (1)
          {
            first = 0;
            body;
          }

        The original header of the loop is moved out of it, and we may thread
        the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
        the latch block of the loop.  This aims to handle the following idiom
        (normally created for "for" loops):

        i = 0;
        while (1)
          {
            if (i >= 100)
              break;
            body;
            i++;
          }

        This becomes

        i = 0;
        while (1)
          {
            body;
            i++;
            if (i >= 100)
              break;
          }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  /* If we threaded the latch using a joiner block, we cancel the
     threading opportunity out of an abundance of caution.  However,
     still allow threading from outside to inside the loop.  */
  if (latch->aux)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
        {
          delete_jump_thread_path (path);
          latch->aux = NULL;
        }
    }

  /* Determine the single block (TGT_BB) that will become the target of
     the threading, together with the edge (TGT_EDGE) leading to it.  */
  if (latch->aux)
    {
      /* Case 1 above: the latch edge is threaded; its request decides
         the target.  */
      vec<jump_thread_edge *> *path = THREAD_PATH (latch);
      tgt_edge = (*path)[1]->e;
      tgt_bb = tgt_edge->dest;
    }
  else if (!may_peel_loop_headers
           && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      /* Case 2 above: every entry edge must be threaded, and all of
         them to the same block.  */
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (!e->aux)
            {
              if (e == latch)
                continue;

              /* If latch is not threaded, and there is a header
                 edge that is not threaded, we would create loop
                 with multiple entries.  */
              goto fail;
            }

          vec<jump_thread_edge *> *path = THREAD_PATH (e);

          if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
            goto fail;
          tgt_edge = (*path)[1]->e;
          atgt_bb = tgt_edge->dest;
          if (!tgt_bb)
            tgt_bb = atgt_bb;
          /* Two targets of threading would make us create loop
             with multiple entries.  */
          else if (tgt_bb != atgt_bb)
            goto fail;
        }

      if (!tgt_bb)
        {
          /* There are no threading requests.  */
          return false;
        }

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
          && empty_block_p (loop->latch))
        goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
         original header.  */
      mark_loop_for_removal (loop);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
         to create a preheader for it, so that the headers of the two loops
         do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
        {
          tgt_bb = create_preheader (tgt_bb->loop_father, 0);
          gcc_assert (tgt_bb != NULL);
        }
      else
        tgt_bb = split_edge (tgt_edge);
    }

  if (latch->aux)
    {
      basic_block *bblocks;
      unsigned nblocks, i;

      /* First handle the case latch edge is redirected.  We are copying
         the loop header but not creating a multiple entry loop.  Make the
         cfg manipulation code aware of that fact.  */
      set_loop_copy (loop, loop);
      /* The block returned by thread_single_edge becomes the new latch
         and TGT_BB the new header.  */
      loop->latch = thread_single_edge (latch);
      set_loop_copy (loop, NULL);
      gcc_assert (single_succ (loop->latch) == tgt_bb);
      loop->header = tgt_bb;

      /* Remove the new pre-header blocks from our loop.  */
      bblocks = XCNEWVEC (basic_block, loop->num_nodes);
      nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
                                    bblocks, loop->num_nodes, tgt_bb);
      for (i = 0; i < nblocks; i++)
        if (bblocks[i]->loop_father == loop)
          {
            remove_bb_from_loops (bblocks[i]);
            add_bb_to_loop (bblocks[i], loop_outer (loop));
          }
      free (bblocks);

      /* If the new header has multiple latches mark it so.  */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src->loop_father == loop
            && e->src != loop->latch)
          {
            loop->latch = NULL;
            loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
          }

      /* Cancel remaining threading requests that would make the
         loop a multiple entry loop.  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          edge e2;

          if (e->aux == NULL)
            continue;

          vec<jump_thread_edge *> *path = THREAD_PATH (e);
          e2 = path->last ()->e;

          if (e->src->loop_father != e2->dest->loop_father
              && e2->dest != loop->header)
            {
              delete_jump_thread_path (path);
              e->aux = NULL;
            }
        }

      /* Thread the remaining edges through the former header.  */
      thread_block (header, false);
    }
  else
    {
      basic_block new_preheader;

      /* Now consider the case entry edges are redirected to the new entry
         block.  Remember one entry edge, so that we can find the new
         preheader (its destination after threading).  */
      FOR_EACH_EDGE (e, ei, header->preds)
        {
          if (e->aux)
            break;
        }

      /* The duplicate of the header is the new preheader of the loop.  Ensure
         that it is placed correctly in the loop hierarchy.  */
      set_loop_copy (loop, loop_outer (loop));

      thread_block (header, false);
      set_loop_copy (loop, NULL);
      /* E was redirected by the threading above, so its destination is
         now the duplicate of the header.  */
      new_preheader = e->dest;

      /* Create the new latch block.  This is always necessary, as the latch
         must have only a single successor, but the original header had at
         least two successors.  */
      loop->latch = NULL;
      /* MFB_KJ_EDGE is a file-level variable; presumably it is read by
         the mfb_keep_just callback passed below -- TODO confirm.  */
      mfb_kj_edge = single_succ_edge (new_preheader);
      loop->header = mfb_kj_edge->dest;
      latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
      loop->header = latch->dest;
      loop->latch = latch->src;
    }

  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
        {
          delete_jump_thread_path (path);
          e->aux = NULL;
        }
    }
  return false;
}
2064
2065 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
2066    PHI arguments associated with those edges are equal or there are no
2067    PHI arguments, otherwise return FALSE.  */
2068
2069 static bool
2070 phi_args_equal_on_edges (edge e1, edge e2)
2071 {
2072   gphi_iterator gsi;
2073   int indx1 = e1->dest_idx;
2074   int indx2 = e2->dest_idx;
2075
2076   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2077     {
2078       gphi *phi = gsi.phi ();
2079
2080       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2081                             gimple_phi_arg_def (phi, indx2), 0))
2082         return false;
2083     }
2084   return true;
2085 }
2086
2087 /* Walk through the registered jump threads and convert them into a
2088    form convenient for this pass.
2089
2090    Any block which has incoming edges threaded to outgoing edges
2091    will have its entry in THREADED_BLOCK set.
2092
2093    Any threaded edge will have its new outgoing edge stored in the
2094    original edge's AUX field.
2095
2096    This form avoids the need to walk all the edges in the CFG to
2097    discover blocks which need processing and avoids unnecessary
2098    hash table lookups to map from threaded edge to new target.  */
2099
2100 static void
2101 mark_threaded_blocks (bitmap threaded_blocks)
2102 {
2103   unsigned int i;
2104   bitmap_iterator bi;
2105   bitmap tmp = BITMAP_ALLOC (NULL);
2106   basic_block bb;
2107   edge e;
2108   edge_iterator ei;
2109
2110   /* It is possible to have jump threads in which one is a subpath
2111      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
2112      block and (B, C), (C, D) where no joiner block exists.
2113
2114      When this occurs ignore the jump thread request with the joiner
2115      block.  It's totally subsumed by the simpler jump thread request.
2116
2117      This results in less block copying, simpler CFGs.  More importantly,
2118      when we duplicate the joiner block, B, in this case we will create
2119      a new threading opportunity that we wouldn't be able to optimize
2120      until the next jump threading iteration.
2121
2122      So first convert the jump thread requests which do not require a
2123      joiner block.  */
2124   for (i = 0; i < paths.length (); i++)
2125     {
2126       vec<jump_thread_edge *> *path = paths[i];
2127
2128       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2129         {
2130           edge e = (*path)[0]->e;
2131           e->aux = (void *)path;
2132           bitmap_set_bit (tmp, e->dest->index);
2133         }
2134     }
2135
2136   /* Now iterate again, converting cases where we want to thread
2137      through a joiner block, but only if no other edge on the path
2138      already has a jump thread attached to it.  We do this in two passes,
2139      to avoid situations where the order in the paths vec can hide overlapping
2140      threads (the path is recorded on the incoming edge, so we would miss
2141      cases where the second path starts at a downstream edge on the same
2142      path).  First record all joiner paths, deleting any in the unexpected
2143      case where there is already a path for that incoming edge.  */
2144   for (i = 0; i < paths.length (); i++)
2145     {
2146       vec<jump_thread_edge *> *path = paths[i];
2147
2148       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2149         {
2150           /* Attach the path to the starting edge if none is yet recorded.  */
2151           if ((*path)[0]->e->aux == NULL)
2152             (*path)[0]->e->aux = path;
2153           else if (dump_file && (dump_flags & TDF_DETAILS))
2154             dump_jump_thread_path (dump_file, *path, false);
2155         }
2156     }
2157   /* Second, look for paths that have any other jump thread attached to
2158      them, and either finish converting them or cancel them.  */
2159   for (i = 0; i < paths.length (); i++)
2160     {
2161       vec<jump_thread_edge *> *path = paths[i];
2162       edge e = (*path)[0]->e;
2163
2164       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2165         {
2166           unsigned int j;
2167           for (j = 1; j < path->length (); j++)
2168             if ((*path)[j]->e->aux != NULL)
2169               break;
2170
2171           /* If we iterated through the entire path without exiting the loop,
2172              then we are good to go, record it.  */
2173           if (j == path->length ())
2174             bitmap_set_bit (tmp, e->dest->index);
2175           else
2176             {
2177               e->aux = NULL;
2178               if (dump_file && (dump_flags & TDF_DETAILS))
2179                 dump_jump_thread_path (dump_file, *path, false);
2180             }
2181         }
2182     }
2183
2184   /* If optimizing for size, only thread through block if we don't have
2185      to duplicate it or it's an otherwise empty redirection block.  */
2186   if (optimize_function_for_size_p (cfun))
2187     {
2188       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2189         {
2190           bb = BASIC_BLOCK_FOR_FN (cfun, i);
2191           if (EDGE_COUNT (bb->preds) > 1
2192               && !redirection_block_p (bb))
2193             {
2194               FOR_EACH_EDGE (e, ei, bb->preds)
2195                 {
2196                   if (e->aux)
2197                     {
2198                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
2199                       delete_jump_thread_path (path);
2200                       e->aux = NULL;
2201                     }
2202                 }
2203             }
2204           else
2205             bitmap_set_bit (threaded_blocks, i);
2206         }
2207     }
2208   else
2209     bitmap_copy (threaded_blocks, tmp);
2210
2211   /* Look for jump threading paths which cross multiple loop headers.
2212
2213      The code to thread through loop headers will change the CFG in ways
2214      that break assumptions made by the loop optimization code.
2215
2216      We don't want to blindly cancel the requests.  We can instead do better
2217      by trimming off the end of the jump thread path.  */
2218   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2219     {
2220       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2221       FOR_EACH_EDGE (e, ei, bb->preds)
2222         {
2223           if (e->aux)
2224             {
2225               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2226
2227               for (unsigned int i = 0, crossed_headers = 0;
2228                    i < path->length ();
2229                    i++)
2230                 {
2231                   basic_block dest = (*path)[i]->e->dest;
2232                   crossed_headers += (dest == dest->loop_father->header);
2233                   if (crossed_headers > 1)
2234                     {
2235                       /* Trim from entry I onwards.  */
2236                       for (unsigned int j = i; j < path->length (); j++)
2237                         delete (*path)[j];
2238                       path->truncate (i);
2239
2240                       /* Now that we've truncated the path, make sure
2241                          what's left is still valid.   We need at least
2242                          two edges on the path and the last edge can not
2243                          be a joiner.  This should never happen, but let's
2244                          be safe.  */
2245                       if (path->length () < 2
2246                           || (path->last ()->type
2247                               == EDGE_COPY_SRC_JOINER_BLOCK))
2248                         {
2249                           delete_jump_thread_path (path);
2250                           e->aux = NULL;
2251                         }
2252                       break;
2253                     }
2254                 }
2255             }
2256         }
2257     }
2258
2259   /* If we have a joiner block (J) which has two successors S1 and S2 and
2260      we are threading though S1 and the final destination of the thread
2261      is S2, then we must verify that any PHI nodes in S2 have the same
2262      PHI arguments for the edge J->S2 and J->S1->...->S2.
2263
2264      We used to detect this prior to registering the jump thread, but
2265      that prohibits propagation of edge equivalences into non-dominated
2266      PHI nodes as the equivalency test might occur before propagation.
2267
2268      This must also occur after we truncate any jump threading paths
2269      as this scenario may only show up after truncation.
2270
2271      This works for now, but will need improvement as part of the FSA
2272      optimization.
2273
2274      Note since we've moved the thread request data to the edges,
2275      we have to iterate on those rather than the threaded_edges vector.  */
2276   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2277     {
2278       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2279       FOR_EACH_EDGE (e, ei, bb->preds)
2280         {
2281           if (e->aux)
2282             {
2283               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2284               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2285
2286               if (have_joiner)
2287                 {
2288                   basic_block joiner = e->dest;
2289                   edge final_edge = path->last ()->e;
2290                   basic_block final_dest = final_edge->dest;
2291                   edge e2 = find_edge (joiner, final_dest);
2292
2293                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2294                     {
2295                       delete_jump_thread_path (path);
2296                       e->aux = NULL;
2297                     }
2298                 }
2299             }
2300         }
2301     }
2302
2303   BITMAP_FREE (tmp);
2304 }
2305
2306
2307 /* Return TRUE if BB ends with a switch statement or a computed goto.
2308    Otherwise return false.  */
2309 static bool
2310 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2311 {
2312   gimple stmt = last_stmt (bb);
2313   if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2314     return true;
2315   if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2316       && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2317     return true;
2318   return false;
2319 }
2320
2321 /* Verify that the REGION is a Single Entry Multiple Exits region: make sure no
2322    edge other than ENTRY is entering the REGION.  */
2323
2324 DEBUG_FUNCTION void
2325 verify_seme (edge entry, basic_block *region, unsigned n_region)
2326 {
2327   bitmap bbs = BITMAP_ALLOC (NULL);
2328
2329   for (unsigned i = 0; i < n_region; i++)
2330     bitmap_set_bit (bbs, region[i]->index);
2331
2332   for (unsigned i = 0; i < n_region; i++)
2333     {
2334       edge e;
2335       edge_iterator ei;
2336       basic_block bb = region[i];
2337
2338       /* All predecessors other than ENTRY->src should be in the region.  */
2339       for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); ei_next (&ei))
2340         if (e != entry)
2341           gcc_assert (bitmap_bit_p (bbs, e->src->index));
2342     }
2343
2344   BITMAP_FREE (bbs);
2345 }
2346
2347 /* Duplicates a Single Entry Multiple Exit REGION (set of N_REGION basic
2348    blocks).  The ENTRY edge is redirected to the duplicate of the region.  If
2349    REGION is not a Single Entry region, ignore any incoming edges other than
2350    ENTRY: this makes the copied region a Single Entry region.
2351
2352    Remove the last conditional statement in the last basic block in the REGION,
2353    and create a single fallthru edge pointing to the same destination as the
2354    EXIT edge.
2355
2356    The new basic blocks are stored to REGION_COPY in the same order as they had
2357    in REGION, provided that REGION_COPY is not NULL.
2358
2359    Returns false if it is unable to copy the region, true otherwise.  */
2360
2361 static bool
2362 duplicate_seme_region (edge entry, edge exit,
2363                        basic_block *region, unsigned n_region,
2364                        basic_block *region_copy)
2365 {
2366   unsigned i;
2367   bool free_region_copy = false, copying_header = false;
2368   struct loop *loop = entry->dest->loop_father;
2369   edge exit_copy;
2370   edge redirected;
2371   int total_freq = 0, entry_freq = 0;
2372   gcov_type total_count = 0, entry_count = 0;
2373
2374   if (!can_copy_bbs_p (region, n_region))
2375     return false;
2376
2377   /* Some sanity checking.  Note that we do not check for all possible
2378      missuses of the functions.  I.e. if you ask to copy something weird,
2379      it will work, but the state of structures probably will not be
2380      correct.  */
2381   for (i = 0; i < n_region; i++)
2382     {
2383       /* We do not handle subloops, i.e. all the blocks must belong to the
2384          same loop.  */
2385       if (region[i]->loop_father != loop)
2386         return false;
2387     }
2388
2389   initialize_original_copy_tables ();
2390
2391   if (copying_header)
2392     set_loop_copy (loop, loop_outer (loop));
2393   else
2394     set_loop_copy (loop, loop);
2395
2396   if (!region_copy)
2397     {
2398       region_copy = XNEWVEC (basic_block, n_region);
2399       free_region_copy = true;
2400     }
2401
2402   if (entry->dest->count)
2403     {
2404       total_count = entry->dest->count;
2405       entry_count = entry->count;
2406       /* Fix up corner cases, to avoid division by zero or creation of negative
2407          frequencies.  */
2408       if (entry_count > total_count)
2409         entry_count = total_count;
2410     }
2411   else
2412     {
2413       total_freq = entry->dest->frequency;
2414       entry_freq = EDGE_FREQUENCY (entry);
2415       /* Fix up corner cases, to avoid division by zero or creation of negative
2416          frequencies.  */
2417       if (total_freq == 0)
2418         total_freq = 1;
2419       else if (entry_freq > total_freq)
2420         entry_freq = total_freq;
2421     }
2422
2423   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2424             split_edge_bb_loc (entry), 0);
2425   if (total_count)
2426     {
2427       scale_bbs_frequencies_gcov_type (region, n_region,
2428                                        total_count - entry_count,
2429                                        total_count);
2430       scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2431                                        total_count);
2432     }
2433   else
2434     {
2435       scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2436                                  total_freq);
2437       scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2438     }
2439
2440 #ifdef ENABLE_CHECKING
2441   /* Make sure no edge other than ENTRY is entering the copied region.  */
2442   verify_seme (entry, region_copy, n_region);
2443 #endif
2444
2445   /* Remove the last branch in the jump thread path.  */
2446   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2447   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2448
2449   if (e) {
2450     rescan_loop_exit (e, true, false);
2451     e->probability = REG_BR_PROB_BASE;
2452     e->count = region_copy[n_region - 1]->count;
2453   }
2454
2455   /* Redirect the entry and add the phi node arguments.  */
2456   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2457   gcc_assert (redirected != NULL);
2458   flush_pending_stmts (entry);
2459
2460   /* Add the other PHI node arguments.  */
2461   add_phi_args_after_copy (region_copy, n_region, NULL);
2462
2463   if (free_region_copy)
2464     free (region_copy);
2465
2466   free_original_copy_tables ();
2467   return true;
2468 }
2469
2470 /* Walk through all blocks and thread incoming edges to the appropriate
2471    outgoing edge for each edge pair recorded in THREADED_EDGES.
2472
2473    It is the caller's responsibility to fix the dominance information
2474    and rewrite duplicated SSA_NAMEs back into SSA form.
2475
2476    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2477    loop headers if it does not simplify the loop.
2478
2479    Returns true if one or more edges were threaded, false otherwise.  */
2480
2481 bool
2482 thread_through_all_blocks (bool may_peel_loop_headers)
2483 {
2484   bool retval = false;
2485   unsigned int i;
2486   bitmap_iterator bi;
2487   bitmap threaded_blocks;
2488   struct loop *loop;
2489
2490   if (!paths.exists ())
2491     return false;
2492
2493   threaded_blocks = BITMAP_ALLOC (NULL);
2494   memset (&thread_stats, 0, sizeof (thread_stats));
2495
2496   /* Jump-thread all FSM threads before other jump-threads.  */
2497   for (i = 0; i < paths.length ();)
2498     {
2499       vec<jump_thread_edge *> *path = paths[i];
2500       edge entry = (*path)[0]->e;
2501
2502       if ((*path)[0]->type != EDGE_FSM_THREAD
2503           /* Do not jump-thread twice from the same block.  */
2504           || bitmap_bit_p (threaded_blocks, entry->src->index)) {
2505         i++;
2506         continue;
2507       }
2508
2509       unsigned len = path->length ();
2510       edge exit = (*path)[len - 1]->e;
2511       basic_block *region = XNEWVEC (basic_block, len - 1);
2512
2513       for (unsigned int j = 0; j < len - 1; j++)
2514         region[j] = (*path)[j]->e->dest;
2515
2516       if (duplicate_seme_region (entry, exit, region, len - 1, NULL))
2517         {
2518           /* We do not update dominance info.  */
2519           free_dominance_info (CDI_DOMINATORS);
2520           bitmap_set_bit (threaded_blocks, entry->src->index);
2521           retval = true;
2522         }
2523
2524       delete_jump_thread_path (path);
2525       paths.unordered_remove (i);
2526     }
2527
2528   /* Remove from PATHS all the jump-threads starting with an edge already
2529      jump-threaded.  */
2530   for (i = 0; i < paths.length ();)
2531     {
2532       vec<jump_thread_edge *> *path = paths[i];
2533       edge entry = (*path)[0]->e;
2534
2535       /* Do not jump-thread twice from the same block.  */
2536       if (bitmap_bit_p (threaded_blocks, entry->src->index))
2537         {
2538           delete_jump_thread_path (path);
2539           paths.unordered_remove (i);
2540         }
2541       else
2542         i++;
2543     }
2544
2545   bitmap_clear (threaded_blocks);
2546
2547   mark_threaded_blocks (threaded_blocks);
2548
2549   initialize_original_copy_tables ();
2550
2551   /* First perform the threading requests that do not affect
2552      loop structure.  */
2553   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2554     {
2555       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2556
2557       if (EDGE_COUNT (bb->preds) > 0)
2558         retval |= thread_block (bb, true);
2559     }
2560
2561   /* Then perform the threading through loop headers.  We start with the
2562      innermost loop, so that the changes in cfg we perform won't affect
2563      further threading.  */
2564   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2565     {
2566       if (!loop->header
2567           || !bitmap_bit_p (threaded_blocks, loop->header->index))
2568         continue;
2569
2570       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2571     }
2572
2573   /* Any jump threading paths that are still attached to edges at this
2574      point must be one of two cases.
2575
2576      First, we could have a jump threading path which went from outside
2577      a loop to inside a loop that was ignored because a prior jump thread
2578      across a backedge was realized (which indirectly causes the loop
2579      above to ignore the latter thread).  We can detect these because the
2580      loop structures will be different and we do not currently try to
2581      optimize this case.
2582
2583      Second, we could be threading across a backedge to a point within the
2584      same loop.  This occurrs for the FSA/FSM optimization and we would
2585      like to optimize it.  However, we have to be very careful as this
2586      may completely scramble the loop structures, with the result being
2587      irreducible loops causing us to throw away our loop structure.
2588
2589      As a compromise for the latter case, if the thread path ends in
2590      a block where the last statement is a multiway branch, then go
2591      ahead and thread it, else ignore it.  */
2592   basic_block bb;
2593   edge e;
2594   FOR_EACH_BB_FN (bb, cfun)
2595     {
2596       /* If we do end up threading here, we can remove elements from
2597          BB->preds.  Thus we can not use the FOR_EACH_EDGE iterator.  */
2598       for (edge_iterator ei = ei_start (bb->preds);
2599            (e = ei_safe_edge (ei));)
2600         if (e->aux)
2601           {
2602             vec<jump_thread_edge *> *path = THREAD_PATH (e);
2603
2604             /* Case 1, threading from outside to inside the loop
2605                after we'd already threaded through the header.  */
2606             if ((*path)[0]->e->dest->loop_father
2607                 != path->last ()->e->src->loop_father)
2608               {
2609                 delete_jump_thread_path (path);
2610                 e->aux = NULL;
2611                 ei_next (&ei);
2612               }
2613            else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2614               {
2615                 /* The code to thread through loop headers may have
2616                    split a block with jump threads attached to it.
2617
2618                    We can identify this with a disjoint jump threading
2619                    path.  If found, just remove it.  */
2620                 for (unsigned int i = 0; i < path->length () - 1; i++)
2621                   if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2622                     {
2623                       delete_jump_thread_path (path);
2624                       e->aux = NULL;
2625                       ei_next (&ei);
2626                       break;
2627                     }
2628
2629                 /* Our path is still valid, thread it.  */
2630                 if (e->aux)
2631                   {
2632                     if (thread_block ((*path)[0]->e->dest, false))
2633                       e->aux = NULL;
2634                     else
2635                       {
2636                         delete_jump_thread_path (path);
2637                         e->aux = NULL;
2638                         ei_next (&ei);
2639                       }
2640                   }
2641               }
2642            else
2643               {
2644                 delete_jump_thread_path (path);
2645                 e->aux = NULL;
2646                 ei_next (&ei);
2647               }
2648           }
2649         else
2650           ei_next (&ei);
2651     }
2652
2653   statistics_counter_event (cfun, "Jumps threaded",
2654                             thread_stats.num_threaded_edges);
2655
2656   free_original_copy_tables ();
2657
2658   BITMAP_FREE (threaded_blocks);
2659   threaded_blocks = NULL;
2660   paths.release ();
2661
2662   if (retval)
2663     loops_state_set (LOOPS_NEED_FIXUP);
2664
2665   return retval;
2666 }
2667
2668 /* Delete the jump threading path PATH.  We have to explcitly delete
2669    each entry in the vector, then the container.  */
2670
2671 void
2672 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2673 {
2674   for (unsigned int i = 0; i < path->length (); i++)
2675     delete (*path)[i];
2676   path->release();
2677   delete path;
2678 }
2679
2680 /* Register a jump threading opportunity.  We queue up all the jump
2681    threading opportunities discovered by a pass and update the CFG
2682    and SSA form all at once.
2683
2684    E is the edge we can thread, E2 is the new target edge, i.e., we
2685    are effectively recording that E->dest can be changed to E2->dest
2686    after fixing the SSA graph.  */
2687
2688 void
2689 register_jump_thread (vec<jump_thread_edge *> *path)
2690 {
2691   if (!dbg_cnt (registered_jump_thread))
2692     {
2693       delete_jump_thread_path (path);
2694       return;
2695     }
2696
2697   /* First make sure there are no NULL outgoing edges on the jump threading
2698      path.  That can happen for jumping to a constant address.  */
2699   for (unsigned int i = 0; i < path->length (); i++)
2700     if ((*path)[i]->e == NULL)
2701       {
2702         if (dump_file && (dump_flags & TDF_DETAILS))
2703           {
2704             fprintf (dump_file,
2705                      "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
2706             dump_jump_thread_path (dump_file, *path, false);
2707           }
2708
2709         delete_jump_thread_path (path);
2710         return;
2711       }
2712
2713   if (dump_file && (dump_flags & TDF_DETAILS))
2714     dump_jump_thread_path (dump_file, *path, true);
2715
2716   if (!paths.exists ())
2717     paths.create (5);
2718
2719   paths.safe_push (path);
2720 }