gcc/tree-ssa-threadupdate.c
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "symtab.h"
25 #include "options.h"
26 #include "tree.h"
27 #include "fold-const.h"
28 #include "flags.h"
29 #include "predict.h"
30 #include "tm.h"
31 #include "hard-reg-set.h"
32 #include "function.h"
33 #include "dominance.h"
34 #include "cfg.h"
35 #include "cfganal.h"
36 #include "basic-block.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "gimple-expr.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimple-ssa.h"
43 #include "tree-phinodes.h"
44 #include "tree-ssa.h"
45 #include "tree-ssa-threadupdate.h"
46 #include "ssa-iterators.h"
47 #include "dumpfile.h"
48 #include "cfgloop.h"
49 #include "dbgcnt.h"
50 #include "tree-cfg.h"
51 #include "tree-pass.h"
53 /* Given a block B, update the CFG and SSA graph to reflect redirecting
54 one or more in-edges to B to instead reach the destination of an
55 out-edge from B while preserving any side effects in B.
57 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
58 side effects of executing B.
60 1. Make a copy of B (including its outgoing edges and statements). Call
61 the copy B'. Note B' has no incoming edges or PHIs at this time.
63 2. Remove the control statement at the end of B' and all outgoing edges
64 except B'->C.
66 3. Add a new argument to each PHI in C with the same value as the existing
67 argument associated with edge B->C. Associate the new PHI arguments
68 with the edge B'->C.
70 4. For each PHI in B, find or create a PHI in B' with an identical
71 PHI_RESULT. Add an argument to the PHI in B' which has the same
72 value as the PHI in B associated with the edge A->B. Associate
73 the new argument in the PHI in B' with the edge A->B.
75 5. Change the edge A->B to A->B'.
77 5a. This automatically deletes any PHI arguments associated with the
78 edge A->B in B.
80 5b. This automatically associates each new argument added in step 4
81 with the edge A->B'.
83 6. Repeat for other incoming edges into B.
85 7. Put the duplicated resources in B and all the B' blocks into SSA form.
87 Note that block duplication can be minimized by first collecting the
88 set of unique destination blocks that the incoming edges should
89 be threaded to.
91 We reduce the number of edges and statements we create by not copying all
92 the outgoing edges and the control statement in step #1. We instead create
93 a template block without the outgoing edges and duplicate the template.
95 Another case this code handles is threading through a "joiner" block. In
96 this case, we do not know the destination of the joiner block, but one
97 of the outgoing edges from the joiner block leads to a threadable path. This
98 case largely works as outlined above, except the duplicate of the joiner
99 block still contains a full set of outgoing edges and its control statement.
100 We just redirect one of its outgoing edges to our jump threading path. */
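/* As a concrete illustration (editorial sketch, not from the original
   sources): suppose B ends in "if (x_1 > 0)" with successors C and D, and
   the incoming edge A->B only occurs when x_1 > 0 is known to hold:

     A:  x_1 = 1;                        A:  x_1 = 1;
         goto B;                             goto B';
     B:  y_2 = z_3 + 1;          ==>     B': y_4 = z_3 + 1;
         if (x_1 > 0) goto C;                goto C;
         else goto D;                    B:  unchanged, kept for other preds

   The duplicate B' preserves B's side effects but has the single successor
   C, the edge A->B is redirected to A->B', and the PHIs in C and B' are
   filled in as described in steps #3 and #4 above.  */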
103 /* Steps #5 and #6 of the above algorithm are best implemented by walking
104 all the incoming edges which thread to the same destination edge at
105 the same time. That avoids lots of table lookups to get information
106 for the destination edge.
108 To realize that implementation we create a list of incoming edges
109 which thread to the same outgoing edge. Thus to implement steps
110 #5 and #6 we traverse our hash table of outgoing edge information.
111 For each entry we walk the list of incoming edges which thread to
112 the current outgoing edge. */
114 struct el
116 edge e;
117 struct el *next;
120 /* Main data structure recording information regarding B's duplicate
121 blocks. */
123 /* We need to efficiently record the unique thread destinations of this
124 block and specific information associated with those destinations. We
125 may have many incoming edges threaded to the same outgoing edge. This
126 can be naturally implemented with a hash table. */
128 struct redirection_data : free_ptr_hash<redirection_data>
130 /* We support wiring up two block duplicates in a jump threading path.
132 One is a normal block copy where we remove the control statement
133 and wire up its single remaining outgoing edge to the thread path.
135 The other is a joiner block where we leave the control statement
136 in place, but wire one of the outgoing edges to a thread path.
138 In theory we could have multiple block duplicates in a jump
139 threading path, but I haven't tried that.
141 The duplicate blocks appear in this array in the same order in
142 which they appear in the jump thread path. */
143 basic_block dup_blocks[2];
145 /* The jump threading path. */
146 vec<jump_thread_edge *> *path;
148 /* A list of incoming edges which we want to thread to the
149 same path. */
150 struct el *incoming_edges;
152 /* hash_table support. */
153 static inline hashval_t hash (const redirection_data *);
154 static inline int equal (const redirection_data *, const redirection_data *);
157 /* Dump a jump threading path, including annotations about each
158 edge in the path. */
160 static void
161 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
162 bool registering)
164 fprintf (dump_file,
165 " %s%s jump thread: (%d, %d) incoming edge; ",
166 (registering ? "Registering" : "Cancelling"),
167 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
168 path[0]->e->src->index, path[0]->e->dest->index);
170 for (unsigned int i = 1; i < path.length (); i++)
172 /* We can get paths with a NULL edge when the final destination
173 of a jump thread turns out to be a constant address. We dump
174 those paths when debugging, so we have to be prepared for that
175 possibility here. */
176 if (path[i]->e == NULL)
177 continue;
179 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
180 fprintf (dump_file, " (%d, %d) joiner; ",
181 path[i]->e->src->index, path[i]->e->dest->index);
182 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
183 fprintf (dump_file, " (%d, %d) normal;",
184 path[i]->e->src->index, path[i]->e->dest->index);
185 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
186 fprintf (dump_file, " (%d, %d) nocopy;",
187 path[i]->e->src->index, path[i]->e->dest->index);
188 if (path[0]->type == EDGE_FSM_THREAD)
189 fprintf (dump_file, " (%d, %d) ",
190 path[i]->e->src->index, path[i]->e->dest->index);
192 fputc ('\n', dump_file);
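/* For example (format follows the fprintf calls above; block numbers are
   illustrative only), a registered path through a joiner might be dumped as:

     Registering jump thread: (2, 4) incoming edge;  (4, 7) joiner;  (7, 9) normal;

   and a cancelled FSM thread would begin with "Cancelling FSM jump thread:".  */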
195 /* Simple hashing function. For any given incoming edge E, we're going
196 to be most concerned with the final destination of its jump thread
197 path. So hash on the block index of the final edge in the path. */
199 inline hashval_t
200 redirection_data::hash (const redirection_data *p)
202 vec<jump_thread_edge *> *path = p->path;
203 return path->last ()->e->dest->index;
206 /* Given two hash table entries, return true if they have the same
207 jump threading path. */
208 inline int
209 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
211 vec<jump_thread_edge *> *path1 = p1->path;
212 vec<jump_thread_edge *> *path2 = p2->path;
214 if (path1->length () != path2->length ())
215 return false;
217 for (unsigned int i = 1; i < path1->length (); i++)
219 if ((*path1)[i]->type != (*path2)[i]->type
220 || (*path1)[i]->e != (*path2)[i]->e)
221 return false;
224 return true;
227 /* Data structure of information to pass to hash table traversal routines. */
228 struct ssa_local_info_t
230 /* The current block we are working on. */
231 basic_block bb;
233 /* We only create a template block for the first duplicated block in a
234 jump threading path as we may need many duplicates of that block.
236 The second duplicate block in a path is specific to that path. Creating
237 and sharing a template for that block is considerably more difficult. */
238 basic_block template_block;
240 /* TRUE if we thread one or more jumps, FALSE otherwise. */
241 bool jumps_threaded;
243 /* Blocks duplicated for the thread. */
244 bitmap duplicate_blocks;
247 /* Passes which use the jump threading code register jump threading
248 opportunities as they are discovered. We keep the registered
249 jump threading opportunities in this vector as edge pairs
250 (original_edge, target_edge). */
251 static vec<vec<jump_thread_edge *> *> paths;
253 /* When we start updating the CFG for threading, data necessary for jump
254 threading is attached to the AUX field for the incoming edge. Use these
255 macros to access the underlying structure attached to the AUX field. */
256 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
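/* For reference, a client pass (e.g. tree-ssa-threadedge.c) builds and
   registers a path roughly as follows; this is an illustrative sketch only,
   the real call sites live in the client passes:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   register_jump_thread records the path in PATHS above; once CFG updating
   starts, the path is attached to the first edge's AUX field and is
   retrieved via THREAD_PATH.  */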
258 /* Jump threading statistics. */
260 struct thread_stats_d
262 unsigned long num_threaded_edges;
265 struct thread_stats_d thread_stats;
 268 /* Remove the last statement in block BB if it is a control statement.
269 Also remove all outgoing edges except the edge which reaches DEST_BB.
270 If DEST_BB is NULL, then remove all outgoing edges. */
272 static void
273 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
275 gimple_stmt_iterator gsi;
276 edge e;
277 edge_iterator ei;
279 gsi = gsi_last_bb (bb);
281 /* If the duplicate ends with a control statement, then remove it.
283 Note that if we are duplicating the template block rather than the
284 original basic block, then the duplicate might not have any real
285 statements in it. */
286 if (!gsi_end_p (gsi)
287 && gsi_stmt (gsi)
288 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
289 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
290 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
291 gsi_remove (&gsi, true);
293 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
295 if (e->dest != dest_bb)
296 remove_edge (e);
297 else
298 ei_next (&ei);
302 /* Create a duplicate of BB. Record the duplicate block in an array
303 indexed by COUNT stored in RD. */
305 static void
306 create_block_for_threading (basic_block bb,
307 struct redirection_data *rd,
308 unsigned int count,
309 bitmap *duplicate_blocks)
311 edge_iterator ei;
312 edge e;
314 /* We can use the generic block duplication code and simply remove
315 the stuff we do not need. */
316 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
318 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
319 e->aux = NULL;
321 /* Zero out the profile, since the block is unreachable for now. */
322 rd->dup_blocks[count]->frequency = 0;
323 rd->dup_blocks[count]->count = 0;
324 if (duplicate_blocks)
325 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
328 /* Main data structure to hold information for duplicates of BB. */
330 static hash_table<redirection_data> *redirection_data;
 332 /* Given an outgoing edge E, look up and return its entry in our hash table.
334 If INSERT is true, then we insert the entry into the hash table if
335 it is not already present. INCOMING_EDGE is added to the list of incoming
336 edges associated with E in the hash table. */
338 static struct redirection_data *
339 lookup_redirection_data (edge e, enum insert_option insert)
341 struct redirection_data **slot;
342 struct redirection_data *elt;
343 vec<jump_thread_edge *> *path = THREAD_PATH (e);
345 /* Build a hash table element so we can see if E is already
346 in the table. */
347 elt = XNEW (struct redirection_data);
348 elt->path = path;
349 elt->dup_blocks[0] = NULL;
350 elt->dup_blocks[1] = NULL;
351 elt->incoming_edges = NULL;
353 slot = redirection_data->find_slot (elt, insert);
355 /* This will only happen if INSERT is false and the entry is not
356 in the hash table. */
357 if (slot == NULL)
359 free (elt);
360 return NULL;
363 /* This will only happen if E was not in the hash table and
364 INSERT is true. */
365 if (*slot == NULL)
367 *slot = elt;
368 elt->incoming_edges = XNEW (struct el);
369 elt->incoming_edges->e = e;
370 elt->incoming_edges->next = NULL;
371 return elt;
373 /* E was in the hash table. */
374 else
376 /* Free ELT as we do not need it anymore, we will extract the
377 relevant entry from the hash table itself. */
378 free (elt);
380 /* Get the entry stored in the hash table. */
381 elt = *slot;
383 /* If insertion was requested, then we need to add INCOMING_EDGE
384 to the list of incoming edges associated with E. */
385 if (insert)
387 struct el *el = XNEW (struct el);
388 el->next = elt->incoming_edges;
389 el->e = e;
390 elt->incoming_edges = el;
393 return elt;
 397 /* Similar to copy_phi_args, except that the PHI arg already exists; it
 398 just does not have a value associated with it. */
400 static void
401 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
403 int src_idx = src_e->dest_idx;
404 int tgt_idx = tgt_e->dest_idx;
406 /* Iterate over each PHI in e->dest. */
407 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
408 gsi2 = gsi_start_phis (tgt_e->dest);
409 !gsi_end_p (gsi);
410 gsi_next (&gsi), gsi_next (&gsi2))
412 gphi *src_phi = gsi.phi ();
413 gphi *dest_phi = gsi2.phi ();
414 tree val = gimple_phi_arg_def (src_phi, src_idx);
415 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
417 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
418 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 422 /* Given ssa_name DEF, backtrack along the jump threading PATH from node IDX
 423 to see if DEF has a constant value in a flow-sensitive manner. If it does,
 424 set LOCUS to the location of the constant PHI argument and return that value.
 425 Return DEF directly if PATH is NULL or IDX is zero. */
427 static tree
428 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
429 basic_block bb, int idx, source_location *locus)
431 tree arg;
432 gphi *def_phi;
433 basic_block def_bb;
435 if (path == NULL || idx == 0)
436 return def;
438 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
439 if (!def_phi)
440 return def;
442 def_bb = gimple_bb (def_phi);
443 /* Don't propagate loop invariants into deeper loops. */
444 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
445 return def;
447 /* Backtrack jump threading path from IDX to see if def has constant
448 value. */
449 for (int j = idx - 1; j >= 0; j--)
451 edge e = (*path)[j]->e;
452 if (e->dest == def_bb)
454 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
455 if (is_gimple_min_invariant (arg))
457 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
458 return arg;
460 break;
464 return def;
 467 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 468 Try to backtrack along the jump threading PATH from node IDX to see if
 469 the argument has a constant value; if it does, copy the constant value
 470 instead of the argument itself. */
472 static void
473 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
474 vec<jump_thread_edge *> *path, int idx)
476 gphi_iterator gsi;
477 int src_indx = src_e->dest_idx;
479 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
481 gphi *phi = gsi.phi ();
482 tree def = gimple_phi_arg_def (phi, src_indx);
483 source_location locus = gimple_phi_arg_location (phi, src_indx);
485 if (TREE_CODE (def) == SSA_NAME
486 && !virtual_operand_p (gimple_phi_result (phi)))
487 def = get_value_locus_in_path (def, path, bb, idx, &locus);
489 add_phi_arg (phi, def, tgt_e, locus);
493 /* We have recently made a copy of ORIG_BB, including its outgoing
494 edges. The copy is NEW_BB. Every PHI node in every direct successor of
495 ORIG_BB has a new argument associated with edge from NEW_BB to the
496 successor. Initialize the PHI argument so that it is equal to the PHI
497 argument associated with the edge from ORIG_BB to the successor.
498 PATH and IDX are used to check if the new PHI argument has constant
499 value in a flow sensitive manner. */
501 static void
502 update_destination_phis (basic_block orig_bb, basic_block new_bb,
503 vec<jump_thread_edge *> *path, int idx)
505 edge_iterator ei;
506 edge e;
508 FOR_EACH_EDGE (e, ei, orig_bb->succs)
510 edge e2 = find_edge (new_bb, e->dest);
511 copy_phi_args (e->dest, e, e2, path, idx);
 515 /* Given a duplicate block and its single destination (both stored
 516 in RD), create an edge between the duplicate and its single
 517 destination.
 519 Add an additional argument to any PHI nodes at the single
 520 destination. IDX is the node in the jump threading path at which
 521 we start checking whether the new PHI argument has a constant
 522 value along the jump threading path. */
524 static void
525 create_edge_and_update_destination_phis (struct redirection_data *rd,
526 basic_block bb, int idx)
528 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
530 rescan_loop_exit (e, true, false);
531 e->probability = REG_BR_PROB_BASE;
532 e->count = bb->count;
534 /* We used to copy the thread path here. That was added in 2007
535 and dutifully updated through the representation changes in 2013.
537 In 2013 we added code to thread from an interior node through
538 the backedge to another interior node. That runs after the code
539 to thread through loop headers from outside the loop.
541 The latter may delete edges in the CFG, including those
542 which appeared in the jump threading path we copied here. Thus
543 we'd end up using a dangling pointer.
545 After reviewing the 2007/2011 code, I can't see how anything
546 depended on copying the AUX field and clearly copying the jump
547 threading path is problematical due to embedded edge pointers.
548 It has been removed. */
549 e->aux = NULL;
551 /* If there are any PHI nodes at the destination of the outgoing edge
552 from the duplicate block, then we will need to add a new argument
553 to them. The argument should have the same value as the argument
554 associated with the outgoing edge stored in RD. */
555 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
558 /* Look through PATH beginning at START and return TRUE if there are
559 any additional blocks that need to be duplicated. Otherwise,
560 return FALSE. */
561 static bool
562 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
563 unsigned int start)
565 for (unsigned int i = start + 1; i < path->length (); i++)
567 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
568 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
569 return true;
571 return false;
575 /* Compute the amount of profile count/frequency coming into the jump threading
576 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
577 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
578 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
579 identify blocks duplicated for jump threading, which have duplicated
580 edges that need to be ignored in the analysis. Return true if path contains
581 a joiner, false otherwise.
583 In the non-joiner case, this is straightforward - all the counts/frequency
584 flowing into the jump threading path should flow through the duplicated
585 block and out of the duplicated path.
587 In the joiner case, it is very tricky. Some of the counts flowing into
588 the original path go offpath at the joiner. The problem is that while
589 we know how much total count goes off-path in the original control flow,
590 we don't know how many of the counts corresponding to just the jump
591 threading path go offpath at the joiner.
593 For example, assume we have the following control flow and identified
594 jump threading paths:
 596              A       B       C
 597               \      |      /
 598             Ea \     |Eb   / Ec
 599                 \    |    /
 600                  v   v   v
 601                      J       <-- Joiner
 602                     / \
 603                Eoff/   \Eon
 604                   /     \
 605                  v       v
 606                 Soff    Son   <--- Normal
 607                         / \
 608                      Ed/   \ Ee
 609                       /     \
 610                      v       v
 611                      D       E
 613    Jump threading paths: A -> J -> Son -> D (path 1)
 614                          C -> J -> Son -> E (path 2)
616 Note that the control flow could be more complicated:
617 - Each jump threading path may have more than one incoming edge. I.e. A and
618 Ea could represent multiple incoming blocks/edges that are included in
619 path 1.
620 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
621 before or after the "normal" copy block). These are not duplicated onto
622 the jump threading path, as they are single-successor.
623 - Any of the blocks along the path may have other incoming edges that
624 are not part of any jump threading path, but add profile counts along
625 the path.
 627 In the above example, after all jump threading is complete, we will
628 end up with the following control flow:
 630            A         B           C
 631            |         |           |
 632          Ea|         |Eb         |Ec
 633            |         |           |
 634            v         v           v
 635           Ja         J          Jc
 636          /   \     /  \Eon'    /   \
 637     Eona/     \  ---/---\--------  \Eonc
 638        /       \ /   /   \          \
 639       v         v v       v          v
 640     Sona      Soff       Son        Sonc
 641         \                 /\         /
 642          \___________    /  \   _____/
 643                      \  /    \/
 644                       vv     v
 645                       D      E
647 The main issue to notice here is that when we are processing path 1
648 (A->J->Son->D) we need to figure out the outgoing edge weights to
649 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
650 sum of the incoming weights to D remain Ed. The problem with simply
651 assuming that Ja (and Jc when processing path 2) has the same outgoing
652 probabilities to its successors as the original block J, is that after
653 all paths are processed and other edges/counts removed (e.g. none
654 of Ec will reach D after processing path 2), we may end up with not
655 enough count flowing along duplicated edge Sona->D.
657 Therefore, in the case of a joiner, we keep track of all counts
658 coming in along the current path, as well as from predecessors not
659 on any jump threading path (Eb in the above example). While we
660 first assume that the duplicated Eona for Ja->Sona has the same
661 probability as the original, we later compensate for other jump
 662 threading paths that may eliminate edges. We do that by keeping track
 663 of all counts coming into the original path that are not in a jump
 664 thread (Eb in the above example, but as noted earlier, there could
 665 be other predecessors incoming to the path at various points, such
 666 as at Son). Call Enonpath the cumulative non-path count coming into
 667 the path before D. We then ensure that the count from Sona->D is at
 668 least as big as (Ed - Enonpath), but no bigger than the minimum
669 weight along the jump threading path. The probabilities of both the
670 original and duplicated joiner block J and Ja will be adjusted
671 accordingly after the updates. */
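/* A small worked example of that clamping (editorial sketch, with invented
   numbers): suppose Ed = 100, the cumulative non-path count Enonpath = 30,
   and the minimum count along the duplicated path is 80.  If the initial
   scaled estimate for path_out_count were only 50, it is raised to
   Ed - Enonpath = 70, since every count reaching D other than Enonpath must
   flow through Sona; that is still below the path minimum of 80, so 70 is
   the final value.  */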
673 static bool
674 compute_path_counts (struct redirection_data *rd,
675 ssa_local_info_t *local_info,
676 gcov_type *path_in_count_ptr,
677 gcov_type *path_out_count_ptr,
678 int *path_in_freq_ptr)
680 edge e = rd->incoming_edges->e;
681 vec<jump_thread_edge *> *path = THREAD_PATH (e);
682 edge elast = path->last ()->e;
683 gcov_type nonpath_count = 0;
684 bool has_joiner = false;
685 gcov_type path_in_count = 0;
686 int path_in_freq = 0;
688 /* Start by accumulating incoming edge counts to the path's first bb
689 into a couple buckets:
690 path_in_count: total count of incoming edges that flow into the
691 current path.
692 nonpath_count: total count of incoming edges that are not
693 flowing along *any* path. These are the counts
694 that will still flow along the original path after
695 all path duplication is done by potentially multiple
696 calls to this routine.
697 (any other incoming edge counts are for a different jump threading
698 path that will be handled by a later call to this routine.)
699 To make this easier, start by recording all incoming edges that flow into
700 the current path in a bitmap. We could add up the path's incoming edge
701 counts here, but we still need to walk all the first bb's incoming edges
702 below to add up the counts of the other edges not included in this jump
703 threading path. */
704 struct el *next, *el;
705 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
706 for (el = rd->incoming_edges; el; el = next)
708 next = el->next;
709 bitmap_set_bit (in_edge_srcs, el->e->src->index);
711 edge ein;
712 edge_iterator ei;
713 FOR_EACH_EDGE (ein, ei, e->dest->preds)
715 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
716 /* Simply check the incoming edge src against the set captured above. */
717 if (ein_path
718 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
720 /* It is necessary but not sufficient that the last path edges
721 are identical. There may be different paths that share the
722 same last path edge in the case where the last edge has a nocopy
723 source block. */
724 gcc_assert (ein_path->last ()->e == elast);
725 path_in_count += ein->count;
726 path_in_freq += EDGE_FREQUENCY (ein);
728 else if (!ein_path)
730 /* Keep track of the incoming edges that are not on any jump-threading
731 path. These counts will still flow out of original path after all
732 jump threading is complete. */
733 nonpath_count += ein->count;
737 /* This is needed due to insane incoming frequencies. */
738 if (path_in_freq > BB_FREQ_MAX)
739 path_in_freq = BB_FREQ_MAX;
741 BITMAP_FREE (in_edge_srcs);
743 /* Now compute the fraction of the total count coming into the first
744 path bb that is from the current threading path. */
745 gcov_type total_count = e->dest->count;
746 /* Handle incoming profile insanities. */
747 if (total_count < path_in_count)
748 path_in_count = total_count;
749 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
751 /* Walk the entire path to do some more computation in order to estimate
752 how much of the path_in_count will flow out of the duplicated threading
753 path. In the non-joiner case this is straightforward (it should be
754 the same as path_in_count, although we will handle incoming profile
755 insanities by setting it equal to the minimum count along the path).
757 In the joiner case, we need to estimate how much of the path_in_count
758 will stay on the threading path after the joiner's conditional branch.
759 We don't really know for sure how much of the counts
 760 associated with this path go to each successor of the joiner, but we'll
 761 estimate it based on the fraction of the total count coming into the path's
 762 first bb that came from the threading paths (computed above in onpath_scale).
763 Afterwards, we will need to do some fixup to account for other threading
764 paths and possible profile insanities.
766 In order to estimate the joiner case's counts we also need to update
767 nonpath_count with any additional counts coming into the path. Other
768 blocks along the path may have additional predecessors from outside
769 the path. */
770 gcov_type path_out_count = path_in_count;
771 gcov_type min_path_count = path_in_count;
772 for (unsigned int i = 1; i < path->length (); i++)
774 edge epath = (*path)[i]->e;
775 gcov_type cur_count = epath->count;
776 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
778 has_joiner = true;
779 cur_count = apply_probability (cur_count, onpath_scale);
781 /* In the joiner case we need to update nonpath_count for any edges
782 coming into the path that will contribute to the count flowing
783 into the path successor. */
784 if (has_joiner && epath != elast)
786 /* Look for other incoming edges after joiner. */
787 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
789 if (ein != epath
790 /* Ignore in edges from blocks we have duplicated for a
791 threading path, which have duplicated edge counts until
792 they are redirected by an invocation of this routine. */
793 && !bitmap_bit_p (local_info->duplicate_blocks,
794 ein->src->index))
795 nonpath_count += ein->count;
798 if (cur_count < path_out_count)
799 path_out_count = cur_count;
800 if (epath->count < min_path_count)
801 min_path_count = epath->count;
804 /* We computed path_out_count above assuming that this path targeted
805 the joiner's on-path successor with the same likelihood as it
806 reached the joiner. However, other thread paths through the joiner
807 may take a different path through the normal copy source block
808 (i.e. they have a different elast), meaning that they do not
809 contribute any counts to this path's elast. As a result, it may
810 turn out that this path must have more count flowing to the on-path
811 successor of the joiner. Essentially, all of this path's elast
812 count must be contributed by this path and any nonpath counts
813 (since any path through the joiner with a different elast will not
814 include a copy of this elast in its duplicated path).
815 So ensure that this path's path_out_count is at least the
816 difference between elast->count and nonpath_count. Otherwise the edge
817 counts after threading will not be sane. */
818 if (has_joiner && path_out_count < elast->count - nonpath_count)
820 path_out_count = elast->count - nonpath_count;
821 /* But neither can we go above the minimum count along the path
822 we are duplicating. This can be an issue due to profile
823 insanities coming in to this pass. */
824 if (path_out_count > min_path_count)
825 path_out_count = min_path_count;
828 *path_in_count_ptr = path_in_count;
829 *path_out_count_ptr = path_out_count;
830 *path_in_freq_ptr = path_in_freq;
831 return has_joiner;
835 /* Update the counts and frequencies for both an original path
836 edge EPATH and its duplicate EDUP. The duplicate source block
837 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
838 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
839 static void
840 update_profile (edge epath, edge edup, gcov_type path_in_count,
841 gcov_type path_out_count, int path_in_freq)
844 /* First update the duplicated block's count / frequency. */
845 if (edup)
847 basic_block dup_block = edup->src;
848 gcc_assert (dup_block->count == 0);
849 gcc_assert (dup_block->frequency == 0);
850 dup_block->count = path_in_count;
851 dup_block->frequency = path_in_freq;
854 /* Now update the original block's count and frequency in the
855 opposite manner - remove the counts/freq that will flow
856 into the duplicated block. Handle underflow due to precision/
857 rounding issues. */
858 epath->src->count -= path_in_count;
859 if (epath->src->count < 0)
860 epath->src->count = 0;
861 epath->src->frequency -= path_in_freq;
862 if (epath->src->frequency < 0)
863 epath->src->frequency = 0;
865 /* Next update this path edge's original and duplicated counts. We know
866 that the duplicated path will have path_out_count flowing
867 out of it (in the joiner case this is the count along the duplicated path
868 out of the duplicated joiner). This count can then be removed from the
869 original path edge. */
870 if (edup)
871 edup->count = path_out_count;
872 epath->count -= path_out_count;
873 gcc_assert (epath->count >= 0);
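/* A quick numeric illustration (invented values): if the original source
   block had count 100 and frequency 10, and path_in_count = 70 with
   path_in_freq = 7, the duplicate block gets count 70 and frequency 7 while
   the original drops to 30 and 3; likewise EDUP->count becomes
   path_out_count and that same amount is subtracted from EPATH->count.  */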
877 /* The duplicate and original joiner blocks may end up with different
878 probabilities (different from both the original and from each other).
879 Recompute the probabilities here once we have updated the edge
880 counts and frequencies. */
882 static void
883 recompute_probabilities (basic_block bb)
885 edge esucc;
886 edge_iterator ei;
887 FOR_EACH_EDGE (esucc, ei, bb->succs)
889 if (!bb->count)
890 continue;
892 /* Prevent overflow computation due to insane profiles. */
893 if (esucc->count < bb->count)
894 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
895 bb->count);
896 else
897 /* Can happen with missing/guessed probabilities, since we
898 may determine that more is flowing along duplicated
899 path than joiner succ probabilities allowed.
900 Counts and freqs will be insane after jump threading,
901 at least make sure probability is sane or we will
902 get a flow verification error.
903 Not much we can do to make counts/freqs sane without
904 redoing the profile estimation. */
905 esucc->probability = REG_BR_PROB_BASE;
910 /* Update the counts of the original and duplicated edges from a joiner
911 that go off path, given that we have already determined that the
912 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
913 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
914 edge from joiner is EPATH. */
916 static void
917 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
918 gcov_type path_in_count,
919 gcov_type path_out_count)
921 /* Compute the count that currently flows off path from the joiner.
922 In other words, the total count of joiner's out edges other than
923 epath. Compute this by walking the successors instead of
924 subtracting epath's count from the joiner bb count, since there
925 are sometimes slight insanities where the total out edge count is
926 larger than the bb count (possibly due to rounding/truncation
927 errors). */
928 gcov_type total_orig_off_path_count = 0;
929 edge enonpath;
930 edge_iterator ei;
931 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
933 if (enonpath == epath)
934 continue;
935 total_orig_off_path_count += enonpath->count;
938 /* For the path that we are duplicating, the amount that will flow
939 off path from the duplicated joiner is the delta between the
940 path's cumulative in count and the portion of that count we
941 estimated above as flowing from the joiner along the duplicated
942 path. */
943 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
945 /* Now do the actual updates of the off-path edges. */
946 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
948 /* Look for edges going off of the threading path. */
949 if (enonpath == epath)
950 continue;
952 /* Find the corresponding edge out of the duplicated joiner. */
953 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
954 gcc_assert (enonpathdup);
956 /* We can't use the original probability of the joiner's out
957 edges, since the probabilities of the original branch
958 and the duplicated branches may vary after all threading is
959 complete. But apportion the duplicated joiner's off-path
960 total edge count computed earlier (total_dup_off_path_count)
961 among the duplicated off-path edges based on their original
 962 ratio to the full off-path count (total_orig_off_path_count). */
964 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
965 total_orig_off_path_count);
966 /* Give the duplicated offpath edge a portion of the duplicated
967 total. */
968 enonpathdup->count = apply_scale (scale,
969 total_dup_off_path_count);
970 /* Now update the original offpath edge count, handling underflow
971 due to rounding errors. */
972 enonpath->count -= enonpathdup->count;
973 if (enonpath->count < 0)
974 enonpath->count = 0;
979 /* Check if the paths through RD all have estimated frequencies but zero
980 profile counts. This is more accurate than checking the entry block
981 for a zero profile count, since profile insanities sometimes creep in. */
983 static bool
984 estimated_freqs_path (struct redirection_data *rd)
986 edge e = rd->incoming_edges->e;
987 vec<jump_thread_edge *> *path = THREAD_PATH (e);
988 edge ein;
989 edge_iterator ei;
990 bool non_zero_freq = false;
991 FOR_EACH_EDGE (ein, ei, e->dest->preds)
993 if (ein->count)
994 return false;
995 non_zero_freq |= ein->src->frequency != 0;
998 for (unsigned int i = 1; i < path->length (); i++)
1000 edge epath = (*path)[i]->e;
1001 if (epath->src->count)
1002 return false;
1003 non_zero_freq |= epath->src->frequency != 0;
1004 edge esucc;
1005 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1007 if (esucc->count)
1008 return false;
1009 non_zero_freq |= esucc->src->frequency != 0;
1012 return non_zero_freq;
1016 /* Invoked for routines that have guessed frequencies and no profile
1017 counts to record the block and edge frequencies for paths through RD
1018 in the profile count fields of those blocks and edges. This is because
1019 ssa_fix_duplicate_block_edges incrementally updates the block and
1020 edge counts as edges are redirected, and it is difficult to do that
1021 for edge frequencies which are computed on the fly from the source
1022 block frequency and probability. When a block frequency is updated
1023 its outgoing edge frequencies are affected and become difficult to
1024 adjust. */
1026 static void
1027 freqs_to_counts_path (struct redirection_data *rd)
1029 edge e = rd->incoming_edges->e;
1030 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1031 edge ein;
1032 edge_iterator ei;
1033 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1035 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1036 errors applying the probability when the frequencies are very
1037 small. */
1038 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1039 ein->probability);
1042 for (unsigned int i = 1; i < path->length (); i++)
1044 edge epath = (*path)[i]->e;
1045 edge esucc;
1046 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1047 errors applying the edge probability when the frequencies are very
1048 small. */
1049 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1050 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1051 esucc->count = apply_probability (esucc->src->count,
1052 esucc->probability);
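/* For instance (illustrative numbers only): with REG_BR_PROB_BASE == 10000,
   a source block frequency of 3 and an edge probability of 5000 (50%) give
   esucc->src->count = 30000 and esucc->count = apply_probability (30000,
   5000) = 15000, i.e. the edge frequency (1.5) scaled up by
   REG_BR_PROB_BASE, avoiding the rounding that would occur at the
   frequency scale.  */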
1057 /* For routines that have guessed frequencies and no profile counts, where we
1058 used freqs_to_counts_path to record block and edge frequencies for paths
1059 through RD, we clear the counts after completing all updates for RD.
1060 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1061 but the block frequencies and edge probabilities were updated as well,
1062 so we can simply clear the count fields. */
1064 static void
1065 clear_counts_path (struct redirection_data *rd)
1067 edge e = rd->incoming_edges->e;
1068 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1069 edge ein, esucc;
1070 edge_iterator ei;
1071 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1072 ein->count = 0;
1074 /* First clear counts along original path. */
1075 for (unsigned int i = 1; i < path->length (); i++)
1077 edge epath = (*path)[i]->e;
1078 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1079 esucc->count = 0;
1080 epath->src->count = 0;
1082 /* Also need to clear the counts along duplicated path. */
1083 for (unsigned int i = 0; i < 2; i++)
1085 basic_block dup = rd->dup_blocks[i];
1086 if (!dup)
1087 continue;
1088 FOR_EACH_EDGE (esucc, ei, dup->succs)
1089 esucc->count = 0;
1090 dup->count = 0;
1094 /* Wire up the outgoing edges from the duplicate blocks and
1095 update any PHIs as needed. Also update the profile counts
1096 on the original and duplicate blocks and edges. */
1097 void
1098 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1099 ssa_local_info_t *local_info)
1101 bool multi_incomings = (rd->incoming_edges->next != NULL);
1102 edge e = rd->incoming_edges->e;
1103 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1104 edge elast = path->last ()->e;
1105 gcov_type path_in_count = 0;
1106 gcov_type path_out_count = 0;
1107 int path_in_freq = 0;
1109 /* This routine updates profile counts, frequencies, and probabilities
1110 incrementally. Since it is difficult to do the incremental updates
1111 using frequencies/probabilities alone, for routines without profile
1112 data we first take a snapshot of the existing block and edge frequencies
1113 by copying them into the empty profile count fields. These counts are
1114 then used to do the incremental updates, and cleared at the end of this
1115 routine. If the function is marked as having a profile, we still check
1116 to see if the paths through RD are using estimated frequencies because
1117 the routine had zero profile counts. */
1118 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1119 || estimated_freqs_path (rd));
1120 if (do_freqs_to_counts)
1121 freqs_to_counts_path (rd);
1123 /* First determine how much profile count to move from original
1124 path to the duplicate path. This is tricky in the presence of
1125 a joiner (see comments for compute_path_counts), where some portion
1126 of the path's counts will flow off-path from the joiner. In the
1127 non-joiner case the path_in_count and path_out_count should be the
1128 same. */
1129 bool has_joiner = compute_path_counts (rd, local_info,
1130 &path_in_count, &path_out_count,
1131 &path_in_freq);
1133 int cur_path_freq = path_in_freq;
1134 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1136 edge epath = (*path)[i]->e;
 1138 /* If we were threading through a joiner block, then we want
1139 to keep its control statement and redirect an outgoing edge.
1140 Else we want to remove the control statement & edges, then create
1141 a new outgoing edge. In both cases we may need to update PHIs. */
1142 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1144 edge victim;
1145 edge e2;
1147 gcc_assert (has_joiner);
1149 /* This updates the PHIs at the destination of the duplicate
1150 block. Pass 0 instead of i if we are threading a path which
1151 has multiple incoming edges. */
1152 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1153 path, multi_incomings ? 0 : i);
1155 /* Find the edge from the duplicate block to the block we're
1156 threading through. That's the edge we want to redirect. */
1157 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1159 /* If there are no remaining blocks on the path to duplicate,
1160 then redirect VICTIM to the final destination of the jump
1161 threading path. */
1162 if (!any_remaining_duplicated_blocks (path, i))
1164 e2 = redirect_edge_and_branch (victim, elast->dest);
1165 /* If we redirected the edge, then we need to copy PHI arguments
1166 at the target. If the edge already existed (e2 != victim
1167 case), then the PHIs in the target already have the correct
1168 arguments. */
1169 if (e2 == victim)
1170 copy_phi_args (e2->dest, elast, e2,
1171 path, multi_incomings ? 0 : i);
1173 else
1175 /* Redirect VICTIM to the next duplicated block in the path. */
1176 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1178 /* We need to update the PHIs in the next duplicated block. We
1179 want the new PHI args to have the same value as they had
1180 in the source of the next duplicate block.
1182 Thus, we need to know which edge we traversed into the
1183 source of the duplicate. Furthermore, we may have
1184 traversed many edges to reach the source of the duplicate.
1186 Walk through the path starting at element I until we
1187 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1188 the edge from the prior element. */
1189 for (unsigned int j = i + 1; j < path->length (); j++)
1191 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1193 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1194 break;
1199 /* Update the counts and frequency of both the original block
1200 and path edge, and the duplicates. The path duplicate's
1201 incoming count and frequency are the totals for all edges
1202 incoming to this jump threading path computed earlier.
1203 And we know that the duplicated path will have path_out_count
1204 flowing out of it (i.e. along the duplicated path out of the
1205 duplicated joiner). */
1206 update_profile (epath, e2, path_in_count, path_out_count,
1207 path_in_freq);
1209 /* Next we need to update the counts of the original and duplicated
1210 edges from the joiner that go off path. */
1211 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1212 path_out_count);
1214 /* Finally, we need to set the probabilities on the duplicated
1215 edges out of the duplicated joiner (e2->src). The probabilities
1216 along the original path will all be updated below after we finish
1217 processing the whole path. */
1218 recompute_probabilities (e2->src);
1220 /* Record the frequency flowing to the downstream duplicated
1221 path blocks. */
1222 cur_path_freq = EDGE_FREQUENCY (e2);
1224 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1226 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1227 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1228 multi_incomings ? 0 : i);
1229 if (count == 1)
1230 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1232 /* Update the counts and frequency of both the original block
1233 and path edge, and the duplicates. Since we are now after
1234 any joiner that may have existed on the path, the count
1235 flowing along the duplicated threaded path is path_out_count.
1236 If we didn't have a joiner, then cur_path_freq was the sum
1237 of the total frequencies along all incoming edges to the
1238 thread path (path_in_freq). If we had a joiner, it would have
1239 been updated at the end of that handling to the edge frequency
1240 along the duplicated joiner path edge. */
1241 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1242 path_out_count, path_out_count,
1243 cur_path_freq);
1245 else
1247 /* No copy case. In this case we don't have an equivalent block
1248 on the duplicated thread path to update, but we do need
1249 to remove the portion of the counts/freqs that were moved
1250 to the duplicated path from the counts/freqs flowing through
1251 this block on the original path. Since all the no-copy edges
1252 are after any joiner, the removed count is the same as
1253 path_out_count.
1255 If we didn't have a joiner, then cur_path_freq was the sum
1256 of the total frequencies along all incoming edges to the
1257 thread path (path_in_freq). If we had a joiner, it would have
1258 been updated at the end of that handling to the edge frequency
1259 along the duplicated joiner path edge. */
1260 update_profile (epath, NULL, path_out_count, path_out_count,
1261 cur_path_freq);
1264 /* Increment the index into the duplicated path when we processed
1265 a duplicated block. */
1266 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1267 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1269 count++;
1273 /* Now walk orig blocks and update their probabilities, since the
1274 counts and freqs should be updated properly by above loop. */
1275 for (unsigned int i = 1; i < path->length (); i++)
1277 edge epath = (*path)[i]->e;
1278 recompute_probabilities (epath->src);
1281 /* Done with all profile and frequency updates, clear counts if they
1282 were copied. */
1283 if (do_freqs_to_counts)
1284 clear_counts_path (rd);
1287 /* Hash table traversal callback routine to create duplicate blocks. */
1290 ssa_create_duplicates (struct redirection_data **slot,
1291 ssa_local_info_t *local_info)
1293 struct redirection_data *rd = *slot;
1295 /* The second duplicated block in a jump threading path is specific
1296 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1298 Each time we're called, we have to look through the path and see
1299 if a second block needs to be duplicated.
1301 Note the search starts with the third edge on the path. The first
1302 edge is the incoming edge, the second edge always has its source
1303 duplicated. Thus we start our search with the third edge. */
1304 vec<jump_thread_edge *> *path = rd->path;
1305 for (unsigned int i = 2; i < path->length (); i++)
1307 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1308 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1310 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1311 &local_info->duplicate_blocks);
1312 break;
1316 /* Create a template block if we have not done so already. Otherwise
1317 use the template to create a new block. */
1318 if (local_info->template_block == NULL)
1320 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1321 &local_info->duplicate_blocks);
1322 local_info->template_block = rd->dup_blocks[0];
1324 /* We do not create any outgoing edges for the template. We will
1325 take care of that in a later traversal. That way we do not
1326 create edges that are going to just be deleted. */
1328 else
1330 create_block_for_threading (local_info->template_block, rd, 0,
1331 &local_info->duplicate_blocks);
1333 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1334 block. */
1335 ssa_fix_duplicate_block_edges (rd, local_info);
1338 /* Keep walking the hash table. */
1339 return 1;
1342 /* We did not create any outgoing edges for the template block during
1343 block creation. This hash table traversal callback creates the
1344 outgoing edge for the template block. */
1346 inline int
1347 ssa_fixup_template_block (struct redirection_data **slot,
1348 ssa_local_info_t *local_info)
1350 struct redirection_data *rd = *slot;
 1352 /* If this is the template block, halt the traversal after updating
1353 it appropriately.
 1355 If we were threading through a joiner block, then we want
1356 to keep its control statement and redirect an outgoing edge.
1357 Else we want to remove the control statement & edges, then create
1358 a new outgoing edge. In both cases we may need to update PHIs. */
1359 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1361 ssa_fix_duplicate_block_edges (rd, local_info);
1362 return 0;
1365 return 1;
1368 /* Hash table traversal callback to redirect each incoming edge
1369 associated with this hash table element to its new destination. */
1372 ssa_redirect_edges (struct redirection_data **slot,
1373 ssa_local_info_t *local_info)
1375 struct redirection_data *rd = *slot;
1376 struct el *next, *el;
 1378 /* Walk over all the incoming edges associated with this
1379 hash table entry. */
1380 for (el = rd->incoming_edges; el; el = next)
1382 edge e = el->e;
1383 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1385 /* Go ahead and free this element from the list. Doing this now
1386 avoids the need for another list walk when we destroy the hash
1387 table. */
1388 next = el->next;
1389 free (el);
1391 thread_stats.num_threaded_edges++;
1393 if (rd->dup_blocks[0])
1395 edge e2;
1397 if (dump_file && (dump_flags & TDF_DETAILS))
1398 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1399 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1401 /* If we redirect a loop latch edge cancel its loop. */
1402 if (e->src == e->src->loop_father->latch)
1403 mark_loop_for_removal (e->src->loop_father);
1405 /* Redirect the incoming edge (possibly to the joiner block) to the
1406 appropriate duplicate block. */
1407 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1408 gcc_assert (e == e2);
1409 flush_pending_stmts (e2);
1412 /* Go ahead and clear E->aux. It's not needed anymore and failure
1413 to clear it will cause all kinds of unpleasant problems later. */
1414 delete_jump_thread_path (path);
1415 e->aux = NULL;
1419 /* Indicate that we actually threaded one or more jumps. */
1420 if (rd->incoming_edges)
1421 local_info->jumps_threaded = true;
1423 return 1;
1426 /* Return true if this block has no executable statements other than
1427 a simple ctrl flow instruction. When the number of outgoing edges
1428 is one, this is equivalent to a "forwarder" block. */
1430 static bool
1431 redirection_block_p (basic_block bb)
1433 gimple_stmt_iterator gsi;
1435 /* Advance to the first executable statement. */
1436 gsi = gsi_start_bb (bb);
1437 while (!gsi_end_p (gsi)
1438 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1439 || is_gimple_debug (gsi_stmt (gsi))
1440 || gimple_nop_p (gsi_stmt (gsi))
1441 || gimple_clobber_p (gsi_stmt (gsi))))
1442 gsi_next (&gsi);
1444 /* Check if this is an empty block. */
1445 if (gsi_end_p (gsi))
1446 return true;
1448 /* Test that we've reached the terminating control statement. */
1449 return gsi_stmt (gsi)
1450 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1451 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1452 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1455 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1456 is reached via one or more specific incoming edges, we know which
1457 outgoing edge from BB will be traversed.
1459 We want to redirect those incoming edges to the target of the
1460 appropriate outgoing edge. Doing so avoids a conditional branch
1461 and may expose new optimization opportunities. Note that we have
1462 to update dominator tree and SSA graph after such changes.
1464 The key to keeping the SSA graph update manageable is to duplicate
1465 the side effects occurring in BB so that those side effects still
1466 occur on the paths which bypass BB after redirecting edges.
1468 We accomplish this by creating duplicates of BB and arranging for
1469 the duplicates to unconditionally pass control to one specific
1470 successor of BB. We then revector the incoming edges into BB to
1471 the appropriate duplicate of BB.
1473 If NOLOOP_ONLY is true, we only perform the threading as long as it
1474 does not affect the structure of the loops in a nontrivial way.
1476 If JOINERS is true, then thread through joiner blocks as well. */
1478 static bool
1479 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1481 /* E is an incoming edge into BB that we may or may not want to
1482 redirect to a duplicate of BB. */
1483 edge e, e2;
1484 edge_iterator ei;
1485 ssa_local_info_t local_info;
1487 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
 1489 /* To avoid scanning a linear array for the element we need, we instead
 1490 use a hash table. For normal code there should be no noticeable
1491 difference. However, if we have a block with a large number of
1492 incoming and outgoing edges such linear searches can get expensive. */
1493 redirection_data
1494 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1496 /* Record each unique threaded destination into a hash table for
1497 efficient lookups. */
1498 FOR_EACH_EDGE (e, ei, bb->preds)
1500 if (e->aux == NULL)
1501 continue;
1503 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1505 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1506 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1507 continue;
1509 e2 = path->last ()->e;
1510 if (!e2 || noloop_only)
1512 /* If NOLOOP_ONLY is true, we only allow threading through the
1513 header of a loop to exit edges. */
 1515 /* One case occurs when there was a loop header buried in a jump
1516 threading path that crosses loop boundaries. We do not try
1517 and thread this elsewhere, so just cancel the jump threading
1518 request by clearing the AUX field now. */
1519 if ((bb->loop_father != e2->src->loop_father
1520 && !loop_exit_edge_p (e2->src->loop_father, e2))
1521 || (e2->src->loop_father != e2->dest->loop_father
1522 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1524 /* Since this case is not handled by our special code
1525 to thread through a loop header, we must explicitly
1526 cancel the threading request here. */
1527 delete_jump_thread_path (path);
1528 e->aux = NULL;
1529 continue;
1532 /* Another case occurs when trying to thread through our
1533 own loop header, possibly from inside the loop. We will
1534 thread these later. */
1535 unsigned int i;
1536 for (i = 1; i < path->length (); i++)
1538 if ((*path)[i]->e->src == bb->loop_father->header
1539 && (!loop_exit_edge_p (bb->loop_father, e2)
1540 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1541 break;
1544 if (i != path->length ())
1545 continue;
1548 /* Insert the outgoing edge into the hash table if it is not
1549 already in the hash table. */
1550 lookup_redirection_data (e, INSERT);
1553 /* We do not update dominance info. */
1554 free_dominance_info (CDI_DOMINATORS);
1556 /* We know we only thread through the loop header to loop exits.
1557 Let the basic block duplication hook know we are not creating
1558 a multiple entry loop. */
1559 if (noloop_only
1560 && bb == bb->loop_father->header)
1561 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1563 /* Now create duplicates of BB.
1565 Note that for a block with a high outgoing degree we can waste
1566 a lot of time and memory creating and destroying useless edges.
1568 So we first duplicate BB and remove the control structure at the
1569 tail of the duplicate as well as all outgoing edges from the
1570 duplicate. We then use that duplicate block as a template for
1571 the rest of the duplicates. */
1572 local_info.template_block = NULL;
1573 local_info.bb = bb;
1574 local_info.jumps_threaded = false;
1575 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1576 (&local_info);
 1578 /* The template does not have an outgoing edge. Create that outgoing
 1579 edge and update PHI nodes at the edge's target as necessary.
1581 We do this after creating all the duplicates to avoid creating
1582 unnecessary edges. */
1583 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1584 (&local_info);
1586 /* The hash table traversals above created the duplicate blocks (and the
1587 statements within the duplicate blocks). This loop creates PHI nodes for
1588 the duplicated blocks and redirects the incoming edges into BB to reach
1589 the duplicates of BB. */
1590 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1591 (&local_info);
1593 /* Done with this block. Clear REDIRECTION_DATA. */
1594 delete redirection_data;
1595 redirection_data = NULL;
1597 if (noloop_only
1598 && bb == bb->loop_father->header)
1599 set_loop_copy (bb->loop_father, NULL);
1601 BITMAP_FREE (local_info.duplicate_blocks);
1602 local_info.duplicate_blocks = NULL;
1604 /* Indicate to our caller whether or not any jumps were threaded. */
1605 return local_info.jumps_threaded;
1608 /* Wrapper for thread_block_1 so that we can first handle jump
1609 thread paths which do not involve copying joiner blocks, then
1610 handle jump thread paths which have joiner blocks.
1612 By doing things this way we can be as aggressive as possible and
1613 not worry that copying a joiner block will create a jump threading
1614 opportunity. */
1616 static bool
1617 thread_block (basic_block bb, bool noloop_only)
1619 bool retval;
1620 retval = thread_block_1 (bb, noloop_only, false);
1621 retval |= thread_block_1 (bb, noloop_only, true);
1622 return retval;
2626 /* Threads edge E through E->dest to the second edge on E's jump thread path. Returns the
1627 copy of E->dest created during threading, or E->dest if it was not necessary
1628 to copy it (E is its single predecessor). */
1630 static basic_block
1631 thread_single_edge (edge e)
1633 basic_block bb = e->dest;
1634 struct redirection_data rd;
1635 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1636 edge eto = (*path)[1]->e;
1638 delete_jump_thread_path (path);
1639 e->aux = NULL;
1641 thread_stats.num_threaded_edges++;
1643 if (single_pred_p (bb))
1645 /* If BB has just a single predecessor, we need only remove the
1646 control statement at its end and any successors other than ETO. */
1647 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1649 /* And fixup the flags on the single remaining edge. */
1650 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1651 eto->flags |= EDGE_FALLTHRU;
1653 return bb;
1656 /* Otherwise, we need to create a copy. */
1657 if (e->dest == eto->src)
1658 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1660 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1661 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1662 npath->safe_push (x);
1664 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1665 npath->safe_push (x);
1666 rd.path = npath;
1668 create_block_for_threading (bb, &rd, 0, NULL);
1669 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1670 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1672 if (dump_file && (dump_flags & TDF_DETAILS))
1673 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1674 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1676 rd.dup_blocks[0]->count = e->count;
1677 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1678 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1679 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1680 flush_pending_stmts (e);
1682 delete_jump_thread_path (npath);
1683 return rd.dup_blocks[0];
1686 /* Callback for dfs_enumerate_from. Returns true if BB is different
1687 from STOP and DBDS_CE_STOP. */
1689 static basic_block dbds_ce_stop;
1690 static bool
1691 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1693 return (bb != (const_basic_block) stop
1694 && bb != dbds_ce_stop);
1697 /* Evaluates the dominance relationship of the latch of LOOP and BB, and
1698 returns the state. */
1700 enum bb_dom_status
1702 /* BB does not dominate latch of the LOOP. */
1703 DOMST_NONDOMINATING,
1704 /* The LOOP is broken (there is no path from the header to its latch). */
1705 DOMST_LOOP_BROKEN,
1706 /* BB dominates the latch of the LOOP. */
1707 DOMST_DOMINATING
1710 static enum bb_dom_status
1711 determine_bb_domination_status (struct loop *loop, basic_block bb)
1713 basic_block *bblocks;
1714 unsigned nblocks, i;
1715 bool bb_reachable = false;
1716 edge_iterator ei;
1717 edge e;
1719 /* This function assumes BB is a successor of LOOP->header.
1720 If that is not the case return DOMST_NONDOMINATING which
1721 is always safe. */
1723 bool ok = false;
1725 FOR_EACH_EDGE (e, ei, bb->preds)
1727 if (e->src == loop->header)
1729 ok = true;
1730 break;
1734 if (!ok)
1735 return DOMST_NONDOMINATING;
1738 if (bb == loop->latch)
1739 return DOMST_DOMINATING;
1741 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1742 from it. */
1744 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1745 dbds_ce_stop = loop->header;
1746 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1747 bblocks, loop->num_nodes, bb);
1748 for (i = 0; i < nblocks; i++)
1749 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1751 if (e->src == loop->header)
1753 free (bblocks);
1754 return DOMST_NONDOMINATING;
1756 if (e->src == bb)
1757 bb_reachable = true;
1760 free (bblocks);
1761 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
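/* Illustrative sketch (not part of the original file): the reverse-DFS idea
   behind determine_bb_domination_status, restated as a standalone program
   over a toy CFG.  The node numbers, pred lists and helper names below are
   all hypothetical.  Starting from LATCH we walk predecessor edges, never
   entering BB or HEADER.  If some visited node has HEADER as a predecessor,
   there is a header-to-latch path that avoids BB, so BB cannot dominate the
   latch.  Otherwise, if BB is a predecessor of some visited node, the latch
   is back-reachable from BB and BB dominates it; if neither holds, the loop
   is broken.

     #include <stdio.h>
     #include <string.h>

     #define N 6
     // preds[v] lists the predecessors of node v; -1 terminates the list.
     static const int preds[N][N] = {
       { -1 },            // 0: header (entry of the loop)
       { 0, -1 },         // 1: bb, a successor of the header
       { 1, -1 },         // 2
       { 2, 5, -1 },      // 3
       { 3, -1 },         // 4: latch
       { 0, -1 },         // 5: another successor of the header
     };

     int
     main (void)
     {
       const int header = 0, bb = 1, latch = 4;
       int stack[N], visited[N], top = 0;
       int dominates = 1, bb_reachable = 0;

       memset (visited, 0, sizeof visited);
       stack[top++] = latch;
       visited[latch] = 1;
       while (top > 0)
         {
           int v = stack[--top];
           for (int i = 0; preds[v][i] != -1; i++)
             {
               int p = preds[v][i];
               if (p == header)
                 dominates = 0;       // the header reaches the latch without bb
               if (p == bb)
                 bb_reachable = 1;
               if (p != header && p != bb && !visited[p])
                 {
                   visited[p] = 1;
                   stack[top++] = p;
                 }
             }
         }
       // For this toy graph the header reaches the latch through node 5,
       // bypassing bb, so the result is DOMST_NONDOMINATING.
       printf ("%s\n", !dominates ? "DOMST_NONDOMINATING"
               : bb_reachable ? "DOMST_DOMINATING" : "DOMST_LOOP_BROKEN");
       return 0;
     }
*/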
1764 /* Return true if BB is part of the new pre-header that is created
1765 when threading the latch to DATA. */
1767 static bool
1768 def_split_header_continue_p (const_basic_block bb, const void *data)
1770 const_basic_block new_header = (const_basic_block) data;
1771 const struct loop *l;
1773 if (bb == new_header
1774 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1775 return false;
1776 for (l = bb->loop_father; l; l = loop_outer (l))
1777 if (l == new_header->loop_father)
1778 return true;
1779 return false;
1782 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1783 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1784 to the inside of the loop. */
1786 static bool
1787 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1789 basic_block header = loop->header;
1790 edge e, tgt_edge, latch = loop_latch_edge (loop);
1791 edge_iterator ei;
1792 basic_block tgt_bb, atgt_bb;
1793 enum bb_dom_status domst;
1795 /* We have already threaded through headers to exits, so all the threading
1796 requests now are to the inside of the loop. We need to avoid creating
1797 irreducible regions (i.e., loops with more than one entry block), and
1798 also loops with several latch edges, or new subloops of the loop (although
1799 there are cases where it might be appropriate, it is difficult to decide,
1800 and doing it wrongly may confuse other optimizers).
1802 We could handle more general cases here. However, the intention is to
1803 preserve some information about the loop, which is impossible if its
1804 structure changes significantly, in a way that is not well understood.
1805 Thus we handle only a few important special cases, in which updating
1806 the loop-carried information should also be feasible:
1808 1) Propagation of latch edge to a block that dominates the latch block
1809 of a loop. This aims to handle the following idiom:
1811 first = 1;
1812 while (1)
1814 if (first)
1815 initialize;
1816 first = 0;
1817 body;
1820 After threading the latch edge, this becomes
1822 first = 1;
1823 if (first)
1824 initialize;
1825 while (1)
1827 first = 0;
1828 body;
1831 The original header of the loop is moved out of it, and we may thread
1832 the remaining edges through it without further constraints.
1834 2) All entry edges are propagated to a single basic block that dominates
1835 the latch block of the loop. This aims to handle the following idiom
1836 (normally created for "for" loops):
1838 i = 0;
1839 while (1)
1841 if (i >= 100)
1842 break;
1843 body;
1844 i++;
1847 This becomes
1849 i = 0;
1850 while (1)
1852 body;
1853 i++;
1854 if (i >= 100)
1855 break;
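/* Illustrative sketch (not part of the original file): a compilable,
   source-level analogue of idiom 2 above.  The function names and the bound
   100 are hypothetical.  "before" is the canonical while(1) form produced for
   a "for" loop; "after" shows the shape once the exit test has been threaded
   past the copied header, so the test now sits at the bottom of the loop.
   The two are equivalent precisely because the first iteration's test is
   known to fail (i starts at 0 < 100), which is the knowledge that makes
   threading through the header valid.

     #include <stdio.h>

     static int
     before (void)
     {
       int i = 0, sum = 0;
       while (1)
         {
           if (i >= 100)
             break;
           sum += i;        // body
           i++;
         }
       return sum;
     }

     static int
     after (void)
     {
       int i = 0, sum = 0;
       while (1)
         {
           sum += i;        // body
           i++;
           if (i >= 100)
             break;
         }
       return sum;
     }

     int
     main (void)
     {
       printf ("%d %d\n", before (), after ());   // both print 4950
       return 0;
     }
*/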
1859 /* Threading through the header won't improve the code if the header has just
1860 one successor. */
1861 if (single_succ_p (header))
1862 goto fail;
1864 /* If we threaded the latch using a joiner block, we cancel the
1865 threading opportunity out of an abundance of caution. However,
1866 still allow threading from outside to inside the loop. */
1867 if (latch->aux)
1869 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1870 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1872 delete_jump_thread_path (path);
1873 latch->aux = NULL;
1877 if (latch->aux)
1879 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1880 tgt_edge = (*path)[1]->e;
1881 tgt_bb = tgt_edge->dest;
1883 else if (!may_peel_loop_headers
1884 && !redirection_block_p (loop->header))
1885 goto fail;
1886 else
1888 tgt_bb = NULL;
1889 tgt_edge = NULL;
1890 FOR_EACH_EDGE (e, ei, header->preds)
1892 if (!e->aux)
1894 if (e == latch)
1895 continue;
1897 /* If latch is not threaded, and there is a header
1898 edge that is not threaded, we would create a loop
1899 with multiple entries. */
1900 goto fail;
1903 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1905 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1906 goto fail;
1907 tgt_edge = (*path)[1]->e;
1908 atgt_bb = tgt_edge->dest;
1909 if (!tgt_bb)
1910 tgt_bb = atgt_bb;
1911 /* Two targets of threading would make us create a loop
1912 with multiple entries. */
1913 else if (tgt_bb != atgt_bb)
1914 goto fail;
1917 if (!tgt_bb)
1919 /* There are no threading requests. */
1920 return false;
1923 /* Redirecting to an empty loop latch is useless. */
1924 if (tgt_bb == loop->latch
1925 && empty_block_p (loop->latch))
1926 goto fail;
1929 /* The target block must dominate the loop latch, otherwise we would be
1930 creating a subloop. */
1931 domst = determine_bb_domination_status (loop, tgt_bb);
1932 if (domst == DOMST_NONDOMINATING)
1933 goto fail;
1934 if (domst == DOMST_LOOP_BROKEN)
1936 /* If the loop ceased to exist, mark it as such, and thread through its
1937 original header. */
1938 mark_loop_for_removal (loop);
1939 return thread_block (header, false);
1942 if (tgt_bb->loop_father->header == tgt_bb)
1944 /* If the target of the threading is a header of a subloop, we need
1945 to create a preheader for it, so that the headers of the two loops
1946 do not merge. */
1947 if (EDGE_COUNT (tgt_bb->preds) > 2)
1949 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1950 gcc_assert (tgt_bb != NULL);
1952 else
1953 tgt_bb = split_edge (tgt_edge);
1956 if (latch->aux)
1958 basic_block *bblocks;
1959 unsigned nblocks, i;
1961 /* First handle the case where the latch edge is redirected. We are copying
1962 the loop header but not creating a multiple entry loop. Make the
1963 cfg manipulation code aware of that fact. */
1964 set_loop_copy (loop, loop);
1965 loop->latch = thread_single_edge (latch);
1966 set_loop_copy (loop, NULL);
1967 gcc_assert (single_succ (loop->latch) == tgt_bb);
1968 loop->header = tgt_bb;
1970 /* Remove the new pre-header blocks from our loop. */
1971 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1972 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1973 bblocks, loop->num_nodes, tgt_bb);
1974 for (i = 0; i < nblocks; i++)
1975 if (bblocks[i]->loop_father == loop)
1977 remove_bb_from_loops (bblocks[i]);
1978 add_bb_to_loop (bblocks[i], loop_outer (loop));
1980 free (bblocks);
1982 /* If the new header has multiple latches mark it so. */
1983 FOR_EACH_EDGE (e, ei, loop->header->preds)
1984 if (e->src->loop_father == loop
1985 && e->src != loop->latch)
1987 loop->latch = NULL;
1988 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1991 /* Cancel remaining threading requests that would make the
1992 loop a multiple entry loop. */
1993 FOR_EACH_EDGE (e, ei, header->preds)
1995 edge e2;
1997 if (e->aux == NULL)
1998 continue;
2000 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2001 e2 = path->last ()->e;
2003 if (e->src->loop_father != e2->dest->loop_father
2004 && e2->dest != loop->header)
2006 delete_jump_thread_path (path);
2007 e->aux = NULL;
2011 /* Thread the remaining edges through the former header. */
2012 thread_block (header, false);
2014 else
2016 basic_block new_preheader;
2018 /* Now consider the case where entry edges are redirected to the new entry
2019 block. Remember one entry edge, so that we can find the new
2020 preheader (its destination after threading). */
2021 FOR_EACH_EDGE (e, ei, header->preds)
2023 if (e->aux)
2024 break;
2027 /* The duplicate of the header is the new preheader of the loop. Ensure
2028 that it is placed correctly in the loop hierarchy. */
2029 set_loop_copy (loop, loop_outer (loop));
2031 thread_block (header, false);
2032 set_loop_copy (loop, NULL);
2033 new_preheader = e->dest;
2035 /* Create the new latch block. This is always necessary, as the latch
2036 must have only a single successor, but the original header had at
2037 least two successors. */
2038 loop->latch = NULL;
2039 mfb_kj_edge = single_succ_edge (new_preheader);
2040 loop->header = mfb_kj_edge->dest;
2041 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2042 loop->header = latch->dest;
2043 loop->latch = latch->src;
2046 return true;
2048 fail:
2049 /* We failed to thread anything. Cancel the requests. */
2050 FOR_EACH_EDGE (e, ei, header->preds)
2052 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2054 if (path)
2056 delete_jump_thread_path (path);
2057 e->aux = NULL;
2060 return false;
2063 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2064 PHI arguments associated with those edges are equal or there are no
2065 PHI arguments, otherwise return FALSE. */
2067 static bool
2068 phi_args_equal_on_edges (edge e1, edge e2)
2070 gphi_iterator gsi;
2071 int indx1 = e1->dest_idx;
2072 int indx2 = e2->dest_idx;
2074 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2076 gphi *phi = gsi.phi ();
2078 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2079 gimple_phi_arg_def (phi, indx2), 0))
2080 return false;
2082 return true;
2085 /* Walk through the registered jump threads and convert them into a
2086 form convenient for this pass.
2088 Any block which has incoming edges threaded to outgoing edges
2089 will have its entry in THREADED_BLOCK set.
2091 Any threaded edge will have its new outgoing edge stored in the
2092 original edge's AUX field.
2094 This form avoids the need to walk all the edges in the CFG to
2095 discover blocks which need processing and avoids unnecessary
2096 hash table lookups to map from threaded edge to new target. */
2098 static void
2099 mark_threaded_blocks (bitmap threaded_blocks)
2101 unsigned int i;
2102 bitmap_iterator bi;
2103 bitmap tmp = BITMAP_ALLOC (NULL);
2104 basic_block bb;
2105 edge e;
2106 edge_iterator ei;
2108 /* It is possible to have jump threads in which one is a subpath
2109 of the other, i.e., (A, B), (B, C), (C, D) where B is a joiner
2110 block and (B, C), (C, D) where no joiner block exists.
2112 When this occurs, ignore the jump thread request with the joiner
2113 block. It's totally subsumed by the simpler jump thread request.
2115 This results in less block copying and simpler CFGs. More importantly,
2116 when we duplicate the joiner block, B, in this case we will create
2117 a new threading opportunity that we wouldn't be able to optimize
2118 until the next jump threading iteration.
2120 So first convert the jump thread requests which do not require a
2121 joiner block. */
2122 for (i = 0; i < paths.length (); i++)
2124 vec<jump_thread_edge *> *path = paths[i];
2126 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2128 edge e = (*path)[0]->e;
2129 e->aux = (void *)path;
2130 bitmap_set_bit (tmp, e->dest->index);
2134 /* Now iterate again, converting cases where we want to thread
2135 through a joiner block, but only if no other edge on the path
2136 already has a jump thread attached to it. We do this in two passes,
2137 to avoid situations where the order in the paths vec can hide overlapping
2138 threads (the path is recorded on the incoming edge, so we would miss
2139 cases where the second path starts at a downstream edge on the same
2140 path). First record all joiner paths, deleting any in the unexpected
2141 case where there is already a path for that incoming edge. */
2142 for (i = 0; i < paths.length (); i++)
2144 vec<jump_thread_edge *> *path = paths[i];
2146 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2148 /* Attach the path to the starting edge if none is yet recorded. */
2149 if ((*path)[0]->e->aux == NULL)
2151 (*path)[0]->e->aux = path;
2153 else
2155 paths.unordered_remove (i);
2156 if (dump_file && (dump_flags & TDF_DETAILS))
2157 dump_jump_thread_path (dump_file, *path, false);
2158 delete_jump_thread_path (path);
2162 /* Second, look for paths that have any other jump thread attached to
2163 them, and either finish converting them or cancel them. */
2164 for (i = 0; i < paths.length (); i++)
2166 vec<jump_thread_edge *> *path = paths[i];
2167 edge e = (*path)[0]->e;
2169 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2171 unsigned int j;
2172 for (j = 1; j < path->length (); j++)
2173 if ((*path)[j]->e->aux != NULL)
2174 break;
2176 /* If we iterated through the entire path without exiting the loop,
2177 then we are good to go, record it. */
2178 if (j == path->length ())
2179 bitmap_set_bit (tmp, e->dest->index);
2180 else
2182 e->aux = NULL;
2183 paths.unordered_remove (i);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 dump_jump_thread_path (dump_file, *path, false);
2186 delete_jump_thread_path (path);
2191 /* If optimizing for size, only thread through a block if we don't have
2192 to duplicate it or it's an otherwise empty redirection block. */
2193 if (optimize_function_for_size_p (cfun))
2195 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2197 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2198 if (EDGE_COUNT (bb->preds) > 1
2199 && !redirection_block_p (bb))
2201 FOR_EACH_EDGE (e, ei, bb->preds)
2203 if (e->aux)
2205 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2206 delete_jump_thread_path (path);
2207 e->aux = NULL;
2211 else
2212 bitmap_set_bit (threaded_blocks, i);
2215 else
2216 bitmap_copy (threaded_blocks, tmp);
2218 /* Look for jump threading paths which cross multiple loop headers.
2220 The code to thread through loop headers will change the CFG in ways
2221 that break assumptions made by the loop optimization code.
2223 We don't want to blindly cancel the requests. We can instead do better
2224 by trimming off the end of the jump thread path. */
2225 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2227 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2228 FOR_EACH_EDGE (e, ei, bb->preds)
2230 if (e->aux)
2232 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2234 for (unsigned int i = 0, crossed_headers = 0;
2235 i < path->length ();
2236 i++)
2238 basic_block dest = (*path)[i]->e->dest;
2239 crossed_headers += (dest == dest->loop_father->header);
2240 if (crossed_headers > 1)
2242 /* Trim from entry I onwards. */
2243 for (unsigned int j = i; j < path->length (); j++)
2244 delete (*path)[j];
2245 path->truncate (i);
2247 /* Now that we've truncated the path, make sure
2248 what's left is still valid. We need at least
2249 two edges on the path and the last edge can not
2250 be a joiner. This should never happen, but let's
2251 be safe. */
2252 if (path->length () < 2
2253 || (path->last ()->type
2254 == EDGE_COPY_SRC_JOINER_BLOCK))
2256 delete_jump_thread_path (path);
2257 e->aux = NULL;
2259 break;
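/* Illustrative sketch (not part of the original file): the truncation above,
   restated over a toy path.  The struct and the sample data are hypothetical;
   each entry only records whether the edge's destination is the header of its
   loop.  We keep the prefix of the path up to (but not including) the edge
   that would cross a second loop header.

     #include <stdio.h>

     struct toy_edge { int dest_is_loop_header; };

     // Returns the new length of PATH after trimming at the second header.
     static unsigned
     trim_at_second_header (const struct toy_edge *path, unsigned len)
     {
       unsigned crossed_headers = 0;
       for (unsigned i = 0; i < len; i++)
         {
           crossed_headers += path[i].dest_is_loop_header;
           if (crossed_headers > 1)
             return i;               // drop entries i .. len-1
         }
       return len;
     }

     int
     main (void)
     {
       struct toy_edge path[] = { {0}, {1}, {0}, {1}, {0} };
       unsigned len = trim_at_second_header (path, 5);
       printf ("kept %u of 5 edges\n", len);   // kept 3 of 5 edges
       return 0;
     }
*/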
2266 /* If we have a joiner block (J) which has two successors S1 and S2 and
2267 we are threading through S1 and the final destination of the thread
2268 is S2, then we must verify that any PHI nodes in S2 have the same
2269 PHI arguments for the edge J->S2 and J->S1->...->S2.
2271 We used to detect this prior to registering the jump thread, but
2272 that prohibits propagation of edge equivalences into non-dominated
2273 PHI nodes as the equivalency test might occur before propagation.
2275 This must also occur after we truncate any jump threading paths
2276 as this scenario may only show up after truncation.
2278 This works for now, but will need improvement as part of the FSA
2279 optimization.
2281 Note since we've moved the thread request data to the edges,
2282 we have to iterate on those rather than the threaded_edges vector. */
2283 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2285 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2286 FOR_EACH_EDGE (e, ei, bb->preds)
2288 if (e->aux)
2290 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2291 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2293 if (have_joiner)
2295 basic_block joiner = e->dest;
2296 edge final_edge = path->last ()->e;
2297 basic_block final_dest = final_edge->dest;
2298 edge e2 = find_edge (joiner, final_dest);
2300 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2302 delete_jump_thread_path (path);
2303 e->aux = NULL;
2310 BITMAP_FREE (tmp);
2314 /* Return TRUE if BB ends with a switch statement or a computed goto.
2315 Otherwise return false. */
2316 static bool
2317 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2319 gimple stmt = last_stmt (bb);
2320 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2321 return true;
2322 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2323 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2324 return true;
2325 return false;
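/* Illustrative sketch (not part of the original file): the two source
   constructs the predicate above looks for, written as a standalone program.
   "dispatch" ends in a GIMPLE_SWITCH; "dispatch_goto" uses the GNU
   computed-goto extension, so its final jump is a GIMPLE_GOTO whose
   destination is an SSA_NAME rather than a constant label.  The function
   names and values are hypothetical.

     #include <stdio.h>

     static int
     dispatch (int x)
     {
       switch (x)                 // multiway branch: switch
         {
         case 0: return 10;
         case 1: return 20;
         default: return 30;
         }
     }

     static int
     dispatch_goto (int x)
     {
       static void *targets[] = { &&zero, &&one, &&other };
       void *t = targets[(x < 0 || x > 1) ? 2 : x];
       goto *t;                   // multiway branch: computed goto (GNU extension)
     zero:  return 10;
     one:   return 20;
     other: return 30;
     }

     int
     main (void)
     {
       printf ("%d %d\n", dispatch (1), dispatch_goto (1));   // prints "20 20"
       return 0;
     }
*/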
2328 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2329 case of a SEME (Single Entry Multiple Exits) region in which all nodes in the
2330 REGION have exactly one incoming edge. The only exception is the first block,
2331 which may not have been connected to the rest of the cfg yet. */
2333 DEBUG_FUNCTION void
2334 verify_jump_thread (basic_block *region, unsigned n_region)
2336 for (unsigned i = 0; i < n_region; i++)
2337 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2340 /* Return true when BB is one of the first N items in BBS. */
2342 static inline bool
2343 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2345 for (int i = 0; i < n; i++)
2346 if (bb == bbs[i])
2347 return true;
2349 return false;
2352 /* Duplicates a jump-thread path of N_REGION basic blocks.
2353 The ENTRY edge is redirected to the duplicate of the region.
2355 Remove the last conditional statement in the last basic block in the REGION,
2356 and create a single fallthru edge pointing to the same destination as the
2357 EXIT edge.
2359 The new basic blocks are stored to REGION_COPY in the same order as they had
2360 in REGION, provided that REGION_COPY is not NULL.
2362 Returns false if it is unable to copy the region, true otherwise. */
2364 static bool
2365 duplicate_thread_path (edge entry, edge exit,
2366 basic_block *region, unsigned n_region,
2367 basic_block *region_copy)
2369 unsigned i;
2370 bool free_region_copy = false;
2371 struct loop *loop = entry->dest->loop_father;
2372 edge exit_copy;
2373 edge redirected;
2374 int total_freq = 0, entry_freq = 0;
2375 gcov_type total_count = 0, entry_count = 0;
2377 if (!can_copy_bbs_p (region, n_region))
2378 return false;
2380 /* Some sanity checking. Note that we do not check for all possible
2381 misuses of the functions. I.e. if you ask to copy something weird,
2382 it will work, but the state of structures probably will not be
2383 correct. */
2384 for (i = 0; i < n_region; i++)
2386 /* We do not handle subloops, i.e. all the blocks must belong to the
2387 same loop. */
2388 if (region[i]->loop_father != loop)
2389 return false;
2392 initialize_original_copy_tables ();
2394 set_loop_copy (loop, loop);
2396 if (!region_copy)
2398 region_copy = XNEWVEC (basic_block, n_region);
2399 free_region_copy = true;
2402 if (entry->dest->count)
2404 total_count = entry->dest->count;
2405 entry_count = entry->count;
2406 /* Fix up corner cases, to avoid division by zero or creation of negative
2407 frequencies. */
2408 if (entry_count > total_count)
2409 entry_count = total_count;
2411 else
2413 total_freq = entry->dest->frequency;
2414 entry_freq = EDGE_FREQUENCY (entry);
2415 /* Fix up corner cases, to avoid division by zero or creation of negative
2416 frequencies. */
2417 if (total_freq == 0)
2418 total_freq = 1;
2419 else if (entry_freq > total_freq)
2420 entry_freq = total_freq;
2423 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2424 split_edge_bb_loc (entry), false);
2426 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2427 following code ensures that all the edges exiting the jump-thread path are
2428 redirected back to the original code: these edges are exceptions
2429 invalidating the property that is propagated by executing all the blocks of
2430 the jump-thread path in order. */
2432 for (i = 0; i < n_region; i++)
2434 edge e;
2435 edge_iterator ei;
2436 basic_block bb = region_copy[i];
2438 if (single_succ_p (bb))
2440 /* Make sure the successor is the next node in the path. */
2441 gcc_assert (i + 1 == n_region
2442 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2443 continue;
2446 /* Special case the last block on the path: make sure that it does not
2447 jump back on the copied path. */
2448 if (i + 1 == n_region)
2450 FOR_EACH_EDGE (e, ei, bb->succs)
2451 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2453 basic_block orig = get_bb_original (e->dest);
2454 if (orig)
2455 redirect_edge_and_branch_force (e, orig);
2457 continue;
2460 /* Redirect all other edges jumping to non-adjacent blocks back to the
2461 original code. */
2462 FOR_EACH_EDGE (e, ei, bb->succs)
2463 if (region_copy[i + 1] != e->dest)
2465 basic_block orig = get_bb_original (e->dest);
2466 if (orig)
2467 redirect_edge_and_branch_force (e, orig);
2471 if (total_count)
2473 scale_bbs_frequencies_gcov_type (region, n_region,
2474 total_count - entry_count,
2475 total_count);
2476 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2477 total_count);
2479 else
2481 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2482 total_freq);
2483 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
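/* Illustrative sketch (not part of the original file): the profile split
   performed just above, as plain arithmetic with made-up numbers.  Blocks in
   the original region are scaled by (total - entry) / total and blocks in the
   copy by entry / total, after clamping ENTRY so that neither share can go
   negative.  When every execution entered through ENTRY, the original region's
   profile drops to zero and the copy inherits it all.

     #include <stdio.h>

     int
     main (void)
     {
       long total = 1000, entry = 1200;          // corner case: entry > total
       if (entry > total)
         entry = total;                          // clamp, as the code above does
       long block_count = 600;                   // some block inside the region
       long orig_scaled = block_count * (total - entry) / total;
       long copy_scaled = block_count * entry / total;
       printf ("original %ld, copy %ld\n", orig_scaled, copy_scaled);  // 0, 600
       return 0;
     }
*/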
2486 #ifdef ENABLE_CHECKING
2487 verify_jump_thread (region_copy, n_region);
2488 #endif
2490 /* Remove the last branch in the jump thread path. */
2491 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2492 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2494 if (e) {
2495 rescan_loop_exit (e, true, false);
2496 e->probability = REG_BR_PROB_BASE;
2497 e->count = region_copy[n_region - 1]->count;
2500 /* Redirect the entry and add the phi node arguments. */
2501 if (entry->dest == loop->header)
2502 mark_loop_for_removal (loop);
2503 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2504 gcc_assert (redirected != NULL);
2505 flush_pending_stmts (entry);
2507 /* Add the other PHI node arguments. */
2508 add_phi_args_after_copy (region_copy, n_region, NULL);
2510 if (free_region_copy)
2511 free (region_copy);
2513 free_original_copy_tables ();
2514 return true;
2517 /* Return true when PATH is a valid jump-thread path. */
2519 static bool
2520 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2522 unsigned len = path->length ();
2524 /* Check that the path is connected. */
2525 for (unsigned int j = 0; j < len - 1; j++)
2526 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2527 return false;
2529 return true;
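/* Illustrative sketch (not part of the original file): the connectivity
   invariant checked above, restated over a toy path.  The struct and the
   block numbers are hypothetical; a path is valid only if each edge ends
   where the next one starts.

     #include <stdio.h>

     struct toy_edge { int src, dest; };

     static int
     toy_path_connected (const struct toy_edge *path, unsigned len)
     {
       for (unsigned j = 0; j + 1 < len; j++)
         if (path[j].dest != path[j + 1].src)
           return 0;
       return 1;
     }

     int
     main (void)
     {
       struct toy_edge good[] = { {1, 2}, {2, 3}, {3, 5} };
       struct toy_edge bad[]  = { {1, 2}, {4, 5} };
       printf ("%d %d\n", toy_path_connected (good, 3),
               toy_path_connected (bad, 2));        // prints "1 0"
       return 0;
     }
*/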
2532 /* Walk through all blocks and thread incoming edges to the appropriate
2533 outgoing edge for each edge pair recorded in THREADED_EDGES.
2535 It is the caller's responsibility to fix the dominance information
2536 and rewrite duplicated SSA_NAMEs back into SSA form.
2538 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2539 loop headers if it does not simplify the loop.
2541 Returns true if one or more edges were threaded, false otherwise. */
2543 bool
2544 thread_through_all_blocks (bool may_peel_loop_headers)
2546 bool retval = false;
2547 unsigned int i;
2548 bitmap_iterator bi;
2549 bitmap threaded_blocks;
2550 struct loop *loop;
2552 if (!paths.exists ())
2553 return false;
2555 threaded_blocks = BITMAP_ALLOC (NULL);
2556 memset (&thread_stats, 0, sizeof (thread_stats));
2558 /* Jump-thread all FSM threads before other jump-threads. */
2559 for (i = 0; i < paths.length ();)
2561 vec<jump_thread_edge *> *path = paths[i];
2562 edge entry = (*path)[0]->e;
2564 /* Only code-generate FSM jump-threads in this loop. */
2565 if ((*path)[0]->type != EDGE_FSM_THREAD)
2567 i++;
2568 continue;
2571 /* Do not jump-thread twice from the same block. */
2572 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2573 /* Verify that the jump thread path is still valid: a
2574 previous jump-thread may have changed the CFG, and
2575 invalidated the current path. */
2576 || !valid_jump_thread_path (path))
2578 /* Remove invalid FSM jump-thread paths. */
2579 delete_jump_thread_path (path);
2580 paths.unordered_remove (i);
2581 continue;
2584 unsigned len = path->length ();
2585 edge exit = (*path)[len - 1]->e;
2586 basic_block *region = XNEWVEC (basic_block, len - 1);
2588 for (unsigned int j = 0; j < len - 1; j++)
2589 region[j] = (*path)[j]->e->dest;
2591 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2593 /* We do not update dominance info. */
2594 free_dominance_info (CDI_DOMINATORS);
2595 bitmap_set_bit (threaded_blocks, entry->src->index);
2596 retval = true;
2599 delete_jump_thread_path (path);
2600 paths.unordered_remove (i);
2603 /* Remove from PATHS all the jump-threads starting with an edge already
2604 jump-threaded. */
2605 for (i = 0; i < paths.length ();)
2607 vec<jump_thread_edge *> *path = paths[i];
2608 edge entry = (*path)[0]->e;
2610 /* Do not jump-thread twice from the same block. */
2611 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2613 delete_jump_thread_path (path);
2614 paths.unordered_remove (i);
2616 else
2617 i++;
2620 bitmap_clear (threaded_blocks);
2622 mark_threaded_blocks (threaded_blocks);
2624 initialize_original_copy_tables ();
2626 /* First perform the threading requests that do not affect
2627 loop structure. */
2628 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2630 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2632 if (EDGE_COUNT (bb->preds) > 0)
2633 retval |= thread_block (bb, true);
2636 /* Then perform the threading through loop headers. We start with the
2637 innermost loop, so that the changes in cfg we perform won't affect
2638 further threading. */
2639 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2641 if (!loop->header
2642 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2643 continue;
2645 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2648 /* Any jump threading paths that are still attached to edges at this
2649 point must be one of two cases.
2651 First, we could have a jump threading path which went from outside
2652 a loop to inside a loop that was ignored because a prior jump thread
2653 across a backedge was realized (which indirectly causes the loop
2654 above to ignore the latter thread). We can detect these because the
2655 loop structures will be different and we do not currently try to
2656 optimize this case.
2658 Second, we could be threading across a backedge to a point within the
2659 same loop. This occurs for the FSA/FSM optimization and we would
2660 like to optimize it. However, we have to be very careful as this
2661 may completely scramble the loop structures, with the result being
2662 irreducible loops causing us to throw away our loop structure.
2664 As a compromise for the latter case, if the thread path ends in
2665 a block where the last statement is a multiway branch, then go
2666 ahead and thread it, else ignore it. */
2667 basic_block bb;
2668 edge e;
2669 FOR_EACH_BB_FN (bb, cfun)
2671 /* If we do end up threading here, we can remove elements from
2672 BB->preds. Thus we can not use the FOR_EACH_EDGE iterator. */
2673 for (edge_iterator ei = ei_start (bb->preds);
2674 (e = ei_safe_edge (ei));)
2675 if (e->aux)
2677 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2679 /* Case 1, threading from outside to inside the loop
2680 after we'd already threaded through the header. */
2681 if ((*path)[0]->e->dest->loop_father
2682 != path->last ()->e->src->loop_father)
2684 delete_jump_thread_path (path);
2685 e->aux = NULL;
2686 ei_next (&ei);
2688 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2690 /* The code to thread through loop headers may have
2691 split a block with jump threads attached to it.
2693 We can identify this with a disjoint jump threading
2694 path. If found, just remove it. */
2695 for (unsigned int i = 0; i < path->length () - 1; i++)
2696 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2698 delete_jump_thread_path (path);
2699 e->aux = NULL;
2700 ei_next (&ei);
2701 break;
2704 /* Our path is still valid, thread it. */
2705 if (e->aux)
2707 if (thread_block ((*path)[0]->e->dest, false))
2708 e->aux = NULL;
2709 else
2711 delete_jump_thread_path (path);
2712 e->aux = NULL;
2713 ei_next (&ei);
2717 else
2719 delete_jump_thread_path (path);
2720 e->aux = NULL;
2721 ei_next (&ei);
2724 else
2725 ei_next (&ei);
2728 statistics_counter_event (cfun, "Jumps threaded",
2729 thread_stats.num_threaded_edges);
2731 free_original_copy_tables ();
2733 BITMAP_FREE (threaded_blocks);
2734 threaded_blocks = NULL;
2735 paths.release ();
2737 if (retval)
2738 loops_state_set (LOOPS_NEED_FIXUP);
2740 return retval;
2743 /* Delete the jump threading path PATH. We have to explicitly delete
2744 each entry in the vector, then the container. */
2746 void
2747 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2749 for (unsigned int i = 0; i < path->length (); i++)
2750 delete (*path)[i];
2751 path->release();
2752 delete path;
2755 /* Register a jump threading opportunity. We queue up all the jump
2756 threading opportunities discovered by a pass and update the CFG
2757 and SSA form all at once.
2759 PATH is the series of edges to follow. Its first edge is the edge we can
2760 thread and its last edge gives the new target, i.e., we are effectively
2761 recording that the start of PATH can be redirected to PATH's final destination after fixing the SSA graph. */
2763 void
2764 register_jump_thread (vec<jump_thread_edge *> *path)
2766 if (!dbg_cnt (registered_jump_thread))
2768 delete_jump_thread_path (path);
2769 return;
2772 /* First make sure there are no NULL outgoing edges on the jump threading
2773 path. That can happen when jumping to a constant address. */
2774 for (unsigned int i = 0; i < path->length (); i++)
2775 if ((*path)[i]->e == NULL)
2777 if (dump_file && (dump_flags & TDF_DETAILS))
2779 fprintf (dump_file,
2780 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2781 dump_jump_thread_path (dump_file, *path, false);
2784 delete_jump_thread_path (path);
2785 return;
2788 if (dump_file && (dump_flags & TDF_DETAILS))
2789 dump_jump_thread_path (dump_file, *path, true);
2791 if (!paths.exists ())
2792 paths.create (5);
2794 paths.safe_push (path);
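/* Illustrative sketch (not part of the original file): how a client pass
   might build and register a two-edge jump thread path, mirroring the way
   thread_single_edge constructs a path above.  E and E2 stand for edges the
   pass has proved are taken together; this fragment assumes the GCC internal
   types declared in tree-ssa-threadupdate.h and is not a standalone program.

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();

     // The first entry records where the thread starts.
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));

     // E->dest will be copied; the copy's control statement is removed and
     // it falls through to E2->dest.
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));

     // Queue the request; the CFG and SSA form are updated later, all at
     // once, by thread_through_all_blocks.
     register_jump_thread (path);
*/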