gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "tree.h"
  25 #include "gimple.h"
  26 #include "cfghooks.h"
  27 #include "tree-pass.h"
  28 #include "ssa.h"
  29 #include "fold-const.h"
  30 #include "cfganal.h"
  31 #include "gimple-iterator.h"
  32 #include "tree-ssa.h"
  33 #include "tree-ssa-threadupdate.h"
  34 #include "cfgloop.h"
  35 #include "dbgcnt.h"
  36 #include "tree-cfg.h"
  37 #include "tree-vectorizer.h"
  38
  39 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  40    one or more in-edges to B to instead reach the destination of an
  41    out-edge from B while preserving any side effects in B.
  42
  43    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  44    side effects of executing B.
  45
  46      1. Make a copy of B (including its outgoing edges and statements).  Call
  47         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  48
  49      2. Remove the control statement at the end of B' and all outgoing edges
  50         except B'->C.
  51
  52      3. Add a new argument to each PHI in C with the same value as the existing
  53         argument associated with edge B->C.  Associate the new PHI arguments
  54         with the edge B'->C.
  55
  56      4. For each PHI in B, find or create a PHI in B' with an identical
  57         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  58         value as the PHI in B associated with the edge A->B.  Associate
  59         the new argument in the PHI in B' with the edge A->B.
  60
  61      5. Change the edge A->B to A->B'.
  62
  63         5a. This automatically deletes any PHI arguments associated with the
  64             edge A->B in B.
  65
  66         5b. This automatically associates each new argument added in step 4
  67             with the edge A->B'.
  68
  69      6. Repeat for other incoming edges into B.
  70
  71      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  72
  73    Note that block duplication can be minimized by first collecting the
  74    set of unique destination blocks that the incoming edges should
  75    be threaded to.
  76
  77    We reduce the number of edges and statements we create by not copying all
  78    the outgoing edges and the control statement in step #1.  We instead create
  79    a template block without the outgoing edges and duplicate the template.
  80
  81    Another case this code handles is threading through a "joiner" block.  In
  82    this case, we do not know the destination of the joiner block, but one
  83    of the outgoing edges from the joiner block leads to a threadable path.  This
  84    case largely works as outlined above, except the duplicate of the joiner
  85    block still contains a full set of outgoing edges and its control statement.
  86    We just redirect one of its outgoing edges to our jump threading path.  */
  87
  88
  89 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  90    all the incoming edges which thread to the same destination edge at
  91    the same time.  That avoids lots of table lookups to get information
  92    for the destination edge.
  93
  94    To realize that implementation we create a list of incoming edges
  95    which thread to the same outgoing edge.  Thus to implement steps
  96    #5 and #6 we traverse our hash table of outgoing edge information.
  97    For each entry we walk the list of incoming edges which thread to
  98    the current outgoing edge.  */
  99
 100 struct el
 101 {
 102   edge e;
 103   struct el *next;
 104 };
 105
 106 /* Main data structure recording information regarding B's duplicate
 107    blocks.  */
 108
 109 /* We need to efficiently record the unique thread destinations of this
 110    block and specific information associated with those destinations.  We
 111    may have many incoming edges threaded to the same outgoing edge.  This
 112    can be naturally implemented with a hash table.  */
 113
 114 struct redirection_data : free_ptr_hash<redirection_data>
 115 {
 116   /* We support wiring up two block duplicates in a jump threading path.
 117
 118      One is a normal block copy where we remove the control statement
 119      and wire up its single remaining outgoing edge to the thread path.
 120
 121      The other is a joiner block where we leave the control statement
 122      in place, but wire one of the outgoing edges to a thread path.
 123
 124      In theory we could have multiple block duplicates in a jump
 125      threading path, but I haven't tried that.
 126
 127      The duplicate blocks appear in this array in the same order in
 128      which they appear in the jump thread path.  */
 129   basic_block dup_blocks[2];
 130
 131   /* The jump threading path.  */
 132   vec<jump_thread_edge *> *path;
 133
 134   /* A list of incoming edges which we want to thread to the
 135      same path.  */
 136   struct el *incoming_edges;
 137
 138   /* hash_table support.  */
 139   static inline hashval_t hash (const redirection_data *);
 140   static inline int equal (const redirection_data *, const redirection_data *);
 141 };
 142
 143 /* Dump a jump threading path, including annotations about each
 144    edge in the path.  */
 145
 146 static void
 147 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 148                        bool registering)
 149 {
 150   fprintf (dump_file,
 151            "  %s%s jump thread: (%d, %d) incoming edge; ",
 152            (registering ? "Registering" : "Cancelling"),
 153            (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 154            path[0]->e->src->index, path[0]->e->dest->index);
 155
 156   for (unsigned int i = 1; i < path.length (); i++)
 157     {
 158       /* We can get paths with a NULL edge when the final destination
 159          of a jump thread turns out to be a constant address.  We dump
 160          those paths when debugging, so we have to be prepared for that
 161          possibility here.  */
 162       if (path[i]->e == NULL)
 163         continue;
 164
 165       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 166         fprintf (dump_file, " (%d, %d) joiner; ",
 167                  path[i]->e->src->index, path[i]->e->dest->index);
 168       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
 169        fprintf (dump_file, " (%d, %d) normal;",
 170                  path[i]->e->src->index, path[i]->e->dest->index);
 171       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
 172        fprintf (dump_file, " (%d, %d) nocopy;",
 173                  path[i]->e->src->index, path[i]->e->dest->index);
 174       if (path[0]->type == EDGE_FSM_THREAD)
 175         fprintf (dump_file, " (%d, %d) ",
 176                  path[i]->e->src->index, path[i]->e->dest->index);
 177     }
 178   fputc ('\n', dump_file);
 179 }
 180
 181 /* Simple hashing function.  For any given incoming edge E, we're going
 182    to be most concerned with the final destination of its jump thread
 183    path.  So hash on the block index of the final edge in the path.  */
 184
 185 inline hashval_t
 186 redirection_data::hash (const redirection_data *p)
 187 {
 188   vec<jump_thread_edge *> *path = p->path;
 189   return path->last ()->e->dest->index;
 190 }
 191
 192 /* Given two hash table entries, return true if they have the same
 193    jump threading path.  */
 194 inline int
 195 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
 196 {
 197   vec<jump_thread_edge *> *path1 = p1->path;
 198   vec<jump_thread_edge *> *path2 = p2->path;
 199
 200   if (path1->length () != path2->length ())
 201     return false;
 202
 203   for (unsigned int i = 1; i < path1->length (); i++)
 204     {
 205       if ((*path1)[i]->type != (*path2)[i]->type
 206           || (*path1)[i]->e != (*path2)[i]->e)
 207         return false;
 208     }
 209
 210   return true;
 211 }
 212
 213 /* Rather than search all the edges in jump thread paths each time
 214    DOM is able to simply if control statement, we build a hash table
 215    with the deleted edges.  We only care about the address of the edge,
 216    not its contents.  */
 217 struct removed_edges : nofree_ptr_hash<edge_def>
 218 {
 219   static hashval_t hash (edge e) { return htab_hash_pointer (e); }
 220   static bool equal (edge e1, edge e2) { return e1 == e2; }
 221 };
 222
 223 static hash_table<removed_edges> *removed_edges;
 224
 225 /* Data structure of information to pass to hash table traversal routines.  */
 226 struct ssa_local_info_t
 227 {
 228   /* The current block we are working on.  */
 229   basic_block bb;
 230
 231   /* We only create a template block for the first duplicated block in a
 232      jump threading path as we may need many duplicates of that block.
 233
 234      The second duplicate block in a path is specific to that path.  Creating
 235      and sharing a template for that block is considerably more difficult.  */
 236   basic_block template_block;
 237
 238   /* Blocks duplicated for the thread.  */
 239   bitmap duplicate_blocks;
 240
 241   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 242   bool jumps_threaded;
 243
 244   /* When we have multiple paths through a joiner which reach different
 245      final destinations, then we may need to correct for potential
 246      profile insanities.  */
 247   bool need_profile_correction;
 248 };
 249
 250 /* Passes which use the jump threading code register jump threading
 251    opportunities as they are discovered.  We keep the registered
 252    jump threading opportunities in this vector as edge pairs
 253    (original_edge, target_edge).  */
 254 static vec<vec<jump_thread_edge *> *> paths;
 255
 256 /* When we start updating the CFG for threading, data necessary for jump
 257    threading is attached to the AUX field for the incoming edge.  Use these
 258    macros to access the underlying structure attached to the AUX field.  */
 259 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 260
 261 /* Jump threading statistics.  */
 262
 263 struct thread_stats_d
 264 {
 265   unsigned long num_threaded_edges;
 266 };
 267
 268 struct thread_stats_d thread_stats;
 269
 270
 271 /* Remove the last statement in block BB if it is a control statement
 272    Also remove all outgoing edges except the edge which reaches DEST_BB.
 273    If DEST_BB is NULL, then remove all outgoing edges.  */
 274
 275 void
 276 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 277 {
 278   gimple_stmt_iterator gsi;
 279   edge e;
 280   edge_iterator ei;
 281
 282   gsi = gsi_last_bb (bb);
 283
 284   /* If the duplicate ends with a control statement, then remove it.
 285
 286      Note that if we are duplicating the template block rather than the
 287      original basic block, then the duplicate might not have any real
 288      statements in it.  */
 289   if (!gsi_end_p (gsi)
 290       && gsi_stmt (gsi)
 291       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 292           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 293           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 294     gsi_remove (&gsi, true);
 295
 296   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 297     {
 298       if (e->dest != dest_bb)
 299         {
 300           free_dom_edge_info (e);
 301           remove_edge (e);
 302         }
 303       else
 304         {
 305           e->probability = profile_probability::always ();
 306           ei_next (&ei);
 307         }
 308     }
 309
 310   /* If the remaining edge is a loop exit, there must have
 311      a removed edge that was not a loop exit.
 312
 313      In that case BB and possibly other blocks were previously
 314      in the loop, but are now outside the loop.  Thus, we need
 315      to update the loop structures.  */
 316   if (single_succ_p (bb)
 317       && loop_outer (bb->loop_father)
 318       && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
 319     loops_state_set (LOOPS_NEED_FIXUP);
 320 }
 321
 322 /* Create a duplicate of BB.  Record the duplicate block in an array
 323    indexed by COUNT stored in RD.  */
 324
 325 static void
 326 create_block_for_threading (basic_block bb,
 327                             struct redirection_data *rd,
 328                             unsigned int count,
 329                             bitmap *duplicate_blocks)
 330 {
 331   edge_iterator ei;
 332   edge e;
 333
 334   /* We can use the generic block duplication code and simply remove
 335      the stuff we do not need.  */
 336   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 337
 338   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 339     e->aux = NULL;
 340
 341   /* Zero out the profile, since the block is unreachable for now.  */
 342   rd->dup_blocks[count]->frequency = 0;
 343   rd->dup_blocks[count]->count = profile_count::uninitialized ();
 344   if (duplicate_blocks)
 345     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
 346 }
 347
 348 /* Main data structure to hold information for duplicates of BB.  */
 349
 350 static hash_table<redirection_data> *redirection_data;
 351
 352 /* Given an outgoing edge E lookup and return its entry in our hash table.
 353
 354    If INSERT is true, then we insert the entry into the hash table if
 355    it is not already present.  INCOMING_EDGE is added to the list of incoming
 356    edges associated with E in the hash table.  */
 357
 358 static struct redirection_data *
 359 lookup_redirection_data (edge e, enum insert_option insert)
 360 {
 361   struct redirection_data **slot;
 362   struct redirection_data *elt;
 363   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 364
 365   /* Build a hash table element so we can see if E is already
 366      in the table.  */
 367   elt = XNEW (struct redirection_data);
 368   elt->path = path;
 369   elt->dup_blocks[0] = NULL;
 370   elt->dup_blocks[1] = NULL;
 371   elt->incoming_edges = NULL;
 372
 373   slot = redirection_data->find_slot (elt, insert);
 374
 375   /* This will only happen if INSERT is false and the entry is not
 376      in the hash table.  */
 377   if (slot == NULL)
 378     {
 379       free (elt);
 380       return NULL;
 381     }
 382
 383   /* This will only happen if E was not in the hash table and
 384      INSERT is true.  */
 385   if (*slot == NULL)
 386     {
 387       *slot = elt;
 388       elt->incoming_edges = XNEW (struct el);
 389       elt->incoming_edges->e = e;
 390       elt->incoming_edges->next = NULL;
 391       return elt;
 392     }
 393   /* E was in the hash table.  */
 394   else
 395     {
 396       /* Free ELT as we do not need it anymore, we will extract the
 397          relevant entry from the hash table itself.  */
 398       free (elt);
 399
 400       /* Get the entry stored in the hash table.  */
 401       elt = *slot;
 402
 403       /* If insertion was requested, then we need to add INCOMING_EDGE
 404          to the list of incoming edges associated with E.  */
 405       if (insert)
 406         {
 407           struct el *el = XNEW (struct el);
 408           el->next = elt->incoming_edges;
 409           el->e = e;
 410           elt->incoming_edges = el;
 411         }
 412
 413       return elt;
 414     }
 415 }
 416
 417 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 418    does not have a value associated with it.  */
 419
 420 static void
 421 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 422 {
 423   int src_idx = src_e->dest_idx;
 424   int tgt_idx = tgt_e->dest_idx;
 425
 426   /* Iterate over each PHI in e->dest.  */
 427   for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
 428                            gsi2 = gsi_start_phis (tgt_e->dest);
 429        !gsi_end_p (gsi);
 430        gsi_next (&gsi), gsi_next (&gsi2))
 431     {
 432       gphi *src_phi = gsi.phi ();
 433       gphi *dest_phi = gsi2.phi ();
 434       tree val = gimple_phi_arg_def (src_phi, src_idx);
 435       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
 436
 437       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 438       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 439     }
 440 }
 441
 442 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
 443    to see if it has constant value in a flow sensitive manner.  Set
 444    LOCUS to location of the constant phi arg and return the value.
 445    Return DEF directly if either PATH or idx is ZERO.  */
 446
 447 static tree
 448 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
 449                          basic_block bb, int idx, source_location *locus)
 450 {
 451   tree arg;
 452   gphi *def_phi;
 453   basic_block def_bb;
 454
 455   if (path == NULL || idx == 0)
 456     return def;
 457
 458   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
 459   if (!def_phi)
 460     return def;
 461
 462   def_bb = gimple_bb (def_phi);
 463   /* Don't propagate loop invariants into deeper loops.  */
 464   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
 465     return def;
 466
 467   /* Backtrack jump threading path from IDX to see if def has constant
 468      value.  */
 469   for (int j = idx - 1; j >= 0; j--)
 470     {
 471       edge e = (*path)[j]->e;
 472       if (e->dest == def_bb)
 473         {
 474           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
 475           if (is_gimple_min_invariant (arg))
 476             {
 477               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
 478               return arg;
 479             }
 480           break;
 481         }
 482     }
 483
 484   return def;
 485 }
 486
 487 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 488    Try to backtrack jump threading PATH from node IDX to see if the arg
 489    has constant value, copy constant value instead of argument itself
 490    if yes.  */
 491
 492 static void
 493 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
 494                vec<jump_thread_edge *> *path, int idx)
 495 {
 496   gphi_iterator gsi;
 497   int src_indx = src_e->dest_idx;
 498
 499   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 500     {
 501       gphi *phi = gsi.phi ();
 502       tree def = gimple_phi_arg_def (phi, src_indx);
 503       source_location locus = gimple_phi_arg_location (phi, src_indx);
 504
 505       if (TREE_CODE (def) == SSA_NAME
 506           && !virtual_operand_p (gimple_phi_result (phi)))
 507         def = get_value_locus_in_path (def, path, bb, idx, &locus);
 508
 509       add_phi_arg (phi, def, tgt_e, locus);
 510     }
 511 }
 512
 513 /* We have recently made a copy of ORIG_BB, including its outgoing
 514    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 515    ORIG_BB has a new argument associated with edge from NEW_BB to the
 516    successor.  Initialize the PHI argument so that it is equal to the PHI
 517    argument associated with the edge from ORIG_BB to the successor.
 518    PATH and IDX are used to check if the new PHI argument has constant
 519    value in a flow sensitive manner.  */
 520
 521 static void
 522 update_destination_phis (basic_block orig_bb, basic_block new_bb,
 523                          vec<jump_thread_edge *> *path, int idx)
 524 {
 525   edge_iterator ei;
 526   edge e;
 527
 528   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 529     {
 530       edge e2 = find_edge (new_bb, e->dest);
 531       copy_phi_args (e->dest, e, e2, path, idx);
 532     }
 533 }
 534
 535 /* Given a duplicate block and its single destination (both stored
 536    in RD).  Create an edge between the duplicate and its single
 537    destination.
 538
 539    Add an additional argument to any PHI nodes at the single
 540    destination.  IDX is the start node in jump threading path
 541    we start to check to see if the new PHI argument has constant
 542    value along the jump threading path.  */
 543
 544 static void
 545 create_edge_and_update_destination_phis (struct redirection_data *rd,
 546                                          basic_block bb, int idx)
 547 {
 548   edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 549
 550   rescan_loop_exit (e, true, false);
 551
 552   /* We used to copy the thread path here.  That was added in 2007
 553      and dutifully updated through the representation changes in 2013.
 554
 555      In 2013 we added code to thread from an interior node through
 556      the backedge to another interior node.  That runs after the code
 557      to thread through loop headers from outside the loop.
 558
 559      The latter may delete edges in the CFG, including those
 560      which appeared in the jump threading path we copied here.  Thus
 561      we'd end up using a dangling pointer.
 562
 563      After reviewing the 2007/2011 code, I can't see how anything
 564      depended on copying the AUX field and clearly copying the jump
 565      threading path is problematical due to embedded edge pointers.
 566      It has been removed.  */
 567   e->aux = NULL;
 568
 569   /* If there are any PHI nodes at the destination of the outgoing edge
 570      from the duplicate block, then we will need to add a new argument
 571      to them.  The argument should have the same value as the argument
 572      associated with the outgoing edge stored in RD.  */
 573   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
 574 }
 575
 576 /* Look through PATH beginning at START and return TRUE if there are
 577    any additional blocks that need to be duplicated.  Otherwise,
 578    return FALSE.  */
 579 static bool
 580 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 581                                  unsigned int start)
 582 {
 583   for (unsigned int i = start + 1; i < path->length (); i++)
 584     {
 585       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 586           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 587         return true;
 588     }
 589   return false;
 590 }
 591
 592
 593 /* Compute the amount of profile count/frequency coming into the jump threading
 594    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
 595    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
 596    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
 597    identify blocks duplicated for jump threading, which have duplicated
 598    edges that need to be ignored in the analysis.  Return true if path contains
 599    a joiner, false otherwise.
 600
 601    In the non-joiner case, this is straightforward - all the counts/frequency
 602    flowing into the jump threading path should flow through the duplicated
 603    block and out of the duplicated path.
 604
 605    In the joiner case, it is very tricky.  Some of the counts flowing into
 606    the original path go offpath at the joiner.  The problem is that while
 607    we know how much total count goes off-path in the original control flow,
 608    we don't know how many of the counts corresponding to just the jump
 609    threading path go offpath at the joiner.
 610
 611    For example, assume we have the following control flow and identified
 612    jump threading paths:
 613
 614                 A     B     C
 615                  \    |    /
 616                Ea \   |Eb / Ec
 617                    \  |  /
 618                     v v v
 619                       J       <-- Joiner
 620                      / \
 621                 Eoff/   \Eon
 622                    /     \
 623                   v       v
 624                 Soff     Son  <--- Normal
 625                          /\
 626                       Ed/  \ Ee
 627                        /    \
 628                       v     v
 629                       D      E
 630
 631             Jump threading paths: A -> J -> Son -> D (path 1)
 632                                   C -> J -> Son -> E (path 2)
 633
 634    Note that the control flow could be more complicated:
 635    - Each jump threading path may have more than one incoming edge.  I.e. A and
 636    Ea could represent multiple incoming blocks/edges that are included in
 637    path 1.
 638    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
 639    before or after the "normal" copy block).  These are not duplicated onto
 640    the jump threading path, as they are single-successor.
 641    - Any of the blocks along the path may have other incoming edges that
 642    are not part of any jump threading path, but add profile counts along
 643    the path.
 644
 645    In the above example, after all jump threading is complete, we will
 646    end up with the following control flow:
 647
 648                 A          B           C
 649                 |          |           |
 650               Ea|          |Eb         |Ec
 651                 |          |           |
 652                 v          v           v
 653                Ja          J          Jc
 654                / \        / \Eon'     / \
 655           Eona/   \   ---/---\--------   \Eonc
 656              /     \ /  /     \           \
 657             v       v  v       v          v
 658            Sona     Soff      Son       Sonc
 659              \                 /\         /
 660               \___________    /  \  _____/
 661                           \  /    \/
 662                            vv      v
 663                             D      E
 664
 665    The main issue to notice here is that when we are processing path 1
 666    (A->J->Son->D) we need to figure out the outgoing edge weights to
 667    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
 668    sum of the incoming weights to D remain Ed.  The problem with simply
 669    assuming that Ja (and Jc when processing path 2) has the same outgoing
 670    probabilities to its successors as the original block J, is that after
 671    all paths are processed and other edges/counts removed (e.g. none
 672    of Ec will reach D after processing path 2), we may end up with not
 673    enough count flowing along duplicated edge Sona->D.
 674
 675    Therefore, in the case of a joiner, we keep track of all counts
 676    coming in along the current path, as well as from predecessors not
 677    on any jump threading path (Eb in the above example).  While we
 678    first assume that the duplicated Eona for Ja->Sona has the same
 679    probability as the original, we later compensate for other jump
 680    threading paths that may eliminate edges.  We do that by keep track
 681    of all counts coming into the original path that are not in a jump
 682    thread (Eb in the above example, but as noted earlier, there could
 683    be other predecessors incoming to the path at various points, such
 684    as at Son).  Call this cumulative non-path count coming into the path
 685    before D as Enonpath.  We then ensure that the count from Sona->D is as at
 686    least as big as (Ed - Enonpath), but no bigger than the minimum
 687    weight along the jump threading path.  The probabilities of both the
 688    original and duplicated joiner block J and Ja will be adjusted
 689    accordingly after the updates.  */
 690
 691 static bool
 692 compute_path_counts (struct redirection_data *rd,
 693                      ssa_local_info_t *local_info,
 694                      profile_count *path_in_count_ptr,
 695                      profile_count *path_out_count_ptr,
 696                      int *path_in_freq_ptr)
 697 {
 698   edge e = rd->incoming_edges->e;
 699   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 700   edge elast = path->last ()->e;
 701   profile_count nonpath_count = profile_count::zero ();
 702   bool has_joiner = false;
 703   profile_count path_in_count = profile_count::zero ();
 704   int path_in_freq = 0;
 705
 706   /* Start by accumulating incoming edge counts to the path's first bb
 707      into a couple buckets:
 708         path_in_count: total count of incoming edges that flow into the
 709                   current path.
 710         nonpath_count: total count of incoming edges that are not
 711                   flowing along *any* path.  These are the counts
 712                   that will still flow along the original path after
 713                   all path duplication is done by potentially multiple
 714                   calls to this routine.
 715      (any other incoming edge counts are for a different jump threading
 716      path that will be handled by a later call to this routine.)
 717      To make this easier, start by recording all incoming edges that flow into
 718      the current path in a bitmap.  We could add up the path's incoming edge
 719      counts here, but we still need to walk all the first bb's incoming edges
 720      below to add up the counts of the other edges not included in this jump
 721      threading path.  */
 722   struct el *next, *el;
 723   auto_bitmap in_edge_srcs;
 724   for (el = rd->incoming_edges; el; el = next)
 725     {
 726       next = el->next;
 727       bitmap_set_bit (in_edge_srcs, el->e->src->index);
 728     }
 729   edge ein;
 730   edge_iterator ei;
 731   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 732     {
 733       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
 734       /* Simply check the incoming edge src against the set captured above.  */
 735       if (ein_path
 736           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
 737         {
 738           /* It is necessary but not sufficient that the last path edges
 739              are identical.  There may be different paths that share the
 740              same last path edge in the case where the last edge has a nocopy
 741              source block.  */
 742           gcc_assert (ein_path->last ()->e == elast);
 743           path_in_count += ein->count ();
 744           path_in_freq += EDGE_FREQUENCY (ein);
 745         }
 746       else if (!ein_path)
 747         {
 748           /* Keep track of the incoming edges that are not on any jump-threading
 749              path.  These counts will still flow out of original path after all
 750              jump threading is complete.  */
 751             nonpath_count += ein->count ();
 752         }
 753     }
 754
 755   /* This is needed due to insane incoming frequencies.  */
 756   if (path_in_freq > BB_FREQ_MAX)
 757     path_in_freq = BB_FREQ_MAX;
 758
 759   /* Now compute the fraction of the total count coming into the first
 760      path bb that is from the current threading path.  */
 761   profile_count total_count = e->dest->count;
 762   /* Handle incoming profile insanities.  */
 763   if (total_count < path_in_count)
 764     path_in_count = total_count;
 765   profile_probability onpath_scale = path_in_count.probability_in (total_count);
 766
 767   /* Walk the entire path to do some more computation in order to estimate
 768      how much of the path_in_count will flow out of the duplicated threading
 769      path.  In the non-joiner case this is straightforward (it should be
 770      the same as path_in_count, although we will handle incoming profile
 771      insanities by setting it equal to the minimum count along the path).
 772
 773      In the joiner case, we need to estimate how much of the path_in_count
 774      will stay on the threading path after the joiner's conditional branch.
 775      We don't really know for sure how much of the counts
 776      associated with this path go to each successor of the joiner, but we'll
 777      estimate based on the fraction of the total count coming into the path
 778      bb was from the threading paths (computed above in onpath_scale).
 779      Afterwards, we will need to do some fixup to account for other threading
 780      paths and possible profile insanities.
 781
 782      In order to estimate the joiner case's counts we also need to update
 783      nonpath_count with any additional counts coming into the path.  Other
 784      blocks along the path may have additional predecessors from outside
 785      the path.  */
 786   profile_count path_out_count = path_in_count;
 787   profile_count min_path_count = path_in_count;
 788   for (unsigned int i = 1; i < path->length (); i++)
 789     {
 790       edge epath = (*path)[i]->e;
 791       profile_count cur_count = epath->count ();
 792       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 793         {
 794           has_joiner = true;
 795           cur_count = cur_count.apply_probability (onpath_scale);
 796         }
 797       /* In the joiner case we need to update nonpath_count for any edges
 798          coming into the path that will contribute to the count flowing
 799          into the path successor.  */
 800       if (has_joiner && epath != elast)
 801         {
 802           /* Look for other incoming edges after joiner.  */
 803           FOR_EACH_EDGE (ein, ei, epath->dest->preds)
 804             {
 805               if (ein != epath
 806                   /* Ignore in edges from blocks we have duplicated for a
 807                      threading path, which have duplicated edge counts until
 808                      they are redirected by an invocation of this routine.  */
 809                   && !bitmap_bit_p (local_info->duplicate_blocks,
 810                                     ein->src->index))
 811                 nonpath_count += ein->count ();
 812             }
 813         }
 814       if (cur_count < path_out_count)
 815         path_out_count = cur_count;
 816       if (epath->count () < min_path_count)
 817         min_path_count = epath->count ();
 818     }
 819
 820   /* We computed path_out_count above assuming that this path targeted
 821      the joiner's on-path successor with the same likelihood as it
 822      reached the joiner.  However, other thread paths through the joiner
 823      may take a different path through the normal copy source block
 824      (i.e. they have a different elast), meaning that they do not
 825      contribute any counts to this path's elast.  As a result, it may
 826      turn out that this path must have more count flowing to the on-path
 827      successor of the joiner.  Essentially, all of this path's elast
 828      count must be contributed by this path and any nonpath counts
 829      (since any path through the joiner with a different elast will not
 830      include a copy of this elast in its duplicated path).
 831      So ensure that this path's path_out_count is at least the
 832      difference between elast->count () and nonpath_count.  Otherwise the edge
 833      counts after threading will not be sane.  */
 834   if (local_info->need_profile_correction
 835       && has_joiner && path_out_count < elast->count () - nonpath_count)
 836     {
 837       path_out_count = elast->count () - nonpath_count;
 838       /* But neither can we go above the minimum count along the path
 839          we are duplicating.  This can be an issue due to profile
 840          insanities coming in to this pass.  */
 841       if (path_out_count > min_path_count)
 842         path_out_count = min_path_count;
 843     }
 844
 845   *path_in_count_ptr = path_in_count;
 846   *path_out_count_ptr = path_out_count;
 847   *path_in_freq_ptr = path_in_freq;
 848   return has_joiner;
 849 }
 850
 851
 852 /* Update the counts and frequencies for both an original path
 853    edge EPATH and its duplicate EDUP.  The duplicate source block
 854    will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
 855    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
 856 static void
 857 update_profile (edge epath, edge edup, profile_count path_in_count,
 858                 profile_count path_out_count, int path_in_freq)
 859 {
 860   if (!(path_in_count > 0))
 861     return;
 862
 863   /* First update the duplicated block's count / frequency.  */
 864   if (edup)
 865     {
 866       basic_block dup_block = edup->src;
 867
 868       /* Edup's count is reduced by path_out_count.  We need to redistribute
 869          probabilities to the remaining edges.  */
 870
 871       edge esucc;
 872       edge_iterator ei;
 873       profile_probability edup_prob
 874          = path_out_count.probability_in (path_in_count);
 875
 876       /* Either scale up or down the remaining edges.
 877          probabilities are always in range <0,1> and thus we can't do
 878          both by same loop.  */
 879       if (edup->probability > edup_prob)
 880         {
 881            profile_probability rev_scale
 882              = (profile_probability::always () - edup->probability)
 883                / (profile_probability::always () - edup_prob);
 884            FOR_EACH_EDGE (esucc, ei, dup_block->succs)
 885              if (esucc != edup)
 886                esucc->probability /= rev_scale;
 887         }
 888       else if (edup->probability < edup_prob)
 889         {
 890            profile_probability scale
 891              = (profile_probability::always () - edup_prob)
 892                / (profile_probability::always () - edup->probability);
 893           FOR_EACH_EDGE (esucc, ei, dup_block->succs)
 894             if (esucc != edup)
 895               esucc->probability *= scale;
 896         }
 897       edup->probability = edup_prob;
 898
 899       /* FIXME once freqs_to_counts is dropped re-enable this check.  */
 900       gcc_assert (!dup_block->count.initialized_p () || 1);
 901       gcc_assert (dup_block->frequency == 0);
 902       dup_block->count = path_in_count;
 903       dup_block->frequency = path_in_freq;
 904     }
 905
 906   profile_count final_count = epath->count () - path_out_count;
 907
 908   /* Now update the original block's count and frequency in the
 909      opposite manner - remove the counts/freq that will flow
 910      into the duplicated block.  Handle underflow due to precision/
 911      rounding issues.  */
 912   epath->src->count -= path_in_count;
 913   epath->src->frequency -= path_in_freq;
 914   if (epath->src->frequency < 0)
 915     epath->src->frequency = 0;
 916
 917   /* Next update this path edge's original and duplicated counts.  We know
 918      that the duplicated path will have path_out_count flowing
 919      out of it (in the joiner case this is the count along the duplicated path
 920      out of the duplicated joiner).  This count can then be removed from the
 921      original path edge.  */
 922   if (epath->src->count > 0)
 923     {
 924       edge esucc;
 925       edge_iterator ei;
 926       profile_probability epath_prob = final_count.probability_in (epath->src->count);
 927
 928       if (epath->probability > epath_prob)
 929         {
 930            profile_probability rev_scale
 931              = (profile_probability::always () - epath->probability)
 932                / (profile_probability::always () - epath_prob);
 933            FOR_EACH_EDGE (esucc, ei, epath->src->succs)
 934              if (esucc != epath)
 935                esucc->probability /= rev_scale;
 936         }
 937       else if (epath->probability < epath_prob)
 938         {
 939            profile_probability scale
 940              = (profile_probability::always () - epath_prob)
 941                / (profile_probability::always () - epath->probability);
 942           FOR_EACH_EDGE (esucc, ei, epath->src->succs)
 943             if (esucc != epath)
 944               esucc->probability *= scale;
 945         }
 946       epath->probability = epath_prob;
 947     }
 948 }
 949
 950
 951 /* Check if the paths through RD all have estimated frequencies but zero
 952    profile counts.  This is more accurate than checking the entry block
 953    for a zero profile count, since profile insanities sometimes creep in.  */
 954
 955 static bool
 956 estimated_freqs_path (struct redirection_data *rd)
 957 {
 958   edge e = rd->incoming_edges->e;
 959   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 960   edge ein;
 961   edge_iterator ei;
 962   bool non_zero_freq = false;
 963   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 964     {
 965       if (ein->count () > 0)
 966         return false;
 967       non_zero_freq |= ein->src->frequency != 0;
 968     }
 969
 970   for (unsigned int i = 1; i < path->length (); i++)
 971     {
 972       edge epath = (*path)[i]->e;
 973       if (epath->src->count > 0)
 974         return false;
 975       non_zero_freq |= epath->src->frequency != 0;
 976       edge esucc;
 977       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
 978         {
 979           if (esucc->count () > 0)
 980             return false;
 981           non_zero_freq |= esucc->src->frequency != 0;
 982         }
 983     }
 984   return non_zero_freq;
 985 }
 986
 987
 988 /* Invoked for routines that have guessed frequencies and no profile
 989    counts to record the block and edge frequencies for paths through RD
 990    in the profile count fields of those blocks and edges.  This is because
 991    ssa_fix_duplicate_block_edges incrementally updates the block and
 992    edge counts as edges are redirected, and it is difficult to do that
 993    for edge frequencies which are computed on the fly from the source
 994    block frequency and probability.  When a block frequency is updated
 995    its outgoing edge frequencies are affected and become difficult to
 996    adjust.  */
 997
 998 static void
 999 freqs_to_counts_path (struct redirection_data *rd)
1000 {
1001   edge e = rd->incoming_edges->e;
1002   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1003   edge ein;
1004   edge_iterator ei;
1005
1006   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1007     ein->src->count = profile_count::from_gcov_type
1008           (ein->src->frequency * REG_BR_PROB_BASE);
1009   for (unsigned int i = 1; i < path->length (); i++)
1010     {
1011       edge epath = (*path)[i]->e;
1012       /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1013          errors applying the edge probability when the frequencies are very
1014          small.  */
1015       epath->src->count =
1016         profile_count::from_gcov_type
1017           (epath->src->frequency * REG_BR_PROB_BASE);
1018     }
1019 }
1020
1021
1022 /* For routines that have guessed frequencies and no profile counts, where we
1023    used freqs_to_counts_path to record block and edge frequencies for paths
1024    through RD, we clear the counts after completing all updates for RD.
1025    The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1026    but the block frequencies and edge probabilities were updated as well,
1027    so we can simply clear the count fields.  */
1028
1029 static void
1030 clear_counts_path (struct redirection_data *rd)
1031 {
1032   edge e = rd->incoming_edges->e;
1033   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1034   profile_count val = profile_count::uninitialized ();
1035   if (profile_status_for_fn (cfun) == PROFILE_READ)
1036     val = profile_count::zero ();
1037
1038   edge ein;
1039   edge_iterator ei;
1040
1041   FOR_EACH_EDGE (ein, ei, e->dest->preds)
1042     ein->src->count = val;
1043
1044   /* First clear counts along original path.  */
1045   for (unsigned int i = 1; i < path->length (); i++)
1046     {
1047       edge epath = (*path)[i]->e;
1048       epath->src->count = val;
1049     }
1050   /* Also need to clear the counts along duplicated path.  */
1051   for (unsigned int i = 0; i < 2; i++)
1052     {
1053       basic_block dup = rd->dup_blocks[i];
1054       if (!dup)
1055         continue;
1056       dup->count = val;
1057     }
1058 }
1059
1060 /* Wire up the outgoing edges from the duplicate blocks and
1061    update any PHIs as needed.  Also update the profile counts
1062    on the original and duplicate blocks and edges.  */
1063 void
1064 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1065                                ssa_local_info_t *local_info)
1066 {
1067   bool multi_incomings = (rd->incoming_edges->next != NULL);
1068   edge e = rd->incoming_edges->e;
1069   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1070   edge elast = path->last ()->e;
1071   profile_count path_in_count = profile_count::zero ();
1072   profile_count path_out_count = profile_count::zero ();
1073   int path_in_freq = 0;
1074
1075   /* This routine updates profile counts, frequencies, and probabilities
1076      incrementally. Since it is difficult to do the incremental updates
1077      using frequencies/probabilities alone, for routines without profile
1078      data we first take a snapshot of the existing block and edge frequencies
1079      by copying them into the empty profile count fields.  These counts are
1080      then used to do the incremental updates, and cleared at the end of this
1081      routine.  If the function is marked as having a profile, we still check
1082      to see if the paths through RD are using estimated frequencies because
1083      the routine had zero profile counts.  */
1084   bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1085                              || estimated_freqs_path (rd));
1086   if (do_freqs_to_counts)
1087     freqs_to_counts_path (rd);
1088
1089   /* First determine how much profile count to move from original
1090      path to the duplicate path.  This is tricky in the presence of
1091      a joiner (see comments for compute_path_counts), where some portion
1092      of the path's counts will flow off-path from the joiner.  In the
1093      non-joiner case the path_in_count and path_out_count should be the
1094      same.  */
1095   bool has_joiner = compute_path_counts (rd, local_info,
1096                                          &path_in_count, &path_out_count,
1097                                          &path_in_freq);
1098
1099   int cur_path_freq = path_in_freq;
1100   for (unsigned int count = 0, i = 1; i < path->length (); i++)
1101     {
1102       edge epath = (*path)[i]->e;
1103
1104       /* If we were threading through an joiner block, then we want
1105          to keep its control statement and redirect an outgoing edge.
1106          Else we want to remove the control statement & edges, then create
1107          a new outgoing edge.  In both cases we may need to update PHIs.  */
1108       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1109         {
1110           edge victim;
1111           edge e2;
1112
1113           gcc_assert (has_joiner);
1114
1115           /* This updates the PHIs at the destination of the duplicate
1116              block.  Pass 0 instead of i if we are threading a path which
1117              has multiple incoming edges.  */
1118           update_destination_phis (local_info->bb, rd->dup_blocks[count],
1119                                    path, multi_incomings ? 0 : i);
1120
1121           /* Find the edge from the duplicate block to the block we're
1122              threading through.  That's the edge we want to redirect.  */
1123           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1124
1125           /* If there are no remaining blocks on the path to duplicate,
1126              then redirect VICTIM to the final destination of the jump
1127              threading path.  */
1128           if (!any_remaining_duplicated_blocks (path, i))
1129             {
1130               e2 = redirect_edge_and_branch (victim, elast->dest);
1131               /* If we redirected the edge, then we need to copy PHI arguments
1132                  at the target.  If the edge already existed (e2 != victim
1133                  case), then the PHIs in the target already have the correct
1134                  arguments.  */
1135               if (e2 == victim)
1136                 copy_phi_args (e2->dest, elast, e2,
1137                                path, multi_incomings ? 0 : i);
1138             }
1139           else
1140             {
1141               /* Redirect VICTIM to the next duplicated block in the path.  */
1142               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1143
1144               /* We need to update the PHIs in the next duplicated block.  We
1145                  want the new PHI args to have the same value as they had
1146                  in the source of the next duplicate block.
1147
1148                  Thus, we need to know which edge we traversed into the
1149                  source of the duplicate.  Furthermore, we may have
1150                  traversed many edges to reach the source of the duplicate.
1151
1152                  Walk through the path starting at element I until we
1153                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1154                  the edge from the prior element.  */
1155               for (unsigned int j = i + 1; j < path->length (); j++)
1156                 {
1157                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1158                     {
1159                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1160                       break;
1161                     }
1162                 }
1163             }
1164
1165           /* Update the counts and frequency of both the original block
1166              and path edge, and the duplicates.  The path duplicate's
1167              incoming count and frequency are the totals for all edges
1168              incoming to this jump threading path computed earlier.
1169              And we know that the duplicated path will have path_out_count
1170              flowing out of it (i.e. along the duplicated path out of the
1171              duplicated joiner).  */
1172           update_profile (epath, e2, path_in_count, path_out_count,
1173                           path_in_freq);
1174
1175           /* Record the frequency flowing to the downstream duplicated
1176              path blocks.  */
1177           cur_path_freq = EDGE_FREQUENCY (e2);
1178         }
1179       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1180         {
1181           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1182           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1183                                                    multi_incomings ? 0 : i);
1184           if (count == 1)
1185             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1186
1187           /* Update the counts and frequency of both the original block
1188              and path edge, and the duplicates.  Since we are now after
1189              any joiner that may have existed on the path, the count
1190              flowing along the duplicated threaded path is path_out_count.
1191              If we didn't have a joiner, then cur_path_freq was the sum
1192              of the total frequencies along all incoming edges to the
1193              thread path (path_in_freq).  If we had a joiner, it would have
1194              been updated at the end of that handling to the edge frequency
1195              along the duplicated joiner path edge.  */
1196           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1197                           path_out_count, path_out_count, cur_path_freq);
1198         }
1199       else
1200         {
1201           /* No copy case.  In this case we don't have an equivalent block
1202              on the duplicated thread path to update, but we do need
1203              to remove the portion of the counts/freqs that were moved
1204              to the duplicated path from the counts/freqs flowing through
1205              this block on the original path.  Since all the no-copy edges
1206              are after any joiner, the removed count is the same as
1207              path_out_count.
1208
1209              If we didn't have a joiner, then cur_path_freq was the sum
1210              of the total frequencies along all incoming edges to the
1211              thread path (path_in_freq).  If we had a joiner, it would have
1212              been updated at the end of that handling to the edge frequency
1213              along the duplicated joiner path edge.  */
1214            update_profile (epath, NULL, path_out_count, path_out_count,
1215                            cur_path_freq);
1216         }
1217
1218       /* Increment the index into the duplicated path when we processed
1219          a duplicated block.  */
1220       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1221           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1222         {
1223           count++;
1224         }
1225     }
1226
1227   /* Done with all profile and frequency updates, clear counts if they
1228      were copied.  */
1229   if (do_freqs_to_counts)
1230     clear_counts_path (rd);
1231 }
1232
1233 /* Hash table traversal callback routine to create duplicate blocks.  */
1234
1235 int
1236 ssa_create_duplicates (struct redirection_data **slot,
1237                        ssa_local_info_t *local_info)
1238 {
1239   struct redirection_data *rd = *slot;
1240
1241   /* The second duplicated block in a jump threading path is specific
1242      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
1243
1244      Each time we're called, we have to look through the path and see
1245      if a second block needs to be duplicated.
1246
1247      Note the search starts with the third edge on the path.  The first
1248      edge is the incoming edge, the second edge always has its source
1249      duplicated.  Thus we start our search with the third edge.  */
1250   vec<jump_thread_edge *> *path = rd->path;
1251   for (unsigned int i = 2; i < path->length (); i++)
1252     {
1253       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1254           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1255         {
1256           create_block_for_threading ((*path)[i]->e->src, rd, 1,
1257                                       &local_info->duplicate_blocks);
1258           break;
1259         }
1260     }
1261
1262   /* Create a template block if we have not done so already.  Otherwise
1263      use the template to create a new block.  */
1264   if (local_info->template_block == NULL)
1265     {
1266       create_block_for_threading ((*path)[1]->e->src, rd, 0,
1267                                   &local_info->duplicate_blocks);
1268       local_info->template_block = rd->dup_blocks[0];
1269
1270       /* We do not create any outgoing edges for the template.  We will
1271          take care of that in a later traversal.  That way we do not
1272          create edges that are going to just be deleted.  */
1273     }
1274   else
1275     {
1276       create_block_for_threading (local_info->template_block, rd, 0,
1277                                   &local_info->duplicate_blocks);
1278
1279       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1280          block.   */
1281       ssa_fix_duplicate_block_edges (rd, local_info);
1282     }
1283
1284   /* Keep walking the hash table.  */
1285   return 1;
1286 }
1287
1288 /* We did not create any outgoing edges for the template block during
1289    block creation.  This hash table traversal callback creates the
1290    outgoing edge for the template block.  */
1291
1292 inline int
1293 ssa_fixup_template_block (struct redirection_data **slot,
1294                           ssa_local_info_t *local_info)
1295 {
1296   struct redirection_data *rd = *slot;
1297
1298   /* If this is the template block halt the traversal after updating
1299      it appropriately.
1300
1301      If we were threading through an joiner block, then we want
1302      to keep its control statement and redirect an outgoing edge.
1303      Else we want to remove the control statement & edges, then create
1304      a new outgoing edge.  In both cases we may need to update PHIs.  */
1305   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1306     {
1307       ssa_fix_duplicate_block_edges (rd, local_info);
1308       return 0;
1309     }
1310
1311   return 1;
1312 }
1313
1314 /* Hash table traversal callback to redirect each incoming edge
1315    associated with this hash table element to its new destination.  */
1316
1317 int
1318 ssa_redirect_edges (struct redirection_data **slot,
1319                     ssa_local_info_t *local_info)
1320 {
1321   struct redirection_data *rd = *slot;
1322   struct el *next, *el;
1323
1324   /* Walk over all the incoming edges associated with this hash table
1325      entry.  */
1326   for (el = rd->incoming_edges; el; el = next)
1327     {
1328       edge e = el->e;
1329       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1330
1331       /* Go ahead and free this element from the list.  Doing this now
1332          avoids the need for another list walk when we destroy the hash
1333          table.  */
1334       next = el->next;
1335       free (el);
1336
1337       thread_stats.num_threaded_edges++;
1338
1339       if (rd->dup_blocks[0])
1340         {
1341           edge e2;
1342
1343           if (dump_file && (dump_flags & TDF_DETAILS))
1344             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1345                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1346
1347           /* Redirect the incoming edge (possibly to the joiner block) to the
1348              appropriate duplicate block.  */
1349           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1350           gcc_assert (e == e2);
1351           flush_pending_stmts (e2);
1352         }
1353
1354       /* Go ahead and clear E->aux.  It's not needed anymore and failure
1355          to clear it will cause all kinds of unpleasant problems later.  */
1356       delete_jump_thread_path (path);
1357       e->aux = NULL;
1358
1359     }
1360
1361   /* Indicate that we actually threaded one or more jumps.  */
1362   if (rd->incoming_edges)
1363     local_info->jumps_threaded = true;
1364
1365   return 1;
1366 }
1367
1368 /* Return true if this block has no executable statements other than
1369    a simple ctrl flow instruction.  When the number of outgoing edges
1370    is one, this is equivalent to a "forwarder" block.  */
1371
1372 static bool
1373 redirection_block_p (basic_block bb)
1374 {
1375   gimple_stmt_iterator gsi;
1376
1377   /* Advance to the first executable statement.  */
1378   gsi = gsi_start_bb (bb);
1379   while (!gsi_end_p (gsi)
1380          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1381              || is_gimple_debug (gsi_stmt (gsi))
1382              || gimple_nop_p (gsi_stmt (gsi))
1383              || gimple_clobber_p (gsi_stmt (gsi))))
1384     gsi_next (&gsi);
1385
1386   /* Check if this is an empty block.  */
1387   if (gsi_end_p (gsi))
1388     return true;
1389
1390   /* Test that we've reached the terminating control statement.  */
1391   return gsi_stmt (gsi)
1392          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1393              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1394              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1395 }
1396
1397 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1398    is reached via one or more specific incoming edges, we know which
1399    outgoing edge from BB will be traversed.
1400
1401    We want to redirect those incoming edges to the target of the
1402    appropriate outgoing edge.  Doing so avoids a conditional branch
1403    and may expose new optimization opportunities.  Note that we have
1404    to update dominator tree and SSA graph after such changes.
1405
1406    The key to keeping the SSA graph update manageable is to duplicate
1407    the side effects occurring in BB so that those side effects still
1408    occur on the paths which bypass BB after redirecting edges.
1409
1410    We accomplish this by creating duplicates of BB and arranging for
1411    the duplicates to unconditionally pass control to one specific
1412    successor of BB.  We then revector the incoming edges into BB to
1413    the appropriate duplicate of BB.
1414
1415    If NOLOOP_ONLY is true, we only perform the threading as long as it
1416    does not affect the structure of the loops in a nontrivial way.
1417
1418    If JOINERS is true, then thread through joiner blocks as well.  */
1419
1420 static bool
1421 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1422 {
1423   /* E is an incoming edge into BB that we may or may not want to
1424      redirect to a duplicate of BB.  */
1425   edge e, e2;
1426   edge_iterator ei;
1427   ssa_local_info_t local_info;
1428
1429   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1430   local_info.need_profile_correction = false;
1431
1432   /* To avoid scanning a linear array for the element we need we instead
1433      use a hash table.  For normal code there should be no noticeable
1434      difference.  However, if we have a block with a large number of
1435      incoming and outgoing edges such linear searches can get expensive.  */
1436   redirection_data
1437     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1438
1439   /* Record each unique threaded destination into a hash table for
1440      efficient lookups.  */
1441   edge last = NULL;
1442   FOR_EACH_EDGE (e, ei, bb->preds)
1443     {
1444       if (e->aux == NULL)
1445         continue;
1446
1447       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1448
1449       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1450           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1451         continue;
1452
1453       e2 = path->last ()->e;
1454       if (!e2 || noloop_only)
1455         {
1456           /* If NOLOOP_ONLY is true, we only allow threading through the
1457              header of a loop to exit edges.  */
1458
1459           /* One case occurs when there was loop header buried in a jump
1460              threading path that crosses loop boundaries.  We do not try
1461              and thread this elsewhere, so just cancel the jump threading
1462              request by clearing the AUX field now.  */
1463           if (bb->loop_father != e2->src->loop_father
1464               && !loop_exit_edge_p (e2->src->loop_father, e2))
1465             {
1466               /* Since this case is not handled by our special code
1467                  to thread through a loop header, we must explicitly
1468                  cancel the threading request here.  */
1469               delete_jump_thread_path (path);
1470               e->aux = NULL;
1471               continue;
1472             }
1473
1474           /* Another case occurs when trying to thread through our
1475              own loop header, possibly from inside the loop.  We will
1476              thread these later.  */
1477           unsigned int i;
1478           for (i = 1; i < path->length (); i++)
1479             {
1480               if ((*path)[i]->e->src == bb->loop_father->header
1481                   && (!loop_exit_edge_p (bb->loop_father, e2)
1482                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1483                 break;
1484             }
1485
1486           if (i != path->length ())
1487             continue;
1488         }
1489
1490       /* Insert the outgoing edge into the hash table if it is not
1491          already in the hash table.  */
1492       lookup_redirection_data (e, INSERT);
1493
1494       /* When we have thread paths through a common joiner with different
1495          final destinations, then we may need corrections to deal with
1496          profile insanities.  See the big comment before compute_path_counts.  */
1497       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1498         {
1499           if (!last)
1500             last = e2;
1501           else if (e2 != last)
1502             local_info.need_profile_correction = true;
1503         }
1504     }
1505
1506   /* We do not update dominance info.  */
1507   free_dominance_info (CDI_DOMINATORS);
1508
1509   /* We know we only thread through the loop header to loop exits.
1510      Let the basic block duplication hook know we are not creating
1511      a multiple entry loop.  */
1512   if (noloop_only
1513       && bb == bb->loop_father->header)
1514     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1515
1516   /* Now create duplicates of BB.
1517
1518      Note that for a block with a high outgoing degree we can waste
1519      a lot of time and memory creating and destroying useless edges.
1520
1521      So we first duplicate BB and remove the control structure at the
1522      tail of the duplicate as well as all outgoing edges from the
1523      duplicate.  We then use that duplicate block as a template for
1524      the rest of the duplicates.  */
1525   local_info.template_block = NULL;
1526   local_info.bb = bb;
1527   local_info.jumps_threaded = false;
1528   redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1529                             (&local_info);
1530
1531   /* The template does not have an outgoing edge.  Create that outgoing
1532      edge and update PHI nodes as the edge's target as necessary.
1533
1534      We do this after creating all the duplicates to avoid creating
1535      unnecessary edges.  */
1536   redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1537                             (&local_info);
1538
1539   /* The hash table traversals above created the duplicate blocks (and the
1540      statements within the duplicate blocks).  This loop creates PHI nodes for
1541      the duplicated blocks and redirects the incoming edges into BB to reach
1542      the duplicates of BB.  */
1543   redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1544                             (&local_info);
1545
1546   /* Done with this block.  Clear REDIRECTION_DATA.  */
1547   delete redirection_data;
1548   redirection_data = NULL;
1549
1550   if (noloop_only
1551       && bb == bb->loop_father->header)
1552     set_loop_copy (bb->loop_father, NULL);
1553
1554   BITMAP_FREE (local_info.duplicate_blocks);
1555   local_info.duplicate_blocks = NULL;
1556
1557   /* Indicate to our caller whether or not any jumps were threaded.  */
1558   return local_info.jumps_threaded;
1559 }
1560
1561 /* Wrapper for thread_block_1 so that we can first handle jump
1562    thread paths which do not involve copying joiner blocks, then
1563    handle jump thread paths which have joiner blocks.
1564
1565    By doing things this way we can be as aggressive as possible and
1566    not worry that copying a joiner block will create a jump threading
1567    opportunity.  */
1568
1569 static bool
1570 thread_block (basic_block bb, bool noloop_only)
1571 {
1572   bool retval;
1573   retval = thread_block_1 (bb, noloop_only, false);
1574   retval |= thread_block_1 (bb, noloop_only, true);
1575   return retval;
1576 }
1577
1578 /* Callback for dfs_enumerate_from.  Returns true if BB is different
1579    from STOP and DBDS_CE_STOP.  */
1580
1581 static basic_block dbds_ce_stop;
1582 static bool
1583 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1584 {
1585   return (bb != (const_basic_block) stop
1586           && bb != dbds_ce_stop);
1587 }
1588
1589 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1590    returns the state.  */
1591
1592 enum bb_dom_status
1593 determine_bb_domination_status (struct loop *loop, basic_block bb)
1594 {
1595   basic_block *bblocks;
1596   unsigned nblocks, i;
1597   bool bb_reachable = false;
1598   edge_iterator ei;
1599   edge e;
1600
1601   /* This function assumes BB is a successor of LOOP->header.
1602      If that is not the case return DOMST_NONDOMINATING which
1603      is always safe.  */
1604     {
1605       bool ok = false;
1606
1607       FOR_EACH_EDGE (e, ei, bb->preds)
1608         {
1609           if (e->src == loop->header)
1610             {
1611               ok = true;
1612               break;
1613             }
1614         }
1615
1616       if (!ok)
1617         return DOMST_NONDOMINATING;
1618     }
1619
1620   if (bb == loop->latch)
1621     return DOMST_DOMINATING;
1622
1623   /* Check that BB dominates LOOP->latch, and that it is back-reachable
1624      from it.  */
1625
1626   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1627   dbds_ce_stop = loop->header;
1628   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1629                                 bblocks, loop->num_nodes, bb);
1630   for (i = 0; i < nblocks; i++)
1631     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1632       {
1633         if (e->src == loop->header)
1634           {
1635             free (bblocks);
1636             return DOMST_NONDOMINATING;
1637           }
1638         if (e->src == bb)
1639           bb_reachable = true;
1640       }
1641
1642   free (bblocks);
1643   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1644 }
1645
1646 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
1647    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1648    to the inside of the loop.  */
1649
1650 static bool
1651 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1652 {
1653   basic_block header = loop->header;
1654   edge e, tgt_edge, latch = loop_latch_edge (loop);
1655   edge_iterator ei;
1656   basic_block tgt_bb, atgt_bb;
1657   enum bb_dom_status domst;
1658
1659   /* We have already threaded through headers to exits, so all the threading
1660      requests now are to the inside of the loop.  We need to avoid creating
1661      irreducible regions (i.e., loops with more than one entry block), and
1662      also loop with several latch edges, or new subloops of the loop (although
1663      there are cases where it might be appropriate, it is difficult to decide,
1664      and doing it wrongly may confuse other optimizers).
1665
1666      We could handle more general cases here.  However, the intention is to
1667      preserve some information about the loop, which is impossible if its
1668      structure changes significantly, in a way that is not well understood.
1669      Thus we only handle few important special cases, in which also updating
1670      of the loop-carried information should be feasible:
1671
1672      1) Propagation of latch edge to a block that dominates the latch block
1673         of a loop.  This aims to handle the following idiom:
1674
1675         first = 1;
1676         while (1)
1677           {
1678             if (first)
1679               initialize;
1680             first = 0;
1681             body;
1682           }
1683
1684         After threading the latch edge, this becomes
1685
1686         first = 1;
1687         if (first)
1688           initialize;
1689         while (1)
1690           {
1691             first = 0;
1692             body;
1693           }
1694
1695         The original header of the loop is moved out of it, and we may thread
1696         the remaining edges through it without further constraints.
1697
1698      2) All entry edges are propagated to a single basic block that dominates
1699         the latch block of the loop.  This aims to handle the following idiom
1700         (normally created for "for" loops):
1701
1702         i = 0;
1703         while (1)
1704           {
1705             if (i >= 100)
1706               break;
1707             body;
1708             i++;
1709           }
1710
1711         This becomes
1712
1713         i = 0;
1714         while (1)
1715           {
1716             body;
1717             i++;
1718             if (i >= 100)
1719               break;
1720           }
1721      */
1722
1723   /* Threading through the header won't improve the code if the header has just
1724      one successor.  */
1725   if (single_succ_p (header))
1726     goto fail;
1727
1728   if (!may_peel_loop_headers && !redirection_block_p (loop->header))
1729     goto fail;
1730   else
1731     {
1732       tgt_bb = NULL;
1733       tgt_edge = NULL;
1734       FOR_EACH_EDGE (e, ei, header->preds)
1735         {
1736           if (!e->aux)
1737             {
1738               if (e == latch)
1739                 continue;
1740
1741               /* If latch is not threaded, and there is a header
1742                  edge that is not threaded, we would create loop
1743                  with multiple entries.  */
1744               goto fail;
1745             }
1746
1747           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1748
1749           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1750             goto fail;
1751           tgt_edge = (*path)[1]->e;
1752           atgt_bb = tgt_edge->dest;
1753           if (!tgt_bb)
1754             tgt_bb = atgt_bb;
1755           /* Two targets of threading would make us create loop
1756              with multiple entries.  */
1757           else if (tgt_bb != atgt_bb)
1758             goto fail;
1759         }
1760
1761       if (!tgt_bb)
1762         {
1763           /* There are no threading requests.  */
1764           return false;
1765         }
1766
1767       /* Redirecting to empty loop latch is useless.  */
1768       if (tgt_bb == loop->latch
1769           && empty_block_p (loop->latch))
1770         goto fail;
1771     }
1772
1773   /* The target block must dominate the loop latch, otherwise we would be
1774      creating a subloop.  */
1775   domst = determine_bb_domination_status (loop, tgt_bb);
1776   if (domst == DOMST_NONDOMINATING)
1777     goto fail;
1778   if (domst == DOMST_LOOP_BROKEN)
1779     {
1780       /* If the loop ceased to exist, mark it as such, and thread through its
1781          original header.  */
1782       mark_loop_for_removal (loop);
1783       return thread_block (header, false);
1784     }
1785
1786   if (tgt_bb->loop_father->header == tgt_bb)
1787     {
1788       /* If the target of the threading is a header of a subloop, we need
1789          to create a preheader for it, so that the headers of the two loops
1790          do not merge.  */
1791       if (EDGE_COUNT (tgt_bb->preds) > 2)
1792         {
1793           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1794           gcc_assert (tgt_bb != NULL);
1795         }
1796       else
1797         tgt_bb = split_edge (tgt_edge);
1798     }
1799
1800   basic_block new_preheader;
1801
1802   /* Now consider the case entry edges are redirected to the new entry
1803      block.  Remember one entry edge, so that we can find the new
1804      preheader (its destination after threading).  */
1805   FOR_EACH_EDGE (e, ei, header->preds)
1806     {
1807       if (e->aux)
1808         break;
1809     }
1810
1811   /* The duplicate of the header is the new preheader of the loop.  Ensure
1812      that it is placed correctly in the loop hierarchy.  */
1813   set_loop_copy (loop, loop_outer (loop));
1814
1815   thread_block (header, false);
1816   set_loop_copy (loop, NULL);
1817   new_preheader = e->dest;
1818
1819   /* Create the new latch block.  This is always necessary, as the latch
1820      must have only a single successor, but the original header had at
1821      least two successors.  */
1822   loop->latch = NULL;
1823   mfb_kj_edge = single_succ_edge (new_preheader);
1824   loop->header = mfb_kj_edge->dest;
1825   latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1826   loop->header = latch->dest;
1827   loop->latch = latch->src;
1828   return true;
1829
1830 fail:
1831   /* We failed to thread anything.  Cancel the requests.  */
1832   FOR_EACH_EDGE (e, ei, header->preds)
1833     {
1834       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1835
1836       if (path)
1837         {
1838           delete_jump_thread_path (path);
1839           e->aux = NULL;
1840         }
1841     }
1842   return false;
1843 }
1844
1845 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
1846    PHI arguments associated with those edges are equal or there are no
1847    PHI arguments, otherwise return FALSE.  */
1848
1849 static bool
1850 phi_args_equal_on_edges (edge e1, edge e2)
1851 {
1852   gphi_iterator gsi;
1853   int indx1 = e1->dest_idx;
1854   int indx2 = e2->dest_idx;
1855
1856   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1857     {
1858       gphi *phi = gsi.phi ();
1859
1860       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1861                             gimple_phi_arg_def (phi, indx2), 0))
1862         return false;
1863     }
1864   return true;
1865 }
1866
1867 /* Walk through the registered jump threads and convert them into a
1868    form convenient for this pass.
1869
1870    Any block which has incoming edges threaded to outgoing edges
1871    will have its entry in THREADED_BLOCK set.
1872
1873    Any threaded edge will have its new outgoing edge stored in the
1874    original edge's AUX field.
1875
1876    This form avoids the need to walk all the edges in the CFG to
1877    discover blocks which need processing and avoids unnecessary
1878    hash table lookups to map from threaded edge to new target.  */
1879
1880 static void
1881 mark_threaded_blocks (bitmap threaded_blocks)
1882 {
1883   unsigned int i;
1884   bitmap_iterator bi;
1885   auto_bitmap tmp;
1886   basic_block bb;
1887   edge e;
1888   edge_iterator ei;
1889
1890   /* It is possible to have jump threads in which one is a subpath
1891      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
1892      block and (B, C), (C, D) where no joiner block exists.
1893
1894      When this occurs ignore the jump thread request with the joiner
1895      block.  It's totally subsumed by the simpler jump thread request.
1896
1897      This results in less block copying, simpler CFGs.  More importantly,
1898      when we duplicate the joiner block, B, in this case we will create
1899      a new threading opportunity that we wouldn't be able to optimize
1900      until the next jump threading iteration.
1901
1902      So first convert the jump thread requests which do not require a
1903      joiner block.  */
1904   for (i = 0; i < paths.length (); i++)
1905     {
1906       vec<jump_thread_edge *> *path = paths[i];
1907
1908       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1909         {
1910           edge e = (*path)[0]->e;
1911           e->aux = (void *)path;
1912           bitmap_set_bit (tmp, e->dest->index);
1913         }
1914     }
1915
1916   /* Now iterate again, converting cases where we want to thread
1917      through a joiner block, but only if no other edge on the path
1918      already has a jump thread attached to it.  We do this in two passes,
1919      to avoid situations where the order in the paths vec can hide overlapping
1920      threads (the path is recorded on the incoming edge, so we would miss
1921      cases where the second path starts at a downstream edge on the same
1922      path).  First record all joiner paths, deleting any in the unexpected
1923      case where there is already a path for that incoming edge.  */
1924   for (i = 0; i < paths.length ();)
1925     {
1926       vec<jump_thread_edge *> *path = paths[i];
1927
1928       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1929         {
1930           /* Attach the path to the starting edge if none is yet recorded.  */
1931           if ((*path)[0]->e->aux == NULL)
1932             {
1933               (*path)[0]->e->aux = path;
1934               i++;
1935             }
1936           else
1937             {
1938               paths.unordered_remove (i);
1939               if (dump_file && (dump_flags & TDF_DETAILS))
1940                 dump_jump_thread_path (dump_file, *path, false);
1941               delete_jump_thread_path (path);
1942             }
1943         }
1944       else
1945         {
1946           i++;
1947         }
1948     }
1949
1950   /* Second, look for paths that have any other jump thread attached to
1951      them, and either finish converting them or cancel them.  */
1952   for (i = 0; i < paths.length ();)
1953     {
1954       vec<jump_thread_edge *> *path = paths[i];
1955       edge e = (*path)[0]->e;
1956
1957       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
1958         {
1959           unsigned int j;
1960           for (j = 1; j < path->length (); j++)
1961             if ((*path)[j]->e->aux != NULL)
1962               break;
1963
1964           /* If we iterated through the entire path without exiting the loop,
1965              then we are good to go, record it.  */
1966           if (j == path->length ())
1967             {
1968               bitmap_set_bit (tmp, e->dest->index);
1969               i++;
1970             }
1971           else
1972             {
1973               e->aux = NULL;
1974               paths.unordered_remove (i);
1975               if (dump_file && (dump_flags & TDF_DETAILS))
1976                 dump_jump_thread_path (dump_file, *path, false);
1977               delete_jump_thread_path (path);
1978             }
1979         }
1980       else
1981         {
1982           i++;
1983         }
1984     }
1985
1986   /* If optimizing for size, only thread through block if we don't have
1987      to duplicate it or it's an otherwise empty redirection block.  */
1988   if (optimize_function_for_size_p (cfun))
1989     {
1990       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1991         {
1992           bb = BASIC_BLOCK_FOR_FN (cfun, i);
1993           if (EDGE_COUNT (bb->preds) > 1
1994               && !redirection_block_p (bb))
1995             {
1996               FOR_EACH_EDGE (e, ei, bb->preds)
1997                 {
1998                   if (e->aux)
1999                     {
2000                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
2001                       delete_jump_thread_path (path);
2002                       e->aux = NULL;
2003                     }
2004                 }
2005             }
2006           else
2007             bitmap_set_bit (threaded_blocks, i);
2008         }
2009     }
2010   else
2011     bitmap_copy (threaded_blocks, tmp);
2012
2013   /* If we have a joiner block (J) which has two successors S1 and S2 and
2014      we are threading though S1 and the final destination of the thread
2015      is S2, then we must verify that any PHI nodes in S2 have the same
2016      PHI arguments for the edge J->S2 and J->S1->...->S2.
2017
2018      We used to detect this prior to registering the jump thread, but
2019      that prohibits propagation of edge equivalences into non-dominated
2020      PHI nodes as the equivalency test might occur before propagation.
2021
2022      This must also occur after we truncate any jump threading paths
2023      as this scenario may only show up after truncation.
2024
2025      This works for now, but will need improvement as part of the FSA
2026      optimization.
2027
2028      Note since we've moved the thread request data to the edges,
2029      we have to iterate on those rather than the threaded_edges vector.  */
2030   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2031     {
2032       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2033       FOR_EACH_EDGE (e, ei, bb->preds)
2034         {
2035           if (e->aux)
2036             {
2037               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2038               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2039
2040               if (have_joiner)
2041                 {
2042                   basic_block joiner = e->dest;
2043                   edge final_edge = path->last ()->e;
2044                   basic_block final_dest = final_edge->dest;
2045                   edge e2 = find_edge (joiner, final_dest);
2046
2047                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2048                     {
2049                       delete_jump_thread_path (path);
2050                       e->aux = NULL;
2051                     }
2052                 }
2053             }
2054         }
2055     }
2056
2057   /* Look for jump threading paths which cross multiple loop headers.
2058
2059      The code to thread through loop headers will change the CFG in ways
2060      that invalidate the cached loop iteration information.  So we must
2061      detect that case and wipe the cached information.  */
2062   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2063     {
2064       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2065       FOR_EACH_EDGE (e, ei, bb->preds)
2066         {
2067           if (e->aux)
2068             {
2069               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2070
2071               for (unsigned int i = 0, crossed_headers = 0;
2072                    i < path->length ();
2073                    i++)
2074                 {
2075                   basic_block dest = (*path)[i]->e->dest;
2076                   basic_block src = (*path)[i]->e->src;
2077                   /* If we enter a loop.  */
2078                   if (flow_loop_nested_p (src->loop_father, dest->loop_father))
2079                     ++crossed_headers;
2080                   /* If we step from a block outside an irreducible region
2081                      to a block inside an irreducible region, then we have
2082                      crossed into a loop.  */
2083                   else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
2084                            && (dest->flags & BB_IRREDUCIBLE_LOOP))
2085                       ++crossed_headers;
2086                   if (crossed_headers > 1)
2087                     {
2088                       vect_free_loop_info_assumptions
2089                         ((*path)[path->length () - 1]->e->dest->loop_father);
2090                       break;
2091                     }
2092                 }
2093             }
2094         }
2095     }
2096 }
2097
2098
2099 /* Verify that the REGION is a valid jump thread.  A jump thread is a special
2100    case of SEME Single Entry Multiple Exits region in which all nodes in the
2101    REGION have exactly one incoming edge.  The only exception is the first block
2102    that may not have been connected to the rest of the cfg yet.  */
2103
2104 DEBUG_FUNCTION void
2105 verify_jump_thread (basic_block *region, unsigned n_region)
2106 {
2107   for (unsigned i = 0; i < n_region; i++)
2108     gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2109 }
2110
2111 /* Return true when BB is one of the first N items in BBS.  */
2112
2113 static inline bool
2114 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2115 {
2116   for (int i = 0; i < n; i++)
2117     if (bb == bbs[i])
2118       return true;
2119
2120   return false;
2121 }
2122
2123 /* Duplicates a jump-thread path of N_REGION basic blocks.
2124    The ENTRY edge is redirected to the duplicate of the region.
2125
2126    Remove the last conditional statement in the last basic block in the REGION,
2127    and create a single fallthru edge pointing to the same destination as the
2128    EXIT edge.
2129
2130    Returns false if it is unable to copy the region, true otherwise.  */
2131
2132 static bool
2133 duplicate_thread_path (edge entry, edge exit, basic_block *region,
2134                        unsigned n_region)
2135 {
2136   unsigned i;
2137   struct loop *loop = entry->dest->loop_father;
2138   edge exit_copy;
2139   edge redirected;
2140   int curr_freq;
2141   profile_count curr_count;
2142
2143   if (!can_copy_bbs_p (region, n_region))
2144     return false;
2145
2146   /* Some sanity checking.  Note that we do not check for all possible
2147      missuses of the functions.  I.e. if you ask to copy something weird,
2148      it will work, but the state of structures probably will not be
2149      correct.  */
2150   for (i = 0; i < n_region; i++)
2151     {
2152       /* We do not handle subloops, i.e. all the blocks must belong to the
2153          same loop.  */
2154       if (region[i]->loop_father != loop)
2155         return false;
2156     }
2157
2158   initialize_original_copy_tables ();
2159
2160   set_loop_copy (loop, loop);
2161
2162   basic_block *region_copy = XNEWVEC (basic_block, n_region);
2163   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2164             split_edge_bb_loc (entry), false);
2165
2166   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
2167      following code ensures that all the edges exiting the jump-thread path are
2168      redirected back to the original code: these edges are exceptions
2169      invalidating the property that is propagated by executing all the blocks of
2170      the jump-thread path in order.  */
2171
2172   curr_count = entry->count ();
2173   curr_freq = EDGE_FREQUENCY (entry);
2174
2175   for (i = 0; i < n_region; i++)
2176     {
2177       edge e;
2178       edge_iterator ei;
2179       basic_block bb = region_copy[i];
2180
2181       /* Watch inconsistent profile.  */
2182       if (curr_count > region[i]->count)
2183         curr_count = region[i]->count;
2184       if (curr_freq > region[i]->frequency)
2185         curr_freq = region[i]->frequency;
2186       /* Scale current BB.  */
2187       if (region[i]->count > 0 && curr_count.initialized_p ())
2188         {
2189           /* In the middle of the path we only scale the frequencies.
2190              In last BB we need to update probabilities of outgoing edges
2191              because we know which one is taken at the threaded path.  */
2192           if (i + 1 != n_region)
2193             scale_bbs_frequencies_profile_count (region + i, 1,
2194                                                  region[i]->count - curr_count,
2195                                                  region[i]->count);
2196           else
2197             update_bb_profile_for_threading (region[i],
2198                                              curr_freq, curr_count,
2199                                              exit);
2200           scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
2201                                                region_copy[i]->count);
2202         }
2203       else if (region[i]->frequency)
2204         {
2205           if (i + 1 != n_region)
2206             scale_bbs_frequencies_int (region + i, 1,
2207                                        region[i]->frequency - curr_freq,
2208                                        region[i]->frequency);
2209           else
2210             update_bb_profile_for_threading (region[i],
2211                                              curr_freq, curr_count,
2212                                              exit);
2213           scale_bbs_frequencies_int (region_copy + i, 1, curr_freq,
2214                                      region_copy[i]->frequency);
2215         }
2216
2217       if (single_succ_p (bb))
2218         {
2219           /* Make sure the successor is the next node in the path.  */
2220           gcc_assert (i + 1 == n_region
2221                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
2222           if (i + 1 != n_region)
2223             {
2224               curr_freq = EDGE_FREQUENCY (single_succ_edge (bb));
2225               curr_count = single_succ_edge (bb)->count ();
2226             }
2227           continue;
2228         }
2229
2230       /* Special case the last block on the path: make sure that it does not
2231          jump back on the copied path, including back to itself.  */
2232       if (i + 1 == n_region)
2233         {
2234           FOR_EACH_EDGE (e, ei, bb->succs)
2235             if (bb_in_bbs (e->dest, region_copy, n_region))
2236               {
2237                 basic_block orig = get_bb_original (e->dest);
2238                 if (orig)
2239                   redirect_edge_and_branch_force (e, orig);
2240               }
2241           continue;
2242         }
2243
2244       /* Redirect all other edges jumping to non-adjacent blocks back to the
2245          original code.  */
2246       FOR_EACH_EDGE (e, ei, bb->succs)
2247         if (region_copy[i + 1] != e->dest)
2248           {
2249             basic_block orig = get_bb_original (e->dest);
2250             if (orig)
2251               redirect_edge_and_branch_force (e, orig);
2252           }
2253         else
2254           {
2255             curr_freq = EDGE_FREQUENCY (e);
2256             curr_count = e->count ();
2257           }
2258     }
2259
2260
2261   if (flag_checking)
2262     verify_jump_thread (region_copy, n_region);
2263
2264   /* Remove the last branch in the jump thread path.  */
2265   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2266
2267   /* And fixup the flags on the single remaining edge.  */
2268   edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2269   fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2270   fix_e->flags |= EDGE_FALLTHRU;
2271
2272   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2273
2274   if (e)
2275     {
2276       rescan_loop_exit (e, true, false);
2277       e->probability = profile_probability::always ();
2278     }
2279
2280   /* Redirect the entry and add the phi node arguments.  */
2281   if (entry->dest == loop->header)
2282     mark_loop_for_removal (loop);
2283   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2284   gcc_assert (redirected != NULL);
2285   flush_pending_stmts (entry);
2286
2287   /* Add the other PHI node arguments.  */
2288   add_phi_args_after_copy (region_copy, n_region, NULL);
2289
2290   free (region_copy);
2291
2292   free_original_copy_tables ();
2293   return true;
2294 }
2295
2296 /* Return true when PATH is a valid jump-thread path.  */
2297
2298 static bool
2299 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2300 {
2301   unsigned len = path->length ();
2302
2303   /* Check that the path is connected.  */
2304   for (unsigned int j = 0; j < len - 1; j++)
2305     {
2306       edge e = (*path)[j]->e;
2307       if (e->dest != (*path)[j+1]->e->src)
2308         return false;
2309     }
2310   return true;
2311 }
2312
2313 /* Remove any queued jump threads that include edge E.
2314
2315    We don't actually remove them here, just record the edges into ax
2316    hash table.  That way we can do the search once per iteration of
2317    DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR.  */
2318
2319 void
2320 remove_jump_threads_including (edge_def *e)
2321 {
2322   if (!paths.exists ())
2323     return;
2324
2325   if (!removed_edges)
2326     removed_edges = new hash_table<struct removed_edges> (17);
2327
2328   edge *slot = removed_edges->find_slot (e, INSERT);
2329   *slot = e;
2330 }
2331
2332 /* Walk through all blocks and thread incoming edges to the appropriate
2333    outgoing edge for each edge pair recorded in THREADED_EDGES.
2334
2335    It is the caller's responsibility to fix the dominance information
2336    and rewrite duplicated SSA_NAMEs back into SSA form.
2337
2338    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2339    loop headers if it does not simplify the loop.
2340
2341    Returns true if one or more edges were threaded, false otherwise.  */
2342
2343 bool
2344 thread_through_all_blocks (bool may_peel_loop_headers)
2345 {
2346   bool retval = false;
2347   unsigned int i;
2348   bitmap_iterator bi;
2349   struct loop *loop;
2350   auto_bitmap threaded_blocks;
2351
2352   if (!paths.exists ())
2353     {
2354       retval = false;
2355       goto out;
2356     }
2357
2358   memset (&thread_stats, 0, sizeof (thread_stats));
2359
2360   /* Remove any paths that referenced removed edges.  */
2361   if (removed_edges)
2362     for (i = 0; i < paths.length (); )
2363       {
2364         unsigned int j;
2365         vec<jump_thread_edge *> *path = paths[i];
2366
2367         for (j = 0; j < path->length (); j++)
2368           {
2369             edge e = (*path)[j]->e;
2370             if (removed_edges->find_slot (e, NO_INSERT))
2371               break;
2372           }
2373
2374         if (j != path->length ())
2375           {
2376             delete_jump_thread_path (path);
2377             paths.unordered_remove (i);
2378             continue;
2379           }
2380         i++;
2381       }
2382
2383   /* Jump-thread all FSM threads before other jump-threads.  */
2384   for (i = 0; i < paths.length ();)
2385     {
2386       vec<jump_thread_edge *> *path = paths[i];
2387       edge entry = (*path)[0]->e;
2388
2389       /* Only code-generate FSM jump-threads in this loop.  */
2390       if ((*path)[0]->type != EDGE_FSM_THREAD)
2391         {
2392           i++;
2393           continue;
2394         }
2395
2396       /* Do not jump-thread twice from the same block.  */
2397       if (bitmap_bit_p (threaded_blocks, entry->src->index)
2398           /* We may not want to realize this jump thread path
2399              for various reasons.  So check it first.  */
2400           || !valid_jump_thread_path (path))
2401         {
2402           /* Remove invalid FSM jump-thread paths.  */
2403           delete_jump_thread_path (path);
2404           paths.unordered_remove (i);
2405           continue;
2406         }
2407
2408       unsigned len = path->length ();
2409       edge exit = (*path)[len - 1]->e;
2410       basic_block *region = XNEWVEC (basic_block, len - 1);
2411
2412       for (unsigned int j = 0; j < len - 1; j++)
2413         region[j] = (*path)[j]->e->dest;
2414
2415       if (duplicate_thread_path (entry, exit, region, len - 1))
2416         {
2417           /* We do not update dominance info.  */
2418           free_dominance_info (CDI_DOMINATORS);
2419           bitmap_set_bit (threaded_blocks, entry->src->index);
2420           retval = true;
2421           thread_stats.num_threaded_edges++;
2422         }
2423
2424       delete_jump_thread_path (path);
2425       paths.unordered_remove (i);
2426       free (region);
2427     }
2428
2429   /* Remove from PATHS all the jump-threads starting with an edge already
2430      jump-threaded.  */
2431   for (i = 0; i < paths.length ();)
2432     {
2433       vec<jump_thread_edge *> *path = paths[i];
2434       edge entry = (*path)[0]->e;
2435
2436       /* Do not jump-thread twice from the same block.  */
2437       if (bitmap_bit_p (threaded_blocks, entry->src->index))
2438         {
2439           delete_jump_thread_path (path);
2440           paths.unordered_remove (i);
2441         }
2442       else
2443         i++;
2444     }
2445
2446   bitmap_clear (threaded_blocks);
2447
2448   mark_threaded_blocks (threaded_blocks);
2449
2450   initialize_original_copy_tables ();
2451
2452   /* First perform the threading requests that do not affect
2453      loop structure.  */
2454   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2455     {
2456       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2457
2458       if (EDGE_COUNT (bb->preds) > 0)
2459         retval |= thread_block (bb, true);
2460     }
2461
2462   /* Then perform the threading through loop headers.  We start with the
2463      innermost loop, so that the changes in cfg we perform won't affect
2464      further threading.  */
2465   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2466     {
2467       if (!loop->header
2468           || !bitmap_bit_p (threaded_blocks, loop->header->index))
2469         continue;
2470
2471       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2472     }
2473
2474   /* All jump threading paths should have been resolved at this
2475      point.  Verify that is the case.  */
2476   basic_block bb;
2477   FOR_EACH_BB_FN (bb, cfun)
2478     {
2479       edge_iterator ei;
2480       edge e;
2481       FOR_EACH_EDGE (e, ei, bb->preds)
2482         gcc_assert (e->aux == NULL);
2483     }
2484
2485   statistics_counter_event (cfun, "Jumps threaded",
2486                             thread_stats.num_threaded_edges);
2487
2488   free_original_copy_tables ();
2489
2490   paths.release ();
2491
2492   if (retval)
2493     loops_state_set (LOOPS_NEED_FIXUP);
2494
2495  out:
2496   delete removed_edges;
2497   removed_edges = NULL;
2498   return retval;
2499 }
2500
2501 /* Delete the jump threading path PATH.  We have to explicitly delete
2502    each entry in the vector, then the container.  */
2503
2504 void
2505 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2506 {
2507   for (unsigned int i = 0; i < path->length (); i++)
2508     delete (*path)[i];
2509   path->release();
2510   delete path;
2511 }
2512
2513 /* Register a jump threading opportunity.  We queue up all the jump
2514    threading opportunities discovered by a pass and update the CFG
2515    and SSA form all at once.
2516
2517    E is the edge we can thread, E2 is the new target edge, i.e., we
2518    are effectively recording that E->dest can be changed to E2->dest
2519    after fixing the SSA graph.  */
2520
2521 void
2522 register_jump_thread (vec<jump_thread_edge *> *path)
2523 {
2524   if (!dbg_cnt (registered_jump_thread))
2525     {
2526       delete_jump_thread_path (path);
2527       return;
2528     }
2529
2530   /* First make sure there are no NULL outgoing edges on the jump threading
2531      path.  That can happen for jumping to a constant address.  */
2532   for (unsigned int i = 0; i < path->length (); i++)
2533     {
2534       if ((*path)[i]->e == NULL)
2535         {
2536           if (dump_file && (dump_flags & TDF_DETAILS))
2537             {
2538               fprintf (dump_file,
2539                        "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
2540               dump_jump_thread_path (dump_file, *path, false);
2541             }
2542
2543           delete_jump_thread_path (path);
2544           return;
2545         }
2546
2547       /* Only the FSM threader is allowed to thread across
2548          backedges in the CFG.  */
2549       if (flag_checking
2550           && (*path)[0]->type != EDGE_FSM_THREAD)
2551         gcc_assert (((*path)[i]->e->flags & EDGE_DFS_BACK) == 0);
2552     }
2553
2554   if (dump_file && (dump_flags & TDF_DETAILS))
2555     dump_jump_thread_path (dump_file, *path, true);
2556
2557   if (!paths.exists ())
2558     paths.create (5);
2559
2560   paths.safe_push (path);
2561 }