gcc/tree-ssa-threadupdate.cc

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "tree.h"
  25 #include "gimple.h"
  26 #include "cfghooks.h"
  27 #include "tree-pass.h"
  28 #include "ssa.h"
  29 #include "fold-const.h"
  30 #include "cfganal.h"
  31 #include "gimple-iterator.h"
  32 #include "tree-ssa.h"
  33 #include "tree-ssa-threadupdate.h"
  34 #include "cfgloop.h"
  35 #include "dbgcnt.h"
  36 #include "tree-cfg.h"
  37 #include "tree-vectorizer.h"
  38 #include "tree-pass.h"
  39
  40 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  41    one or more in-edges to B to instead reach the destination of an
  42    out-edge from B while preserving any side effects in B.
  43
  44    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  45    side effects of executing B.
  46
  47      1. Make a copy of B (including its outgoing edges and statements).  Call
  48         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  49
  50      2. Remove the control statement at the end of B' and all outgoing edges
  51         except B'->C.
  52
  53      3. Add a new argument to each PHI in C with the same value as the existing
  54         argument associated with edge B->C.  Associate the new PHI arguments
  55         with the edge B'->C.
  56
  57      4. For each PHI in B, find or create a PHI in B' with an identical
  58         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  59         value as the PHI in B associated with the edge A->B.  Associate
  60         the new argument in the PHI in B' with the edge A->B.
  61
  62      5. Change the edge A->B to A->B'.
  63
  64         5a. This automatically deletes any PHI arguments associated with the
  65             edge A->B in B.
  66
  67         5b. This automatically associates each new argument added in step 4
  68             with the edge A->B'.
  69
  70      6. Repeat for other incoming edges into B.
  71
  72      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  73
  74    Note that block duplication can be minimized by first collecting the
  75    set of unique destination blocks that the incoming edges should
  76    be threaded to.
  77
  78    We reduce the number of edges and statements we create by not copying all
  79    the outgoing edges and the control statement in step #1.  We instead create
  80    a template block without the outgoing edges and duplicate the template.
  81
  82    Another case this code handles is threading through a "joiner" block.  In
  83    this case, we do not know the destination of the joiner block, but one
  84    of the outgoing edges from the joiner block leads to a threadable path.  This
  85    case largely works as outlined above, except the duplicate of the joiner
  86    block still contains a full set of outgoing edges and its control statement.
  87    We just redirect one of its outgoing edges to our jump threading path.  */
  88
  89
  90 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  91    all the incoming edges which thread to the same destination edge at
  92    the same time.  That avoids lots of table lookups to get information
  93    for the destination edge.
  94
  95    To realize that implementation we create a list of incoming edges
  96    which thread to the same outgoing edge.  Thus to implement steps
  97    #5 and #6 we traverse our hash table of outgoing edge information.
  98    For each entry we walk the list of incoming edges which thread to
  99    the current outgoing edge.  */
 100
 101 struct el
 102 {
 103   edge e;
 104   struct el *next;
 105 };
 106
 107 /* Main data structure recording information regarding B's duplicate
 108    blocks.  */
 109
 110 /* We need to efficiently record the unique thread destinations of this
 111    block and specific information associated with those destinations.  We
 112    may have many incoming edges threaded to the same outgoing edge.  This
 113    can be naturally implemented with a hash table.  */
 114
 115 struct redirection_data : free_ptr_hash<redirection_data>
 116 {
 117   /* We support wiring up two block duplicates in a jump threading path.
 118
 119      One is a normal block copy where we remove the control statement
 120      and wire up its single remaining outgoing edge to the thread path.
 121
 122      The other is a joiner block where we leave the control statement
 123      in place, but wire one of the outgoing edges to a thread path.
 124
 125      In theory we could have multiple block duplicates in a jump
 126      threading path, but I haven't tried that.
 127
 128      The duplicate blocks appear in this array in the same order in
 129      which they appear in the jump thread path.  */
 130   basic_block dup_blocks[2];
 131
 132   vec<jump_thread_edge *> *path;
 133
 134   /* A list of incoming edges which we want to thread to the
 135      same path.  */
 136   struct el *incoming_edges;
 137
 138   /* hash_table support.  */
 139   static inline hashval_t hash (const redirection_data *);
 140   static inline int equal (const redirection_data *, const redirection_data *);
 141 };
 142
 143 jump_thread_path_allocator::jump_thread_path_allocator ()
 144 {
 145   obstack_init (&m_obstack);
 146 }
 147
 148 jump_thread_path_allocator::~jump_thread_path_allocator ()
 149 {
 150   obstack_free (&m_obstack, NULL);
 151 }
 152
 153 jump_thread_edge *
 154 jump_thread_path_allocator::allocate_thread_edge (edge e,
 155                                                   jump_thread_edge_type type)
 156 {
 157   void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
 158   return new (r) jump_thread_edge (e, type);
 159 }
 160
 161 vec<jump_thread_edge *> *
 162 jump_thread_path_allocator::allocate_thread_path ()
 163 {
 164   // ?? Since the paths live in an obstack, we should be able to remove all
 165   // references to path->release() throughout the code.
 166   void *r = obstack_alloc (&m_obstack, sizeof (vec <jump_thread_edge *>));
 167   return new (r) vec<jump_thread_edge *> ();
 168 }
 169
 170 jt_path_registry::jt_path_registry (bool backedge_threads)
 171 {
 172   m_paths.create (5);
 173   m_num_threaded_edges = 0;
 174   m_backedge_threads = backedge_threads;
 175 }
 176
 177 jt_path_registry::~jt_path_registry ()
 178 {
 179   m_paths.release ();
 180 }
 181
 182 fwd_jt_path_registry::fwd_jt_path_registry ()
 183   : jt_path_registry (/*backedge_threads=*/false)
 184 {
 185   m_removed_edges = new hash_table<struct removed_edges> (17);
 186   m_redirection_data = NULL;
 187 }
 188
 189 fwd_jt_path_registry::~fwd_jt_path_registry ()
 190 {
 191   delete m_removed_edges;
 192 }
 193
 194 back_jt_path_registry::back_jt_path_registry ()
 195   : jt_path_registry (/*backedge_threads=*/true)
 196 {
 197 }
 198
 199 void
 200 jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
 201                              edge e, jump_thread_edge_type type)
 202 {
 203   jump_thread_edge *x =  m_allocator.allocate_thread_edge (e, type);
 204   path->safe_push (x);
 205 }
 206
 207 vec<jump_thread_edge *> *
 208 jt_path_registry::allocate_thread_path ()
 209 {
 210   return m_allocator.allocate_thread_path ();
 211 }
 212
 213 /* Dump a jump threading path, including annotations about each
 214    edge in the path.  */
 215
 216 static void
 217 dump_jump_thread_path (FILE *dump_file,
 218                        const vec<jump_thread_edge *> &path,
 219                        bool registering)
 220 {
 221   if (registering)
 222     fprintf (dump_file,
 223              "  [%u] Registering jump thread: (%d, %d) incoming edge; ",
 224              dbg_cnt_counter (registered_jump_thread),
 225              path[0]->e->src->index, path[0]->e->dest->index);
 226   else
 227     fprintf (dump_file,
 228              "  Cancelling jump thread: (%d, %d) incoming edge; ",
 229              path[0]->e->src->index, path[0]->e->dest->index);
 230
 231   for (unsigned int i = 1; i < path.length (); i++)
 232     {
 233       /* We can get paths with a NULL edge when the final destination
 234          of a jump thread turns out to be a constant address.  We dump
 235          those paths when debugging, so we have to be prepared for that
 236          possibility here.  */
 237       if (path[i]->e == NULL)
 238         continue;
 239
 240       fprintf (dump_file, " (%d, %d) ",
 241                path[i]->e->src->index, path[i]->e->dest->index);
 242       switch (path[i]->type)
 243         {
 244         case EDGE_COPY_SRC_JOINER_BLOCK:
 245           fprintf (dump_file, "joiner");
 246           break;
 247         case EDGE_COPY_SRC_BLOCK:
 248           fprintf (dump_file, "normal");
 249           break;
 250         case EDGE_NO_COPY_SRC_BLOCK:
 251           fprintf (dump_file, "nocopy");
 252           break;
 253         default:
 254           gcc_unreachable ();
 255         }
 256
 257       if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
 258         fprintf (dump_file, " (back)");
 259     }
 260   fprintf (dump_file, "; \n");
 261 }
 262
 263 DEBUG_FUNCTION void
 264 debug (const vec<jump_thread_edge *> &path)
 265 {
 266   dump_jump_thread_path (stderr, path, true);
 267 }
 268
 269 DEBUG_FUNCTION void
 270 debug (const vec<jump_thread_edge *> *path)
 271 {
 272   debug (*path);
 273 }
 274
 275 /* Release the memory associated with PATH, and if dumping is enabled,
 276    dump out the reason why the thread was canceled.  */
 277
 278 static void
 279 cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
 280 {
 281   if (dump_file && (dump_flags & TDF_DETAILS))
 282     {
 283       if (reason)
 284         fprintf (dump_file, "%s: ", reason);
 285
 286       dump_jump_thread_path (dump_file, *path, false);
 287       fprintf (dump_file, "\n");
 288     }
 289   path->release ();
 290 }
 291
 292 /* Simple hashing function.  For any given incoming edge E, we're going
 293    to be most concerned with the final destination of its jump thread
 294    path.  So hash on the block index of the final edge in the path.  */
 295
 296 inline hashval_t
 297 redirection_data::hash (const redirection_data *p)
 298 {
 299   vec<jump_thread_edge *> *path = p->path;
 300   return path->last ()->e->dest->index;
 301 }
 302
 303 /* Given two hash table entries, return true if they have the same
 304    jump threading path.  */
 305 inline int
 306 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
 307 {
 308   vec<jump_thread_edge *> *path1 = p1->path;
 309   vec<jump_thread_edge *> *path2 = p2->path;
 310
 311   if (path1->length () != path2->length ())
 312     return false;
 313
 314   for (unsigned int i = 1; i < path1->length (); i++)
 315     {
 316       if ((*path1)[i]->type != (*path2)[i]->type
 317           || (*path1)[i]->e != (*path2)[i]->e)
 318         return false;
 319     }
 320
 321   return true;
 322 }
 323
 324 /* Data structure of information to pass to hash table traversal routines.  */
 325 struct ssa_local_info_t
 326 {
 327   /* The current block we are working on.  */
 328   basic_block bb;
 329
 330   /* We only create a template block for the first duplicated block in a
 331      jump threading path as we may need many duplicates of that block.
 332
 333      The second duplicate block in a path is specific to that path.  Creating
 334      and sharing a template for that block is considerably more difficult.  */
 335   basic_block template_block;
 336
 337   /* If we append debug stmts to the template block after creating it,
 338      this iterator won't be the last one in the block, and further
 339      copies of the template block shouldn't get debug stmts after
 340      it.  */
 341   gimple_stmt_iterator template_last_to_copy;
 342
 343   /* Blocks duplicated for the thread.  */
 344   bitmap duplicate_blocks;
 345
 346   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
 347   bool jumps_threaded;
 348
 349   /* When we have multiple paths through a joiner which reach different
 350      final destinations, then we may need to correct for potential
 351      profile insanities.  */
 352   bool need_profile_correction;
 353
 354   // Jump threading statistics.
 355   unsigned long num_threaded_edges;
 356 };
 357
 /* Once CFG updating for threading has started, the data needed for
    jump threading hangs off the AUX field of the incoming edge.  Use
    this macro to view the AUX field of edge E as a jump threading
    path.  */
 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *) ((E)->aux))
 362
 363 /* Remove the last statement in block BB if it is a control statement
 364    Also remove all outgoing edges except the edge which reaches DEST_BB.
 365    If DEST_BB is NULL, then remove all outgoing edges.  */
 366
 367 static void
 368 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 369 {
 370   gimple_stmt_iterator gsi;
 371   edge e;
 372   edge_iterator ei;
 373
 374   gsi = gsi_last_bb (bb);
 375
 376   /* If the duplicate ends with a control statement, then remove it.
 377
 378      Note that if we are duplicating the template block rather than the
 379      original basic block, then the duplicate might not have any real
 380      statements in it.  */
 381   if (!gsi_end_p (gsi)
 382       && gsi_stmt (gsi)
 383       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 384           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 385           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 386     gsi_remove (&gsi, true);
 387
 388   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 389     {
 390       if (e->dest != dest_bb)
 391         {
 392           free_dom_edge_info (e);
 393           remove_edge (e);
 394         }
 395       else
 396         {
 397           e->probability = profile_probability::always ();
 398           ei_next (&ei);
 399         }
 400     }
 401
 402   /* If the remaining edge is a loop exit, there must have
 403      a removed edge that was not a loop exit.
 404
 405      In that case BB and possibly other blocks were previously
 406      in the loop, but are now outside the loop.  Thus, we need
 407      to update the loop structures.  */
 408   if (single_succ_p (bb)
 409       && loop_outer (bb->loop_father)
 410       && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
 411     loops_state_set (LOOPS_NEED_FIXUP);
 412 }
 413
 414 /* Create a duplicate of BB.  Record the duplicate block in an array
 415    indexed by COUNT stored in RD.  */
 416
 417 static void
 418 create_block_for_threading (basic_block bb,
 419                             struct redirection_data *rd,
 420                             unsigned int count,
 421                             bitmap *duplicate_blocks)
 422 {
 423   edge_iterator ei;
 424   edge e;
 425
 426   /* We can use the generic block duplication code and simply remove
 427      the stuff we do not need.  */
 428   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 429
 430   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 431     {
 432       e->aux = NULL;
 433
 434       /* If we duplicate a block with an outgoing edge marked as
 435          EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
 436          leak out of the current pass.
 437
 438          It would be better to simplify switch statements and remove
 439          the edges before we get here, but the sequencing is nontrivial.  */
 440       e->flags &= ~EDGE_IGNORE;
 441     }
 442
 443   /* Zero out the profile, since the block is unreachable for now.  */
 444   rd->dup_blocks[count]->count = profile_count::uninitialized ();
 445   if (duplicate_blocks)
 446     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
 447 }
 448
 449 /* Given an outgoing edge E lookup and return its entry in our hash table.
 450
 451    If INSERT is true, then we insert the entry into the hash table if
 452    it is not already present.  INCOMING_EDGE is added to the list of incoming
 453    edges associated with E in the hash table.  */
 454
 455 redirection_data *
 456 fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
 457 {
 458   struct redirection_data **slot;
 459   struct redirection_data *elt;
 460   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 461
 462   /* Build a hash table element so we can see if E is already
 463      in the table.  */
 464   elt = XNEW (struct redirection_data);
 465   elt->path = path;
 466   elt->dup_blocks[0] = NULL;
 467   elt->dup_blocks[1] = NULL;
 468   elt->incoming_edges = NULL;
 469
 470   slot = m_redirection_data->find_slot (elt, insert);
 471
 472   /* This will only happen if INSERT is false and the entry is not
 473      in the hash table.  */
 474   if (slot == NULL)
 475     {
 476       free (elt);
 477       return NULL;
 478     }
 479
 480   /* This will only happen if E was not in the hash table and
 481      INSERT is true.  */
 482   if (*slot == NULL)
 483     {
 484       *slot = elt;
 485       elt->incoming_edges = XNEW (struct el);
 486       elt->incoming_edges->e = e;
 487       elt->incoming_edges->next = NULL;
 488       return elt;
 489     }
 490   /* E was in the hash table.  */
 491   else
 492     {
 493       /* Free ELT as we do not need it anymore, we will extract the
 494          relevant entry from the hash table itself.  */
 495       free (elt);
 496
 497       /* Get the entry stored in the hash table.  */
 498       elt = *slot;
 499
 500       /* If insertion was requested, then we need to add INCOMING_EDGE
 501          to the list of incoming edges associated with E.  */
 502       if (insert)
 503         {
 504           struct el *el = XNEW (struct el);
 505           el->next = elt->incoming_edges;
 506           el->e = e;
 507           elt->incoming_edges = el;
 508         }
 509
 510       return elt;
 511     }
 512 }
 513
 514 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 515    does not have a value associated with it.  */
 516
 517 static void
 518 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 519 {
 520   int src_idx = src_e->dest_idx;
 521   int tgt_idx = tgt_e->dest_idx;
 522
 523   /* Iterate over each PHI in e->dest.  */
 524   for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
 525                            gsi2 = gsi_start_phis (tgt_e->dest);
 526        !gsi_end_p (gsi);
 527        gsi_next (&gsi), gsi_next (&gsi2))
 528     {
 529       gphi *src_phi = gsi.phi ();
 530       gphi *dest_phi = gsi2.phi ();
 531       tree val = gimple_phi_arg_def (src_phi, src_idx);
 532       location_t locus = gimple_phi_arg_location (src_phi, src_idx);
 533
 534       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 535       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 536     }
 537 }
 538
 539 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
 540    to see if it has constant value in a flow sensitive manner.  Set
 541    LOCUS to location of the constant phi arg and return the value.
 542    Return DEF directly if either PATH or idx is ZERO.  */
 543
 544 static tree
 545 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
 546                          basic_block bb, int idx, location_t *locus)
 547 {
 548   tree arg;
 549   gphi *def_phi;
 550   basic_block def_bb;
 551
 552   if (path == NULL || idx == 0)
 553     return def;
 554
 555   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
 556   if (!def_phi)
 557     return def;
 558
 559   def_bb = gimple_bb (def_phi);
 560   /* Don't propagate loop invariants into deeper loops.  */
 561   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
 562     return def;
 563
 564   /* Backtrack jump threading path from IDX to see if def has constant
 565      value.  */
 566   for (int j = idx - 1; j >= 0; j--)
 567     {
 568       edge e = (*path)[j]->e;
 569       if (e->dest == def_bb)
 570         {
 571           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
 572           if (is_gimple_min_invariant (arg))
 573             {
 574               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
 575               return arg;
 576             }
 577           break;
 578         }
 579     }
 580
 581   return def;
 582 }
 583
 584 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 585    Try to backtrack jump threading PATH from node IDX to see if the arg
 586    has constant value, copy constant value instead of argument itself
 587    if yes.  */
 588
 589 static void
 590 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
 591                vec<jump_thread_edge *> *path, int idx)
 592 {
 593   gphi_iterator gsi;
 594   int src_indx = src_e->dest_idx;
 595
 596   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 597     {
 598       gphi *phi = gsi.phi ();
 599       tree def = gimple_phi_arg_def (phi, src_indx);
 600       location_t locus = gimple_phi_arg_location (phi, src_indx);
 601
 602       if (TREE_CODE (def) == SSA_NAME
 603           && !virtual_operand_p (gimple_phi_result (phi)))
 604         def = get_value_locus_in_path (def, path, bb, idx, &locus);
 605
 606       add_phi_arg (phi, def, tgt_e, locus);
 607     }
 608 }
 609
 610 /* We have recently made a copy of ORIG_BB, including its outgoing
 611    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 612    ORIG_BB has a new argument associated with edge from NEW_BB to the
 613    successor.  Initialize the PHI argument so that it is equal to the PHI
 614    argument associated with the edge from ORIG_BB to the successor.
 615    PATH and IDX are used to check if the new PHI argument has constant
 616    value in a flow sensitive manner.  */
 617
 618 static void
 619 update_destination_phis (basic_block orig_bb, basic_block new_bb,
 620                          vec<jump_thread_edge *> *path, int idx)
 621 {
 622   edge_iterator ei;
 623   edge e;
 624
 625   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 626     {
 627       edge e2 = find_edge (new_bb, e->dest);
 628       copy_phi_args (e->dest, e, e2, path, idx);
 629     }
 630 }
 631
 632 /* Given a duplicate block and its single destination (both stored
 633    in RD).  Create an edge between the duplicate and its single
 634    destination.
 635
 636    Add an additional argument to any PHI nodes at the single
 637    destination.  IDX is the start node in jump threading path
 638    we start to check to see if the new PHI argument has constant
 639    value along the jump threading path.  */
 640
 641 static void
 642 create_edge_and_update_destination_phis (struct redirection_data *rd,
 643                                          basic_block bb, int idx)
 644 {
 645   edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 646
 647   rescan_loop_exit (e, true, false);
 648
 649   /* We used to copy the thread path here.  That was added in 2007
 650      and dutifully updated through the representation changes in 2013.
 651
 652      In 2013 we added code to thread from an interior node through
 653      the backedge to another interior node.  That runs after the code
 654      to thread through loop headers from outside the loop.
 655
 656      The latter may delete edges in the CFG, including those
 657      which appeared in the jump threading path we copied here.  Thus
 658      we'd end up using a dangling pointer.
 659
 660      After reviewing the 2007/2011 code, I can't see how anything
 661      depended on copying the AUX field and clearly copying the jump
 662      threading path is problematical due to embedded edge pointers.
 663      It has been removed.  */
 664   e->aux = NULL;
 665
 666   /* If there are any PHI nodes at the destination of the outgoing edge
 667      from the duplicate block, then we will need to add a new argument
 668      to them.  The argument should have the same value as the argument
 669      associated with the outgoing edge stored in RD.  */
 670   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
 671 }
 672
 673 /* Look through PATH beginning at START and return TRUE if there are
 674    any additional blocks that need to be duplicated.  Otherwise,
 675    return FALSE.  */
 676 static bool
 677 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 678                                  unsigned int start)
 679 {
 680   for (unsigned int i = start + 1; i < path->length (); i++)
 681     {
 682       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 683           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 684         return true;
 685     }
 686   return false;
 687 }
 688
 689
 690 /* Compute the amount of profile count coming into the jump threading
 691    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR,
 692    as well as the amount of counts flowing out of the
 693    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
 694    identify blocks duplicated for jump threading, which have duplicated
 695    edges that need to be ignored in the analysis.  Return true if path contains
 696    a joiner, false otherwise.
 697
 698    In the non-joiner case, this is straightforward - all the counts
 699    flowing into the jump threading path should flow through the duplicated
 700    block and out of the duplicated path.
 701
 702    In the joiner case, it is very tricky.  Some of the counts flowing into
 703    the original path go offpath at the joiner.  The problem is that while
 704    we know how much total count goes off-path in the original control flow,
 705    we don't know how many of the counts corresponding to just the jump
 706    threading path go offpath at the joiner.
 707
 708    For example, assume we have the following control flow and identified
 709    jump threading paths:
 710
 711                 A     B     C
 712                  \    |    /
 713                Ea \   |Eb / Ec
 714                    \  |  /
 715                     v v v
 716                       J       <-- Joiner
 717                      / \
 718                 Eoff/   \Eon
 719                    /     \
 720                   v       v
 721                 Soff     Son  <--- Normal
 722                          /\
 723                       Ed/  \ Ee
 724                        /    \
 725                       v     v
 726                       D      E
 727
 728             Jump threading paths: A -> J -> Son -> D (path 1)
 729                                   C -> J -> Son -> E (path 2)
 730
 731    Note that the control flow could be more complicated:
 732    - Each jump threading path may have more than one incoming edge.  I.e. A and
 733    Ea could represent multiple incoming blocks/edges that are included in
 734    path 1.
 735    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
 736    before or after the "normal" copy block).  These are not duplicated onto
 737    the jump threading path, as they are single-successor.
 738    - Any of the blocks along the path may have other incoming edges that
 739    are not part of any jump threading path, but add profile counts along
 740    the path.
 741
 742    In the above example, after all jump threading is complete, we will
 743    end up with the following control flow:
 744
 745                 A          B           C
 746                 |          |           |
 747               Ea|          |Eb         |Ec
 748                 |          |           |
 749                 v          v           v
 750                Ja          J          Jc
 751                / \        / \Eon'     / \
 752           Eona/   \   ---/---\--------   \Eonc
 753              /     \ /  /     \           \
 754             v       v  v       v          v
 755            Sona     Soff      Son       Sonc
 756              \                 /\         /
 757               \___________    /  \  _____/
 758                           \  /    \/
 759                            vv      v
 760                             D      E
 761
 762    The main issue to notice here is that when we are processing path 1
 763    (A->J->Son->D) we need to figure out the outgoing edge weights to
 764    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
 765    sum of the incoming weights to D remain Ed.  The problem with simply
 766    assuming that Ja (and Jc when processing path 2) has the same outgoing
 767    probabilities to its successors as the original block J, is that after
 768    all paths are processed and other edges/counts removed (e.g. none
 769    of Ec will reach D after processing path 2), we may end up with not
 770    enough count flowing along duplicated edge Sona->D.
 771
 772    Therefore, in the case of a joiner, we keep track of all counts
 773    coming in along the current path, as well as from predecessors not
 774    on any jump threading path (Eb in the above example).  While we
 775    first assume that the duplicated Eona for Ja->Sona has the same
 776    probability as the original, we later compensate for other jump
 777    threading paths that may eliminate edges.  We do that by keeping track
 778    of all counts coming into the original path that are not in a jump
 779    thread (Eb in the above example, but as noted earlier, there could
 780    be other predecessors incoming to the path at various points, such
 781    as at Son).  Call this cumulative non-path count coming into the path
 782    before D as Enonpath.  We then ensure that the count from Sona->D is at
 783    least as big as (Ed - Enonpath), but no bigger than the minimum
 784    weight along the jump threading path.  The probabilities of both the
 785    original and duplicated joiner block J and Ja will be adjusted
 786    accordingly after the updates.  */
 787
 788 static bool
 789 compute_path_counts (struct redirection_data *rd,
 790                      ssa_local_info_t *local_info,
 791                      profile_count *path_in_count_ptr,
 792                      profile_count *path_out_count_ptr)
 793 {
 794   edge e = rd->incoming_edges->e;
 795   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 796   edge elast = path->last ()->e;
 797   profile_count nonpath_count = profile_count::zero ();
 798   bool has_joiner = false;
 799   profile_count path_in_count = profile_count::zero ();
 800
 801   /* Start by accumulating incoming edge counts to the path's first bb
 802      into a couple of buckets:
 803         path_in_count: total count of incoming edges that flow into the
 804                   current path.
 805         nonpath_count: total count of incoming edges that are not
 806                   flowing along *any* path.  These are the counts
 807                   that will still flow along the original path after
 808                   all path duplication is done by potentially multiple
 809                   calls to this routine.
 810      (Any other incoming edge counts are for a different jump threading
 811      path that will be handled by a later call to this routine.)
 812      To make this easier, start by recording in a bitmap the source block
 813      index of every incoming edge of the current path.  We could add up the
 814      path's incoming edge counts here, but we still need to walk all the
 815      first bb's incoming edges below to add up the counts of the other edges
 816      not included in this jump threading path.  */
 817   struct el *next, *el;
 818   auto_bitmap in_edge_srcs;
 819   for (el = rd->incoming_edges; el; el = next)
 820     {
 821       next = el->next;
 822       bitmap_set_bit (in_edge_srcs, el->e->src->index);
 823     }
 824   edge ein;
 825   edge_iterator ei;
 826   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 827     {
 828       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
 829       /* Simply check the incoming edge src against the set captured above.  */
 830       if (ein_path
 831           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
 832         {
 833           /* It is necessary but not sufficient that the last path edges
 834              are identical.  There may be different paths that share the
 835              same last path edge in the case where the last edge has a nocopy
 836              source block.  */
 837           gcc_assert (ein_path->last ()->e == elast);
 838           path_in_count += ein->count ();
 839         }
 840       else if (!ein_path)
 841         {
 842           /* Keep track of the incoming edges that are not on any jump-threading
 843              path.  These counts will still flow out of the original path after
 844              all jump threading is complete.  */
 845             nonpath_count += ein->count ();
 846         }
 847     }
 848
 849   /* Now compute the fraction of the total count coming into the first
 850      path bb that is from the current threading path.  */
 851   profile_count total_count = e->dest->count;
 852   /* Handle incoming profile insanities by capping path_in_count.  */
 853   if (total_count < path_in_count)
 854     path_in_count = total_count;
 855   profile_probability onpath_scale = path_in_count.probability_in (total_count);
 856
 857   /* Walk the entire path to do some more computation in order to estimate
 858      how much of the path_in_count will flow out of the duplicated threading
 859      path.  In the non-joiner case this is straightforward (it should be
 860      the same as path_in_count, although we will handle incoming profile
 861      insanities by setting it equal to the minimum count along the path).
 862
 863      In the joiner case, we need to estimate how much of the path_in_count
 864      will stay on the threading path after the joiner's conditional branch.
 865      We don't really know for sure how much of the counts
 866      associated with this path go to each successor of the joiner, but we'll
 867      estimate based on the fraction of the total count coming into the path
 868      bb that was from the threading path (computed above in onpath_scale).
 869      Afterwards, we will need to do some fixup to account for other threading
 870      paths and possible profile insanities.
 871
 872      In order to estimate the joiner case's counts we also need to update
 873      nonpath_count with any additional counts coming into the path.  Other
 874      blocks along the path may have additional predecessors from outside
 875      the path.  */
 876   profile_count path_out_count = path_in_count;
 877   profile_count min_path_count = path_in_count;
 878   for (unsigned int i = 1; i < path->length (); i++)
 879     {
 880       edge epath = (*path)[i]->e;
 881       profile_count cur_count = epath->count ();
 882       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 883         {
 884           has_joiner = true;
 885           cur_count = cur_count.apply_probability (onpath_scale);
 886         }
 887       /* In the joiner case we need to update nonpath_count for any edges
 888          coming into the path that will contribute to the count flowing
 889          into the path successor.  */
 890       if (has_joiner && epath != elast)
 891         {
 892           /* Look for other incoming edges after joiner.  */
 893           FOR_EACH_EDGE (ein, ei, epath->dest->preds)
 894             {
 895               if (ein != epath
 896                   /* Ignore in edges from blocks we have duplicated for a
 897                      threading path, which have duplicated edge counts until
 898                      they are redirected by an invocation of this routine.  */
 899                   && !bitmap_bit_p (local_info->duplicate_blocks,
 900                                     ein->src->index))
 901                 nonpath_count += ein->count ();
 902             }
 903         }
 904       if (cur_count < path_out_count)
 905         path_out_count = cur_count;
 906       if (epath->count () < min_path_count)
 907         min_path_count = epath->count ();
 908     }
 909
 910   /* We computed path_out_count above assuming that this path targeted
 911      the joiner's on-path successor with the same likelihood as it
 912      reached the joiner.  However, other thread paths through the joiner
 913      may take a different path through the normal copy source block
 914      (i.e. they have a different elast), meaning that they do not
 915      contribute any counts to this path's elast.  As a result, it may
 916      turn out that this path must have more count flowing to the on-path
 917      successor of the joiner.  Essentially, all of this path's elast
 918      count must be contributed by this path and any nonpath counts
 919      (since any path through the joiner with a different elast will not
 920      include a copy of this elast in its duplicated path).
 921      So ensure that this path's path_out_count is at least the
 922      difference between elast->count () and nonpath_count.  Otherwise the edge
 923      counts after threading will not be sane.  */
 924   if (local_info->need_profile_correction
 925       && has_joiner && path_out_count < elast->count () - nonpath_count)
 926     {
 927       path_out_count = elast->count () - nonpath_count;
 928       /* But neither can we go above the minimum count along the path
 929          we are duplicating.  This can be an issue due to profile
 930          insanities coming in to this pass.  */
 931       if (path_out_count > min_path_count)
 932         path_out_count = min_path_count;
 933     }
 934   /* Hand both counts back through the out-pointers; report any joiner.  */
 935   *path_in_count_ptr = path_in_count;
 936   *path_out_count_ptr = path_out_count;
 937   return has_joiner;
 938 }
 939
 940
 941 /* Update the counts and probabilities for both an original path
 942    edge EPATH and its duplicate EDUP (which may be NULL).  The duplicate
 943    source block will get a count of PATH_IN_COUNT, and the duplicate edge
 944    EDUP will get the probability of PATH_OUT_COUNT in PATH_IN_COUNT.  */
 945 static void
 946 update_profile (edge epath, edge edup, profile_count path_in_count,
 947                 profile_count path_out_count)
 948 {
 949
 950   /* First update the duplicated block's count.  */
 951   if (edup)
 952     {
 953       basic_block dup_block = edup->src;
 954
 955       /* Edup's count is reduced by path_out_count.  We need to redistribute
 956          probabilities to the remaining edges.  */
 957
 958       edge esucc;
 959       edge_iterator ei;
 960       profile_probability edup_prob
 961          = path_out_count.probability_in (path_in_count);
 962
 963       /* Either scale up or down the remaining edges.
 964          Probabilities are always in range <0,1> and thus we can't do
 965          both by the same loop.  */
 966       if (edup->probability > edup_prob)
 967         {
 968            profile_probability rev_scale
 969              = (profile_probability::always () - edup->probability)
 970                / (profile_probability::always () - edup_prob);
 971            FOR_EACH_EDGE (esucc, ei, dup_block->succs)
 972              if (esucc != edup)
 973                esucc->probability /= rev_scale;
 974         }
 975       else if (edup->probability < edup_prob)
 976         {
 977            profile_probability scale
 978              = (profile_probability::always () - edup_prob)
 979                / (profile_probability::always () - edup->probability);
 980           FOR_EACH_EDGE (esucc, ei, dup_block->succs)
 981             if (esucc != edup)
 982               esucc->probability *= scale;
 983         }
 984       if (edup_prob.initialized_p ())
 985         edup->probability = edup_prob;
 986
 987       gcc_assert (!dup_block->count.initialized_p ());
 988       dup_block->count = path_in_count;
 989     }
 990   /* With a zero PATH_IN_COUNT the original block and edge are left alone.  */
 991   if (path_in_count == profile_count::zero ())
 992     return;
 993
 994   profile_count final_count = epath->count () - path_out_count;
 995
 996   /* Now update the original block's count in the opposite manner:
 997      remove the count that will flow into the duplicated block.
 998      NOTE(review): no explicit underflow guard is visible here; presumably
 999      profile_count's subtraction copes with rounding issues -- confirm.  */
1000   epath->src->count -= path_in_count;
1001
1002   /* Next update this path edge's original and duplicated counts.  We know
1003      that the duplicated path will have path_out_count flowing
1004      out of it (in the joiner case this is the count along the duplicated path
1005      out of the duplicated joiner).  This count can then be removed from the
1006      original path edge.  */
1007
1008   edge esucc;
1009   edge_iterator ei;
1010   profile_probability epath_prob = final_count.probability_in (epath->src->count);
1011
1012   if (epath->probability > epath_prob)
1013     {
1014        profile_probability rev_scale
1015          = (profile_probability::always () - epath->probability)
1016            / (profile_probability::always () - epath_prob);
1017        FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1018          if (esucc != epath)
1019            esucc->probability /= rev_scale;
1020     }
1021   else if (epath->probability < epath_prob)
1022     {
1023        profile_probability scale
1024          = (profile_probability::always () - epath_prob)
1025            / (profile_probability::always () - epath->probability);
1026       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1027         if (esucc != epath)
1028           esucc->probability *= scale;
1029     }
1030   if (epath_prob.initialized_p ())
1031     epath->probability = epath_prob;
1032 }
1033
1034 /* Wire up the outgoing edges from the duplicate blocks of RD's path and
1035    update any PHIs as needed.  Also update the profile counts
1036    on the original and duplicate blocks and edges.  */
1037 void
1038 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1039                                ssa_local_info_t *local_info)
1040 {
1041   bool multi_incomings = (rd->incoming_edges->next != NULL);
1042   edge e = rd->incoming_edges->e;
1043   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1044   edge elast = path->last ()->e;
1045   profile_count path_in_count = profile_count::zero ();
1046   profile_count path_out_count = profile_count::zero ();
1047
1048   /* First determine how much profile count to move from original
1049      path to the duplicate path.  This is tricky in the presence of
1050      a joiner (see comments for compute_path_counts), where some portion
1051      of the path's counts will flow off-path from the joiner.  In the
1052      non-joiner case the path_in_count and path_out_count should be the
1053      same.  */
1054   bool has_joiner = compute_path_counts (rd, local_info,
1055                                          &path_in_count, &path_out_count);
1056
1057   for (unsigned int count = 0, i = 1; i < path->length (); i++)
1058     {
1059       edge epath = (*path)[i]->e;
1060
1061       /* If we were threading through a joiner block, then we want
1062          to keep its control statement and redirect an outgoing edge.
1063          Else we want to remove the control statement & edges, then create
1064          a new outgoing edge.  In both cases we may need to update PHIs.  */
1065       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1066         {
1067           edge victim;
1068           edge e2;
1069
1070           gcc_assert (has_joiner);
1071
1072           /* This updates the PHIs at the destination of the duplicate
1073              block.  Pass 0 instead of i if we are threading a path which
1074              has multiple incoming edges.  */
1075           update_destination_phis (local_info->bb, rd->dup_blocks[count],
1076                                    path, multi_incomings ? 0 : i);
1077
1078           /* Find the edge from the duplicate block to the block we're
1079              threading through.  That's the edge we want to redirect.  */
1080           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1081
1082           /* If there are no remaining blocks on the path to duplicate,
1083              then redirect VICTIM to the final destination of the jump
1084              threading path.  */
1085           if (!any_remaining_duplicated_blocks (path, i))
1086             {
1087               e2 = redirect_edge_and_branch (victim, elast->dest);
1088               /* If we redirected the edge, then we need to copy PHI arguments
1089                  at the target.  If the edge already existed (e2 != victim
1090                  case), then the PHIs in the target already have the correct
1091                  arguments.  */
1092               if (e2 == victim)
1093                 copy_phi_args (e2->dest, elast, e2,
1094                                path, multi_incomings ? 0 : i);
1095             }
1096           else
1097             {
1098               /* Redirect VICTIM to the next duplicated block in the path.  */
1099               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1100
1101               /* We need to update the PHIs in the next duplicated block.  We
1102                  want the new PHI args to have the same value as they had
1103                  in the source of the next duplicate block.
1104
1105                  Thus, we need to know which edge we traversed into the
1106                  source of the duplicate.  Furthermore, we may have
1107                  traversed many edges to reach the source of the duplicate.
1108
1109                  Walk through the path starting at element I until we
1110                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1111                  the edge from the prior element.  */
1112               for (unsigned int j = i + 1; j < path->length (); j++)
1113                 {
1114                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1115                     {
1116                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1117                       break;
1118                     }
1119                 }
1120             }
1121
1122           /* Update the counts of both the original block
1123              and path edge, and the duplicates.  The path duplicate's
1124              incoming count is the total for all edges
1125              incoming to this jump threading path computed earlier.
1126              And we know that the duplicated path will have path_out_count
1127              flowing out of it (i.e. along the duplicated path out of the
1128              duplicated joiner).  */
1129           update_profile (epath, e2, path_in_count, path_out_count);
1130         }
1131       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1132         {
1133           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1134           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1135                                                    multi_incomings ? 0 : i);
1136           if (count == 1)
1137             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1138
1139           /* Update the counts of both the original block
1140              and path edge, and the duplicates.  Since we are now after
1141              any joiner that may have existed on the path, the count
1142              flowing along the duplicated threaded path is path_out_count.
1143              If we didn't have a joiner, then path_out_count was derived
1144              from the total count along all incoming edges to the thread
1145              path (path_in_count).  If we had a joiner, it is the estimate
1146              compute_path_counts made for the count leaving the duplicated
1147              joiner along the path.  */
1148           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1149                           path_out_count, path_out_count);
1150         }
1151       else
1152         {
1153           /* No copy case.  In this case we don't have an equivalent block
1154              on the duplicated thread path to update, but we do need
1155              to remove the portion of the counts that were moved
1156              to the duplicated path from the counts flowing through
1157              this block on the original path.  Since all the no-copy edges
1158              are after any joiner, the removed count is the same as
1159              path_out_count.
1160
1161              If we didn't have a joiner, then path_out_count was derived
1162              from the total count along all incoming edges to the thread
1163              path (path_in_count).  If we had a joiner, it is the estimate
1164              compute_path_counts made for the count leaving the duplicated
1165              joiner along the path.  */
1166            update_profile (epath, NULL, path_out_count, path_out_count);
1167         }
1168
1169       /* Increment the index into the duplicated path when we processed
1170          a duplicated block.  */
1171       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1172           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1173         {
1174           count++;
1175         }
1176     }
1177 }
1178
1179 /* Hash table traversal callback routine to create duplicate blocks.  */
1180
1181 int
1182 ssa_create_duplicates (struct redirection_data **slot,
1183                        ssa_local_info_t *local_info)
1184 {
1185   struct redirection_data *rd = *slot;
1186
1187   /* The second duplicated block in a jump threading path is specific
1188      to the path.  So it gets stored in RD rather than in LOCAL_INFO.
1189
1190      Each time we're called, we have to look through the path and see
1191      if a second block needs to be duplicated.
1192
1193      Note the search starts with the third edge on the path.  The first
1194      edge is the incoming edge, the second edge always has its source
1195      duplicated.  Thus we start our search with the third edge.  */
1196   vec<jump_thread_edge *> *path = rd->path;
1197   for (unsigned int i = 2; i < path->length (); i++)
1198     {
1199       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1200           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1201         {
1202           create_block_for_threading ((*path)[i]->e->src, rd, 1,
1203                                       &local_info->duplicate_blocks);
1204           break;
1205         }
1206     }
1207
1208   /* Create a template block if we have not done so already.  Otherwise
1209      use the template to create a new block.  */
1210   if (local_info->template_block == NULL)
1211     {
1212       create_block_for_threading ((*path)[1]->e->src, rd, 0,
1213                                   &local_info->duplicate_blocks);
1214       local_info->template_block = rd->dup_blocks[0];
1215       local_info->template_last_to_copy
1216         = gsi_last_bb (local_info->template_block);
1217
1218       /* We do not create any outgoing edges for the template.  We will
1219          take care of that in a later traversal.  That way we do not
1220          create edges that are going to just be deleted.  */
1221     }
1222   else
1223     {
1224       gimple_seq seq = NULL;  /* Stmts set aside from the template; put back below.  */
1225       if (gsi_stmt (local_info->template_last_to_copy)
1226           != gsi_stmt (gsi_last_bb (local_info->template_block)))
1227         {
1228           if (gsi_end_p (local_info->template_last_to_copy))
1229             {
1230               seq = bb_seq (local_info->template_block);
1231               set_bb_seq (local_info->template_block, NULL);
1232             }
1233           else
1234             seq = gsi_split_seq_after (local_info->template_last_to_copy);
1235         }
1236       create_block_for_threading (local_info->template_block, rd, 0,
1237                                   &local_info->duplicate_blocks);
1238       if (seq)
1239         {
1240           if (gsi_end_p (local_info->template_last_to_copy))
1241             set_bb_seq (local_info->template_block, seq);
1242           else
1243             gsi_insert_seq_after (&local_info->template_last_to_copy,
1244                                   seq, GSI_SAME_STMT);
1245         }
1246
1247       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1248          block.  */
1249       ssa_fix_duplicate_block_edges (rd, local_info);
1250     }
1251
1252   if (MAY_HAVE_DEBUG_STMTS)
1253     {
1254       /* Copy debug stmts from each NO_COPY src block to the block
1255          that would have been its predecessor, if we can append to it
1256          (we can't add stmts after a block-ending stmt), or prepending
1257          to the duplicate of the successor, if there is one.  If
1258          there's no duplicate successor, we'll mostly drop the blocks
1259          on the floor; propagate_threaded_block_debug_into, called
1260          elsewhere, will consolidate and preserve the effects of the
1261          binds, but none of the markers.  */
1262       gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
1263       if (!gsi_end_p (copy_to))
1264         {
1265           if (stmt_ends_bb_p (gsi_stmt (copy_to)))
1266             {
1267               if (rd->dup_blocks[1])
1268                 copy_to = gsi_after_labels (rd->dup_blocks[1]);
1269               else
1270                 copy_to = gsi_none ();
1271             }
1272           else
1273             gsi_next (&copy_to);
1274         }
1275       for (unsigned int i = 2, j = 0; i < path->length (); i++)
1276         if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
1277             && gsi_bb (copy_to))
1278           {
1279             for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
1280                  !gsi_end_p (gsi); gsi_next (&gsi))
1281               {
1282                 if (!is_gimple_debug (gsi_stmt (gsi)))
1283                   continue;
1284                 gimple *stmt = gsi_stmt (gsi);
1285                 gimple *copy = gimple_copy (stmt);
1286                 gsi_insert_before (&copy_to, copy, GSI_SAME_STMT);
1287               }
1288           }
1289         else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1290                  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1291           {
1292             j++;
1293             gcc_assert (j < 2);
1294             copy_to = gsi_last_bb (rd->dup_blocks[j]);
1295             if (!gsi_end_p (copy_to))
1296               {
1297                 if (stmt_ends_bb_p (gsi_stmt (copy_to)))
1298                   copy_to = gsi_none ();
1299                 else
1300                   gsi_next (&copy_to);
1301               }
1302           }
1303     }
1304
1305   /* Keep walking the hash table.  */
1306   return 1;
1307 }
1308
1309 /* We did not create any outgoing edges for the template block during
1310    block creation.  This hash table traversal callback creates the
1311    outgoing edge for the template block.  */
1312
1313 inline int
1314 ssa_fixup_template_block (struct redirection_data **slot,
1315                           ssa_local_info_t *local_info)
1316 {
1317   struct redirection_data *rd = *slot;
1318
1319   /* If this is the template block, halt the traversal (return 0) after
1320      updating it appropriately.
1321
1322      If we were threading through a joiner block, then we want
1323      to keep its control statement and redirect an outgoing edge.
1324      Else we want to remove the control statement & edges, then create
1325      a new outgoing edge.  In both cases we may need to update PHIs.  */
1326   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1327     {
1328       ssa_fix_duplicate_block_edges (rd, local_info);
1329       return 0;
1330     }
1331
1332   return 1;
1333 }
1334
1335 /* Hash table traversal callback to redirect each incoming edge
1336    associated with this hash table element to its new destination.  */
1337
1338 static int
1339 ssa_redirect_edges (struct redirection_data **slot,
1340                     ssa_local_info_t *local_info)
1341 {
1342   struct redirection_data *rd = *slot;
1343   struct el *next, *el;
1344
1345   /* Walk over all the incoming edges associated with this hash table
1346      entry.  */
1347   for (el = rd->incoming_edges; el; el = next)
1348     {
1349       edge e = el->e;
1350       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1351
1352       /* Go ahead and free this element from the list.  Doing this now
1353          avoids the need for another list walk when we destroy the hash
1354          table.  */
1355       next = el->next;
1356       free (el);
1357
1358       local_info->num_threaded_edges++;
1359
1360       if (rd->dup_blocks[0])
1361         {
1362           edge e2;
1363
1364           if (dump_file && (dump_flags & TDF_DETAILS))
1365             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1366                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1367
1368           /* Redirect the incoming edge (possibly to the joiner block) to the
1369              appropriate duplicate block.  */
1370           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1371           gcc_assert (e == e2);
1372           flush_pending_stmts (e2);
1373         }
1374
1375       /* Go ahead and clear E->aux.  It's not needed anymore and failure
1376          to clear it will cause all kinds of unpleasant problems later.  */
1377       path->release ();
1378       e->aux = NULL;
1379
1380     }
1381   /* Indicate that we actually threaded one or more jumps.  The list nodes
1382      were freed above, so only the (stale) head pointer is tested here.  */
1383   if (rd->incoming_edges)
1384     local_info->jumps_threaded = true;
1385
1386   return 1;
1387 }
1388
1389 /* Return true if block BB has no executable statements other than
1390    a simple ctrl flow instruction.  When the number of outgoing edges
1391    is one, this is equivalent to a "forwarder" block.  */
1392
1393 static bool
1394 redirection_block_p (basic_block bb)
1395 {
1396   gimple_stmt_iterator gsi;
1397
1398   /* Skip labels, debug stmts, nops and clobbers to reach the first real stmt.  */
1399   gsi = gsi_start_bb (bb);
1400   while (!gsi_end_p (gsi)
1401          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1402              || is_gimple_debug (gsi_stmt (gsi))
1403              || gimple_nop_p (gsi_stmt (gsi))
1404              || gimple_clobber_p (gsi_stmt (gsi))))
1405     gsi_next (&gsi);
1406
1407   /* Check if this is an empty block.  */
1408   if (gsi_end_p (gsi))
1409     return true;
1410
1411   /* Test that we've reached the terminating control statement
1412      (a GIMPLE_COND, GIMPLE_GOTO or GIMPLE_SWITCH).  */
1413          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1414              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1415              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1416 }
1417
1418 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1419    is reached via one or more specific incoming edges, we know which
1420    outgoing edge from BB will be traversed.
1421
1422    We want to redirect those incoming edges to the target of the
1423    appropriate outgoing edge.  Doing so avoids a conditional branch
1424    and may expose new optimization opportunities.  Note that we have
1425    to update dominator tree and SSA graph after such changes.
1426
1427    The key to keeping the SSA graph update manageable is to duplicate
1428    the side effects occurring in BB so that those side effects still
1429    occur on the paths which bypass BB after redirecting edges.
1430
1431    We accomplish this by creating duplicates of BB and arranging for
1432    the duplicates to unconditionally pass control to one specific
1433    successor of BB.  We then revector the incoming edges into BB to
1434    the appropriate duplicate of BB.
1435
1436    If NOLOOP_ONLY is true, we only perform the threading as long as it
1437    does not affect the structure of the loops in a nontrivial way.
1438
1439    If JOINERS is true, then thread through joiner blocks as well.  */
1440
1441 bool
1442 fwd_jt_path_registry::thread_block_1 (basic_block bb,
1443                                       bool noloop_only,
1444                                       bool joiners)
1445 {
1446   /* E is an incoming edge into BB that we may or may not want to
1447      redirect to a duplicate of BB.  */
1448   edge e, e2;
1449   edge_iterator ei;
1450   ssa_local_info_t local_info;
1451
1452   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1453   local_info.need_profile_correction = false;
1454   local_info.num_threaded_edges = 0;
1455
1456   /* To avoid scanning a linear array for the element we need we instead
1457      use a hash table.  For normal code there should be no noticeable
1458      difference.  However, if we have a block with a large number of
1459      incoming and outgoing edges such linear searches can get expensive.  */
1460   m_redirection_data
1461     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1462
1463   /* Record each unique threaded destination into a hash table for
1464      efficient lookups.  */
1465   edge last = NULL;
1466   FOR_EACH_EDGE (e, ei, bb->preds)
1467     {
1468       if (e->aux == NULL)
1469         continue;
1470
1471       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1472
1473       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1474           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1475         continue;
1476
1477       e2 = path->last ()->e;
1478       if (!e2 || noloop_only)
1479         {
1480           /* If NOLOOP_ONLY is true, we only allow threading through the
1481              header of a loop to exit edges.  */
1482
1483           /* One case occurs when there was loop header buried in a jump
1484              threading path that crosses loop boundaries.  We do not try
1485              and thread this elsewhere, so just cancel the jump threading
1486              request by clearing the AUX field now.  */
1487           if (bb->loop_father != e2->src->loop_father
1488               && (!loop_exit_edge_p (e2->src->loop_father, e2)
1489                   || flow_loop_nested_p (bb->loop_father,
1490                                          e2->dest->loop_father)))
1491             {
1492               /* Since this case is not handled by our special code
1493                  to thread through a loop header, we must explicitly
1494                  cancel the threading request here.  */
1495               cancel_thread (path, "Threading through unhandled loop header");
1496               e->aux = NULL;
1497               continue;
1498             }
1499
1500           /* Another case occurs when trying to thread through our
1501              own loop header, possibly from inside the loop.  We will
1502              thread these later.  */
1503           unsigned int i;
1504           for (i = 1; i < path->length (); i++)
1505             {
1506               if ((*path)[i]->e->src == bb->loop_father->header
1507                   && (!loop_exit_edge_p (bb->loop_father, e2)
1508                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1509                 break;
1510             }
1511
1512           if (i != path->length ())
1513             continue;
1514
1515           /* Loop parallelization can be confused by the result of
1516              threading through the loop exit test back into the loop.
1517              However, threading those jumps seems to help other codes.
1518
1519              I have been unable to find anything related to the shape of
1520              the CFG, the contents of the affected blocks, etc which would
1521              allow a more sensible test than what we're using below which
1522              merely avoids the optimization when parallelizing loops.  */
1523           if (flag_tree_parallelize_loops > 1)
1524             {
1525               for (i = 1; i < path->length (); i++)
1526                 if (bb->loop_father == e2->src->loop_father
1527                     && loop_exits_from_bb_p (bb->loop_father,
1528                                              (*path)[i]->e->src)
1529                     && !loop_exit_edge_p (bb->loop_father, e2))
1530                   break;
1531
1532               if (i != path->length ())
1533                 {
1534                   cancel_thread (path, "Threading through loop exit");
1535                   e->aux = NULL;
1536                   continue;
1537                 }
1538             }
1539         }
1540
1541       /* Insert the outgoing edge into the hash table if it is not
1542          already in the hash table.  */
1543       lookup_redirection_data (e, INSERT);
1544
1545       /* When we have thread paths through a common joiner with different
1546          final destinations, then we may need corrections to deal with
1547          profile insanities.  See the big comment before compute_path_counts.  */
1548       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1549         {
1550           if (!last)
1551             last = e2;
1552           else if (e2 != last)
1553             local_info.need_profile_correction = true;
1554         }
1555     }
1556
1557   /* We do not update dominance info.  */
1558   free_dominance_info (CDI_DOMINATORS);
1559
1560   /* We know we only thread through the loop header to loop exits.
1561      Let the basic block duplication hook know we are not creating
1562      a multiple entry loop.  */
1563   if (noloop_only
1564       && bb == bb->loop_father->header)
1565     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1566
1567   /* Now create duplicates of BB.
1568
1569      Note that for a block with a high outgoing degree we can waste
1570      a lot of time and memory creating and destroying useless edges.
1571
1572      So we first duplicate BB and remove the control structure at the
1573      tail of the duplicate as well as all outgoing edges from the
1574      duplicate.  We then use that duplicate block as a template for
1575      the rest of the duplicates.  */
1576   local_info.template_block = NULL;
1577   local_info.bb = bb;
1578   local_info.jumps_threaded = false;
1579   m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1580                             (&local_info);
1581
1582   /* The template does not have an outgoing edge.  Create that outgoing
1583      edge and update PHI nodes as the edge's target as necessary.
1584
1585      We do this after creating all the duplicates to avoid creating
1586      unnecessary edges.  */
1587   m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1588                             (&local_info);
1589
1590   /* The hash table traversals above created the duplicate blocks (and the
1591      statements within the duplicate blocks).  This loop creates PHI nodes for
1592      the duplicated blocks and redirects the incoming edges into BB to reach
1593      the duplicates of BB.  */
1594   m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1595                             (&local_info);
1596
1597   /* Done with this block.  Clear REDIRECTION_DATA.  */
1598   delete m_redirection_data;
1599   m_redirection_data = NULL;
1600
1601   if (noloop_only
1602       && bb == bb->loop_father->header)
1603     set_loop_copy (bb->loop_father, NULL);
1604
1605   BITMAP_FREE (local_info.duplicate_blocks);
1606   local_info.duplicate_blocks = NULL;
1607
1608   m_num_threaded_edges += local_info.num_threaded_edges;
1609
1610   /* Indicate to our caller whether or not any jumps were threaded.  */
1611   return local_info.jumps_threaded;
1612 }
1613
1614 /* Thread the jump thread requests on the incoming edges of BB in two
1615    rounds, both performed by thread_block_1: the first round handles only
1616    the paths that do not copy a joiner block, the second round only the
1617    paths that do.  NOLOOP_ONLY is forwarded unchanged to both rounds.
1618    Doing the joiner-free round first lets us be as aggressive as possible
1619    without worrying that copying a joiner block will create a jump
1620    threading opportunity.  Returns true if either round threaded a jump.  */
1621
1622 bool
1623 fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
1624 {
1625   /* Both rounds must always run, so evaluate them before combining.  */
1626   const bool threaded_plain = thread_block_1 (bb, noloop_only, false);
1627   const bool threaded_joiner = thread_block_1 (bb, noloop_only, true);
1628   return threaded_plain || threaded_joiner;
1629 }
1630
1631 /* Predicate passed to dfs_enumerate_from.  It accepts BB (returns true)
1632    unless BB is STOP or the block stored in the file-scope DBDS_CE_STOP.  */
1633
1634 static basic_block dbds_ce_stop;
1635 static bool
1636 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1637 {
1638   const bool is_barrier = (bb == (const_basic_block) stop || bb == dbds_ce_stop);
1639   return !is_barrier;
1640 }
1641
1642 /* Classify BB with respect to the latch of LOOP.  The answer is DOMST_NONDOMINATING
1643    when BB is not a successor of LOOP->header or when a block enumerated from the latch
1644    has a predecessor edge leaving the header, DOMST_DOMINATING when BB is the latch or
1645    such a block has a predecessor edge leaving BB, and DOMST_LOOP_BROKEN otherwise.  */
1646
1647 enum bb_dom_status
1648 determine_bb_domination_status (class loop *loop, basic_block bb)
1649 {
1650   edge_iterator it;
1651   edge pred;
1652   bool follows_header = false;
1653   bool reaches_bb = false;
1654
1655   /* Everything below assumes BB is a successor of LOOP->header.  When it
1656      is not, DOMST_NONDOMINATING is the answer that is always safe.  */
1657   FOR_EACH_EDGE (pred, it, bb->preds)
1658     if (pred->src == loop->header)
1659       {
1660         follows_header = true;
1661         break;
1662       }
1663   if (!follows_header)
1664     return DOMST_NONDOMINATING;
1665
1666   /* Nothing to walk when BB is the latch itself.  */
1667   if (loop->latch == bb)
1668     return DOMST_DOMINATING;
1669
1670   /* Enumerate blocks starting from the latch.  The predicate rejects BB
1671      (passed as its STOP argument) and the header (stored in DBDS_CE_STOP).  */
1672   basic_block *visited = XCNEWVEC (basic_block, loop->num_nodes);
1673   dbds_ce_stop = loop->header;
1674   const unsigned n_visited
1675     = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1676                           visited, loop->num_nodes, bb);
1677
1678   /* Inspect every incoming edge of every enumerated block.  */
1679   for (unsigned idx = 0; idx < n_visited; idx++)
1680     {
1681       FOR_EACH_EDGE (pred, it, visited[idx]->preds)
1682         {
1683           if (pred->src == loop->header)
1684             {
1685               free (visited);
1686               return DOMST_NONDOMINATING;
1687             }
1688           if (pred->src == bb)
1689             reaches_bb = true;
1690         }
1691     }
1692
1693   free (visited);
1694   if (reaches_bb)
1695     return DOMST_DOMINATING;
1696   return DOMST_LOOP_BROKEN;
1697 }
1698
1699 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.  If MAY_PEEL_LOOP_HEADERS is
1700    false, nothing is threaded unless redirection_block_p accepts the header, so that we avoid threading from entry
1701    edges to the inside of the loop.  On the FAIL path every thread request on an edge entering the header is cancelled and false is returned.  */
1702
1703 bool
1704 fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
1705                                                   bool may_peel_loop_headers)
1706 {
1707   basic_block header = loop->header;
1708   edge e, tgt_edge, latch = loop_latch_edge (loop);
1709   edge_iterator ei;
1710   basic_block tgt_bb, atgt_bb;
1711   enum bb_dom_status domst;
1712
1713   /* We have already threaded through headers to exits, so all the threading
1714      requests now are to the inside of the loop.  We need to avoid creating
1715      irreducible regions (i.e., loops with more than one entry block), and
1716      also loop with several latch edges, or new subloops of the loop (although
1717      there are cases where it might be appropriate, it is difficult to decide,
1718      and doing it wrongly may confuse other optimizers).
1719
1720      We could handle more general cases here.  However, the intention is to
1721      preserve some information about the loop, which is impossible if its
1722      structure changes significantly, in a way that is not well understood.
1723      Thus we only handle few important special cases, in which also updating
1724      of the loop-carried information should be feasible:
1725
1726      1) Propagation of latch edge to a block that dominates the latch block
1727         of a loop.  This aims to handle the following idiom:
1728
1729         first = 1;
1730         while (1)
1731           {
1732             if (first)
1733               initialize;
1734             first = 0;
1735             body;
1736           }
1737
1738         After threading the latch edge, this becomes
1739
1740         first = 1;
1741         if (first)
1742           initialize;
1743         while (1)
1744           {
1745             first = 0;
1746             body;
1747           }
1748
1749         The original header of the loop is moved out of it, and we may thread
1750         the remaining edges through it without further constraints.
1751
1752      2) All entry edges are propagated to a single basic block that dominates
1753         the latch block of the loop.  This aims to handle the following idiom
1754         (normally created for "for" loops):
1755
1756         i = 0;
1757         while (1)
1758           {
1759             if (i >= 100)
1760               break;
1761             body;
1762             i++;
1763           }
1764
1765         This becomes
1766
1767         i = 0;
1768         while (1)
1769           {
1770             body;
1771             i++;
1772             if (i >= 100)
1773               break;
1774           }
1775      */
1776
1777   /* Threading through the header won't improve the code if the header has just
1778      one successor.  */
1779   if (single_succ_p (header))
1780     goto fail;
1781   /* Without permission to peel, only a header accepted by redirection_block_p may be threaded through.  */
1782   if (!may_peel_loop_headers && !redirection_block_p (loop->header))
1783     goto fail;
1784   else
1785     {
1786       tgt_bb = NULL;  /* Becomes the one block all threaded header edges lead to.  */
1787       tgt_edge = NULL;
1788       FOR_EACH_EDGE (e, ei, header->preds)
1789         {
1790           if (!e->aux)
1791             {
1792               if (e == latch)
1793                 continue;
1794
1795               /* If latch is not threaded, and there is a header
1796                  edge that is not threaded, we would create loop
1797                  with multiple entries.  */
1798               goto fail;
1799             }
1800
1801           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1802
1803           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1804             goto fail;
1805           tgt_edge = (*path)[1]->e;
1806           atgt_bb = tgt_edge->dest;
1807           if (!tgt_bb)
1808             tgt_bb = atgt_bb;
1809           /* Two targets of threading would make us create loop
1810              with multiple entries.  */
1811           else if (tgt_bb != atgt_bb)
1812             goto fail;
1813         }
1814
1815       if (!tgt_bb)
1816         {
1817           /* There are no threading requests.  */
1818           return false;
1819         }
1820
1821       /* Redirecting to empty loop latch is useless.  */
1822       if (tgt_bb == loop->latch
1823           && empty_block_p (loop->latch))
1824         goto fail;
1825     }
1826
1827   /* The target block must dominate the loop latch, otherwise we would be
1828      creating a subloop.  */
1829   domst = determine_bb_domination_status (loop, tgt_bb);
1830   if (domst == DOMST_NONDOMINATING)
1831     goto fail;
1832   if (domst == DOMST_LOOP_BROKEN)
1833     {
1834       /* If the loop ceased to exist, mark it as such, and thread through its
1835          original header.  */
1836       mark_loop_for_removal (loop);
1837       return thread_block (header, false);
1838     }
1839
1840   if (tgt_bb->loop_father->header == tgt_bb)
1841     {
1842       /* If the target of the threading is a header of a subloop, we need
1843          to create a preheader for it, so that the headers of the two loops
1844          do not merge.  */
1845       if (EDGE_COUNT (tgt_bb->preds) > 2)
1846         {
1847           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1848           gcc_assert (tgt_bb != NULL);
1849         }
1850       else
1851         tgt_bb = split_edge (tgt_edge);
1852     }
1853
1854   basic_block new_preheader;
1855
1856   /* Now consider the case entry edges are redirected to the new entry
1857      block.  Remember one entry edge, so that we can find the new
1858      preheader (its destination after threading).  */
1859   FOR_EACH_EDGE (e, ei, header->preds)
1860     {
1861       if (e->aux)
1862         break;
1863     }
1864
1865   /* The duplicate of the header is the new preheader of the loop.  Ensure
1866      that it is placed correctly in the loop hierarchy.  */
1867   set_loop_copy (loop, loop_outer (loop));
1868
1869   thread_block (header, false);
1870   set_loop_copy (loop, NULL);
1871   new_preheader = e->dest;  /* E is the edge remembered above; after threading its destination is the header's duplicate.  */
1872
1873   /* Create the new latch block.  This is always necessary, as the latch
1874      must have only a single successor, but the original header had at
1875      least two successors.  */
1876   loop->latch = NULL;
1877   mfb_kj_edge = single_succ_edge (new_preheader);  /* NOTE(review): mfb_kj_edge is declared outside this function; presumably read by mfb_keep_just -- confirm.  */
1878   loop->header = mfb_kj_edge->dest;
1879   latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1880   loop->header = latch->dest;
1881   loop->latch = latch->src;
1882   return true;
1883
1884 fail:
1885   /* We failed to thread anything.  Cancel the requests.  */
1886   FOR_EACH_EDGE (e, ei, header->preds)
1887     {
1888       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1889
1890       if (path)
1891         {
1892           cancel_thread (path, "Failure in thread_through_loop_header");
1893           e->aux = NULL;
1894         }
1895     }
1896   return false;
1897 }
1898
1899 /* E1 and E2 enter the same basic block.  Walk the PHI nodes of that block
1900    and return false at the first one whose arguments for E1 and E2 are not
1901    operand_equal_p; return true otherwise, also when there are no PHI nodes.  */
1902
1903 static bool
1904 phi_args_equal_on_edges (edge e1, edge e2)
1905 {
1906   const int slot1 = e1->dest_idx;
1907   const int slot2 = e2->dest_idx;
1908   gphi_iterator pi = gsi_start_phis (e1->dest);
1909   while (!gsi_end_p (pi))
1910     {
1911       gphi *node = pi.phi ();
1912       tree arg1 = gimple_phi_arg_def (node, slot1);
1913       tree arg2 = gimple_phi_arg_def (node, slot2);
1914       if (!operand_equal_p (arg1, arg2, 0))
1915         return false;
1916       gsi_next (&pi);
1917     }
1918   return true;
1919 }
1920
1921 /* Count what BB contains: one for every PHI node whose result is not a
1922    virtual operand plus one for every statement that is not a debug stmt.  */
1923
1924 static unsigned int
1925 count_stmts_and_phis_in_block (basic_block bb)
1926 {
1927   unsigned int n_phis = 0;
1928   unsigned int n_stmts = 0;
1929
1930   /* PHI nodes first; those defining a virtual operand are not counted.  */
1931   for (gphi_iterator pi = gsi_start_phis (bb); !gsi_end_p (pi); gsi_next (&pi))
1932     {
1933       if (virtual_operand_p (PHI_RESULT (pi.phi ())))
1934         continue;
1935       n_phis++;
1936     }
1937
1938   /* Then the ordinary statements, leaving debug statements out.  */
1939   for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1940     n_stmts += is_gimple_debug (gsi_stmt (si)) ? 0 : 1;
1941
1942   return n_phis + n_stmts;
1943 }
1944
1945
1946 /* Walk through the registered jump threads and convert them into a
1947    form convenient for this pass.
1948
1949    Any block which has incoming edges threaded to outgoing edges
1950    will have its entry in THREADED_BLOCKS set.
1951
1952    Any threaded edge will have its new outgoing edge stored in the
1953    original edge's AUX field.
1954
1955    This form avoids the need to walk all the edges in the CFG to
1956    discover blocks which need processing and avoids unnecessary
1957    hash table lookups to map from threaded edge to new target.  */
1958
1959 void
1960 fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
1961 {
1962   unsigned int i;
1963   bitmap_iterator bi;
1964   auto_bitmap tmp;
1965   basic_block bb;
1966   edge e;
1967   edge_iterator ei;
1968
1969   /* It is possible to have jump threads in which one is a subpath
1970      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
1971      block and (B, C), (C, D) where no joiner block exists.
1972
1973      When this occurs ignore the jump thread request with the joiner
1974      block.  It's totally subsumed by the simpler jump thread request.
1975
1976      This results in less block copying, simpler CFGs.  More importantly,
1977      when we duplicate the joiner block, B, in this case we will create
1978      a new threading opportunity that we wouldn't be able to optimize
1979      until the next jump threading iteration.
1980
1981      So first convert the jump thread requests which do not require a
1982      joiner block.  */
1983   for (i = 0; i < m_paths.length (); i++)
1984     {
1985       vec<jump_thread_edge *> *path = m_paths[i];
1986
1987       if (path->length () > 1
1988           && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1989         {
1990           edge e = (*path)[0]->e;
1991           e->aux = (void *)path;
1992           bitmap_set_bit (tmp, e->dest->index);
1993         }
1994     }
1995
1996   /* Now iterate again, converting cases where we want to thread
1997      through a joiner block, but only if no other edge on the path
1998      already has a jump thread attached to it.  We do this in two passes,
1999      to avoid situations where the order in the paths vec can hide overlapping
2000      threads (the path is recorded on the incoming edge, so we would miss
2001      cases where the second path starts at a downstream edge on the same
2002      path).  First record all joiner paths, deleting any in the unexpected
2003      case where there is already a path for that incoming edge.  */
2004   for (i = 0; i < m_paths.length ();)
2005     {
2006       vec<jump_thread_edge *> *path = m_paths[i];
2007
2008       if (path->length () > 1
2009           && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2010         {
2011           /* Attach the path to the starting edge if none is yet recorded.  */
2012           if ((*path)[0]->e->aux == NULL)
2013             {
2014               (*path)[0]->e->aux = path;
2015               i++;
2016             }
2017           else
2018             {
2019               m_paths.unordered_remove (i);  /* I is not advanced here: presumably slot I now holds another path -- confirm against vec.  */
2020               cancel_thread (path);
2021             }
2022         }
2023       else
2024         {
2025           i++;
2026         }
2027     }
2028
2029   /* Second, look for paths that have any other jump thread attached to
2030      them, and either finish converting them or cancel them.  */
2031   for (i = 0; i < m_paths.length ();)
2032     {
2033       vec<jump_thread_edge *> *path = m_paths[i];
2034       edge e = (*path)[0]->e;
2035
2036       if (path->length () > 1
2037           && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2038         {
2039           unsigned int j;
2040           for (j = 1; j < path->length (); j++)
2041             if ((*path)[j]->e->aux != NULL)
2042               break;
2043
2044           /* If we iterated through the entire path without exiting the loop,
2045              then we are good to go, record it.  */
2046           if (j == path->length ())
2047             {
2048               bitmap_set_bit (tmp, e->dest->index);
2049               i++;
2050             }
2051           else
2052             {
2053               e->aux = NULL;
2054               m_paths.unordered_remove (i);  /* As above, I stays put after the removal.  */
2055               cancel_thread (path);
2056             }
2057         }
2058       else
2059         {
2060           i++;
2061         }
2062     }
2063
2064   /* When optimizing for size, prune all thread paths where statement
2065      duplication is necessary.
2066
2067      We walk the jump thread path looking for copied blocks.  There's
2068      two types of copied blocks.
2069
2070        EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
2071        cancel the jump threading request when optimizing for size.
2072
2073        EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
2074        will be killed by threading.  If threading does not kill all of
2075        its statements, then we should cancel the jump threading request
2076        when optimizing for size.  */
2077   if (optimize_function_for_size_p (cfun))
2078     {
2079       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2080         {
2081           FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
2082             if (e->aux)
2083               {
2084                 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2085
2086                 unsigned int j;
2087                 for (j = 1; j < path->length (); j++)
2088                   {
2089                     bb = (*path)[j]->e->src;
2090                     if (redirection_block_p (bb))
2091                       ;  /* A block accepted by redirection_block_p never cancels the path here.  */
2092                     else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
2093                              || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
2094                                  && (count_stmts_and_phis_in_block (bb)
2095                                      != estimate_threading_killed_stmts (bb))))
2096                       break;
2097                   }
2098
2099                 if (j != path->length ())
2100                   {
2101                     cancel_thread (path);
2102                     e->aux = NULL;
2103                   }
2104                 else
2105                   bitmap_set_bit (threaded_blocks, i);
2106               }
2107         }
2108     }
2109   else
2110     bitmap_copy (threaded_blocks, tmp);
2111
2112   /* If we have a joiner block (J) which has two successors S1 and S2 and
2113      we are threading through S1 and the final destination of the thread
2114      is S2, then we must verify that any PHI nodes in S2 have the same
2115      PHI arguments for the edge J->S2 and J->S1->...->S2.
2116
2117      We used to detect this prior to registering the jump thread, but
2118      that prohibits propagation of edge equivalences into non-dominated
2119      PHI nodes as the equivalency test might occur before propagation.
2120
2121      This must also occur after we truncate any jump threading paths
2122      as this scenario may only show up after truncation.
2123
2124      This works for now, but will need improvement as part of the FSA
2125      optimization.
2126
2127      Note since we've moved the thread request data to the edges,
2128      we have to iterate on those rather than the threaded_edges vector.  */
2129   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2130     {
2131       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2132       FOR_EACH_EDGE (e, ei, bb->preds)
2133         {
2134           if (e->aux)
2135             {
2136               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2137               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2138
2139               if (have_joiner)
2140                 {
2141                   basic_block joiner = e->dest;
2142                   edge final_edge = path->last ()->e;
2143                   basic_block final_dest = final_edge->dest;
2144                   edge e2 = find_edge (joiner, final_dest);
2145
2146                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2147                     {
2148                       cancel_thread (path);
2149                       e->aux = NULL;
2150                     }
2151                 }
2152             }
2153         }
2154     }
2155
2156   /* Look for jump threading paths which cross multiple loop headers.
2157
2158      The code to thread through loop headers will change the CFG in ways
2159      that invalidate the cached loop iteration information.  So we must
2160      detect that case and wipe the cached information.  */
2161   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2162     {
2163       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2164       FOR_EACH_EDGE (e, ei, bb->preds)
2165         {
2166           if (e->aux)
2167             {
2168               gcc_assert (loops_state_satisfies_p
2169                             (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
2170               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2171
2172               for (unsigned int i = 0, crossed_headers = 0;  /* This I shadows the bitmap index I of the enclosing loop.  */
2173                    i < path->length ();
2174                    i++)
2175                 {
2176                   basic_block dest = (*path)[i]->e->dest;
2177                   basic_block src = (*path)[i]->e->src;
2178                   /* If we enter a loop.  */
2179                   if (flow_loop_nested_p (src->loop_father, dest->loop_father))
2180                     ++crossed_headers;
2181                   /* If we step from a block outside an irreducible region
2182                      to a block inside an irreducible region, then we have
2183                      crossed into a loop.  */
2184                   else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
2185                            && (dest->flags & BB_IRREDUCIBLE_LOOP))
2186                       ++crossed_headers;
2187                   if (crossed_headers > 1)
2188                     {
2189                       vect_free_loop_info_assumptions
2190                         ((*path)[path->length () - 1]->e->dest->loop_father);
2191                       break;
2192                     }
2193                 }
2194             }
2195         }
2196     }
2197 }
2198
2199
2200 /* Sanity-check REGION, the N_REGION blocks of a jump thread.  A jump thread
2201    is a single entry multiple exits (SEME) region whose blocks each have exactly
2202    one incoming edge, except that the first block may not be connected to the
2203    rest of the cfg yet; hence assert that no block has more than one predecessor.  */
2204
2205 DEBUG_FUNCTION void
2206 verify_jump_thread (basic_block *region, unsigned n_region)
2207 {
2208   for (unsigned left = n_region; left > 0; --left, ++region)
2209     gcc_assert (EDGE_COUNT ((*region)->preds) <= 1);
2210 }
2211
2212 /* Linear search: return true when BB equals one of BBS[0] .. BBS[N-1].  */
2213
2214 static inline bool
2215 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2216 {
2217   int pos = 0;
2218
2219   while (pos < n && bbs[pos] != bb)
2220     pos++;
2221   return pos < n;
2222 }
2223
2224 void
2225 jt_path_registry::debug_path (FILE *dump_file, int pathno)
2226 {
2227   /* Write path PATHNO to DUMP_FILE: "path: ", one "SRC -> DEST, " index pair per edge, then "\n".  */
2228   vec<jump_thread_edge *> *const hops = m_paths[pathno];
2229   fprintf (dump_file, "path: ");
2230   for (unsigned pos = 0; pos != hops->length (); pos++)
2231     fprintf (dump_file, "%d -> %d, ", (*hops)[pos]->e->src->index, (*hops)[pos]->e->dest->index);
2232   fprintf (dump_file, "\n");
2233 }
2234
2235 void
2236 jt_path_registry::debug ()
2237 {
2238   for (unsigned pathno = 0; pathno != m_paths.length (); pathno++)  /* Every registered path, in m_paths order.  */
2239     debug_path (stderr, pathno);  /* Each one is written to stderr.  */
2240 }
2241
2242 /* Replace edge number EDGE_NUM of the path stored at m_paths[PATH_NUM] by
2243    the edge that runs from the copy of its source block (get_bb_copy) to its
2244    unchanged destination, so that the path starts in the threaded copy.
2245
2246    Returns true when the edge was replaced.  Returns false, leaving the path
2247    untouched, when the source block has no copy or when the copy has no edge
2248    to the destination.  */
2249
2250 bool
2251 back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
2252                                                     unsigned edge_num)
2253 {
2254   const bool details = dump_file && (dump_flags & TDF_DETAILS);
2255   jump_thread_edge *hop = (*m_paths[path_num])[edge_num];
2256   edge stale = hop->e;
2257   if (details)
2258     fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
2259              stale->src->index, stale->dest->index);
2260   /* The replacement must leave the copy of the old source block and reach the old destination; earlier threading may have made that impossible.  */
2261   basic_block copied_src = get_bb_copy (stale->src);
2262   if (!copied_src)
2263     {
2264       if (details)
2265         fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
2266       return false;
2267     }
2268   edge rewired = find_edge (copied_src, stale->dest);
2269   if (!rewired)
2270     {
2271       if (details)
2272         fprintf (dump_file, "ignoring candidate: we lost our way\n");
2273       return false;
2274     }
2275   hop->e = rewired;
2276   return true;
2277 }
2278
2279 /* After a path has been jump threaded, adjust the remaining paths
2280    that are subsets of this path, so these paths can be safely
2281    threaded within the context of the new threaded path.
2282
2283    For example, suppose we have just threaded:
2284
2285    5 -> 6 -> 7 -> 8 -> 12       =>      5 -> 6' -> 7' -> 8' -> 12'
2286
2287    And we have an upcoming threading candidate:
2288    5 -> 6 -> 7 -> 8 -> 15 -> 20
2289
2290    This function adjusts the upcoming path into:
2291    8' -> 15 -> 20
2292
2293    CURR_PATH_NUM is an index into the global paths table (m_paths).  It
2294    specifies the path that was just threaded.  */
2295
2296 void
2297 back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
2298 {
2299   vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];
2300
2301   /* Iterate through all the other paths and adjust them.  */
2302   for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
2303     {
2304       if (cand_path_num == curr_path_num)
2305         {
2306           ++cand_path_num;
2307           continue;
2308         }
2309       /* Make sure the candidate to adjust starts with the same path
2310          as the recently threaded path.  */
2311       vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
2312       if ((*cand_path)[0]->e != (*curr_path)[0]->e)
2313         {
2314           ++cand_path_num;
2315           continue;
2316         }
2317       if (dump_file && (dump_flags & TDF_DETAILS))
2318         {
2319           fprintf (dump_file, "adjusting candidate: ");
2320           debug_path (dump_file, cand_path_num);
2321         }
2322
2323       /* Chop off from the candidate path any prefix it shares with
2324          the recently threaded path.  */
2325       unsigned minlength = MIN (curr_path->length (), cand_path->length ());
2326       unsigned j;
2327       for (j = 0; j < minlength; ++j)
2328         {
2329           edge cand_edge = (*cand_path)[j]->e;
2330           edge curr_edge = (*curr_path)[j]->e;
2331
2332           /* Once the prefix no longer matches, adjust the first
2333              non-matching edge to point from an adjusted edge to
2334              wherever it was going.  */
2335           if (cand_edge != curr_edge)
2336             {
2337               gcc_assert (cand_edge->src == curr_edge->src);
2338               if (!rewire_first_differing_edge (cand_path_num, j))
2339                 goto remove_candidate_from_list;
2340               break;
2341             }
2342         }
2343       if (j == minlength)
2344         {
2345           /* If we consumed the max subgraph we could look at, and
2346              still didn't find any different edges, it's the
2347              last edge after MINLENGTH.  */
2348           if (cand_path->length () > minlength)
2349             {
2350               if (!rewire_first_differing_edge (cand_path_num, j))
2351                 goto remove_candidate_from_list;
2352             }
2353           else if (dump_file && (dump_flags & TDF_DETAILS))  /* NOTE(review): this branch runs when the candidate is NOT longer than MINLENGTH, i.e. nothing is rewired -- confirm the message below is intended.  */
2354             fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
2355         }
2356       if (j > 0)
2357         {
2358           /* If we are removing everything, delete the entire candidate.  */
2359           if (j == cand_path->length ())
2360             {
2361             remove_candidate_from_list:  /* Also entered by goto when rewire_first_differing_edge fails.  */
2362               cancel_thread (cand_path, "Adjusted candidate is EMPTY");
2363               m_paths.unordered_remove (cand_path_num);
2364               continue;  /* CAND_PATH_NUM is not advanced after the removal.  */
2365             }
2366           /* Otherwise, just remove the redundant sub-path.  */
2367           if (cand_path->length () - j > 1)
2368             cand_path->block_remove (0, j);
2369           else if (dump_file && (dump_flags & TDF_DETAILS))  /* NOTE(review): only a message is printed; the candidate is left in m_paths unchanged here.  */
2370             fprintf (dump_file, "Dropping illformed candidate.\n");
2371         }
2372       if (dump_file && (dump_flags & TDF_DETAILS))
2373         {
2374           fprintf (dump_file, "adjusted candidate: ");
2375           debug_path (dump_file, cand_path_num);
2376         }
2377       ++cand_path_num;
2378     }
2379 }
2380
2381 /* Duplicates a jump-thread path of N_REGION basic blocks.
2382    The ENTRY edge is redirected to the duplicate of the region.
2383
2384    Remove the last conditional statement in the last basic block in the REGION,
2385    and create a single fallthru edge pointing to the same destination as the
2386    EXIT edge.
2387
2388    CURRENT_PATH_NO is an index into the global paths[] table
2389    specifying the jump-thread path.
2390
2391    Returns false if it is unable to copy the region, true otherwise.  */
2392
2393 bool
2394 back_jt_path_registry::duplicate_thread_path (edge entry,
2395                                               edge exit,
2396                                               basic_block *region,
2397                                               unsigned n_region,
2398                                               unsigned current_path_no)
2399 {
2400   unsigned i;
2401   class loop *loop = entry->dest->loop_father;
2402   edge exit_copy;
2403   edge redirected;
2404   profile_count curr_count;
2405
2406   if (!can_copy_bbs_p (region, n_region))
2407     return false;
2408
2409   /* Some sanity checking.  Note that we do not check for all possible
2410      missuses of the functions.  I.e. if you ask to copy something weird,
2411      it will work, but the state of structures probably will not be
2412      correct.  */
2413   for (i = 0; i < n_region; i++)
2414     {
2415       /* We do not handle subloops, i.e. all the blocks must belong to the
2416          same loop.  */
2417       if (region[i]->loop_father != loop)
2418         return false;
2419     }
2420
2421   initialize_original_copy_tables ();
2422
2423   set_loop_copy (loop, loop);
2424
2425   basic_block *region_copy = XNEWVEC (basic_block, n_region);
2426   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2427             split_edge_bb_loc (entry), false);
2428
2429   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
2430      following code ensures that all the edges exiting the jump-thread path are
2431      redirected back to the original code: these edges are exceptions
2432      invalidating the property that is propagated by executing all the blocks of
2433      the jump-thread path in order.  */
2434
2435   curr_count = entry->count ();
2436
2437   for (i = 0; i < n_region; i++)
2438     {
2439       edge e;
2440       edge_iterator ei;
2441       basic_block bb = region_copy[i];
2442
2443       /* Watch inconsistent profile.  */
2444       if (curr_count > region[i]->count)
2445         curr_count = region[i]->count;
2446       /* Scale current BB.  */
2447       if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
2448         {
2449           /* In the middle of the path we only scale the frequencies.
2450              In last BB we need to update probabilities of outgoing edges
2451              because we know which one is taken at the threaded path.  */
2452           if (i + 1 != n_region)
2453             scale_bbs_frequencies_profile_count (region + i, 1,
2454                                                  region[i]->count - curr_count,
2455                                                  region[i]->count);
2456           else
2457             update_bb_profile_for_threading (region[i],
2458                                              curr_count,
2459                                              exit);
2460           scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
2461                                                region_copy[i]->count);
2462         }
2463
2464       if (single_succ_p (bb))
2465         {
2466           /* Make sure the successor is the next node in the path.  */
2467           gcc_assert (i + 1 == n_region
2468                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
2469           if (i + 1 != n_region)
2470             {
2471               curr_count = single_succ_edge (bb)->count ();
2472             }
2473           continue;
2474         }
2475
2476       /* Special case the last block on the path: make sure that it does not
2477          jump back on the copied path, including back to itself.  */
2478       if (i + 1 == n_region)
2479         {
2480           FOR_EACH_EDGE (e, ei, bb->succs)
2481             if (bb_in_bbs (e->dest, region_copy, n_region))
2482               {
2483                 basic_block orig = get_bb_original (e->dest);
2484                 if (orig)
2485                   redirect_edge_and_branch_force (e, orig);
2486               }
2487           continue;
2488         }
2489
2490       /* Redirect all other edges jumping to non-adjacent blocks back to the
2491          original code.  */
2492       FOR_EACH_EDGE (e, ei, bb->succs)
2493         if (region_copy[i + 1] != e->dest)
2494           {
2495             basic_block orig = get_bb_original (e->dest);
2496             if (orig)
2497               redirect_edge_and_branch_force (e, orig);
2498           }
2499         else
2500           {
2501             curr_count = e->count ();
2502           }
2503     }
2504
2505
2506   if (flag_checking)
2507     verify_jump_thread (region_copy, n_region);
2508
2509   /* Remove the last branch in the jump thread path.  */
2510   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2511
2512   /* And fixup the flags on the single remaining edge.  */
2513   edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2514   fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2515   fix_e->flags |= EDGE_FALLTHRU;
2516
2517   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2518
2519   if (e)
2520     {
2521       rescan_loop_exit (e, true, false);
2522       e->probability = profile_probability::always ();
2523     }
2524
2525   /* Redirect the entry and add the phi node arguments.  */
2526   if (entry->dest == loop->header)
2527     mark_loop_for_removal (loop);
2528   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2529   gcc_assert (redirected != NULL);
2530   flush_pending_stmts (entry);
2531
2532   /* Add the other PHI node arguments.  */
2533   add_phi_args_after_copy (region_copy, n_region, NULL);
2534
2535   free (region_copy);
2536
2537   adjust_paths_after_duplication (current_path_no);
2538
2539   free_original_copy_tables ();
2540   return true;
2541 }
2542
2543 /* Return true when PATH is a valid jump-thread path.  */
2544
2545 static bool
2546 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2547 {
2548   unsigned len = path->length ();
2549
2550   /* Check that the path is connected.  */
2551   for (unsigned int j = 0; j < len - 1; j++)
2552     {
2553       edge e = (*path)[j]->e;
2554       if (e->dest != (*path)[j+1]->e->src)
2555         return false;
2556     }
2557   return true;
2558 }
2559
2560 /* Remove any queued jump threads that include edge E.
2561
2562    We don't actually remove them here, just record the edges into ax
2563    hash table.  That way we can do the search once per iteration of
2564    DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR.  */
2565
2566 void
2567 fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
2568 {
2569   if (!m_paths.exists () || !flag_thread_jumps)
2570     return;
2571
2572   edge *slot = m_removed_edges->find_slot (e, INSERT);
2573   *slot = e;
2574 }
2575
2576 /* Thread all paths that have been queued for jump threading, and
2577    update the CFG accordingly.
2578
2579    It is the caller's responsibility to fix the dominance information
2580    and rewrite duplicated SSA_NAMEs back into SSA form.
2581
2582    If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
2583    headers if it does not simplify the loop.
2584
2585    Returns true if one or more edges were threaded.  */
2586
2587 bool
2588 jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
2589 {
2590   if (m_paths.length () == 0)
2591     return false;
2592
2593   m_num_threaded_edges = 0;
2594
2595   bool retval = update_cfg (peel_loop_headers);
2596
2597   statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);
2598
2599   if (retval)
2600     {
2601       loops_state_set (LOOPS_NEED_FIXUP);
2602       return true;
2603     }
2604   return false;
2605 }
2606
2607 /* This is the backward threader version of thread_through_all_blocks
2608    using a generic BB copier.  */
2609
2610 bool
2611 back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
2612 {
2613   bool retval = false;
2614   hash_set<edge> visited_starting_edges;
2615
2616   while (m_paths.length ())
2617     {
2618       vec<jump_thread_edge *> *path = m_paths[0];
2619       edge entry = (*path)[0]->e;
2620
2621       /* Do not jump-thread twice from the same starting edge.
2622
2623          Previously we only checked that we weren't threading twice
2624          from the same BB, but that was too restrictive.  Imagine a
2625          path that starts from GIMPLE_COND(x_123 == 0,...), where both
2626          edges out of this conditional yield paths that can be
2627          threaded (for example, both lead to an x_123==0 or x_123!=0
2628          conditional further down the line.  */
2629       if (visited_starting_edges.contains (entry)
2630           /* We may not want to realize this jump thread path for
2631              various reasons.  So check it first.  */
2632           || !valid_jump_thread_path (path))
2633         {
2634           /* Remove invalid jump-thread paths.  */
2635           cancel_thread (path, "Avoiding threading twice from same edge");
2636           m_paths.unordered_remove (0);
2637           continue;
2638         }
2639
2640       unsigned len = path->length ();
2641       edge exit = (*path)[len - 1]->e;
2642       basic_block *region = XNEWVEC (basic_block, len - 1);
2643
2644       for (unsigned int j = 0; j < len - 1; j++)
2645         region[j] = (*path)[j]->e->dest;
2646
2647       if (duplicate_thread_path (entry, exit, region, len - 1, 0))
2648         {
2649           /* We do not update dominance info.  */
2650           free_dominance_info (CDI_DOMINATORS);
2651           visited_starting_edges.add (entry);
2652           retval = true;
2653           m_num_threaded_edges++;
2654         }
2655
2656       path->release ();
2657       m_paths.unordered_remove (0);
2658       free (region);
2659     }
2660   return retval;
2661 }
2662
2663 /* This is the forward threader version of thread_through_all_blocks,
2664    using a custom BB copier.  */
2665
2666 bool
2667 fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
2668 {
2669   bool retval = false;
2670
2671   /* Remove any paths that referenced removed edges.  */
2672   if (m_removed_edges)
2673     for (unsigned i = 0; i < m_paths.length (); )
2674       {
2675         unsigned int j;
2676         vec<jump_thread_edge *> *path = m_paths[i];
2677
2678         for (j = 0; j < path->length (); j++)
2679           {
2680             edge e = (*path)[j]->e;
2681             if (m_removed_edges->find_slot (e, NO_INSERT)
2682                 || (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
2683                      || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2684                     && !can_duplicate_block_p (e->src)))
2685               break;
2686           }
2687
2688         if (j != path->length ())
2689           {
2690             cancel_thread (path, "Thread references removed edge");
2691             m_paths.unordered_remove (i);
2692             continue;
2693           }
2694         i++;
2695       }
2696
2697   auto_bitmap threaded_blocks;
2698   mark_threaded_blocks (threaded_blocks);
2699
2700   initialize_original_copy_tables ();
2701
2702   /* The order in which we process jump threads can be important.
2703
2704      Consider if we have two jump threading paths A and B.  If the
2705      target edge of A is the starting edge of B and we thread path A
2706      first, then we create an additional incoming edge into B->dest that
2707      we cannot discover as a jump threading path on this iteration.
2708
2709      If we instead thread B first, then the edge into B->dest will have
2710      already been redirected before we process path A and path A will
2711      natually, with no further work, target the redirected path for B.
2712
2713      An post-order is sufficient here.  Compute the ordering first, then
2714      process the blocks.  */
2715   if (!bitmap_empty_p (threaded_blocks))
2716     {
2717       int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2718       unsigned int postorder_num = post_order_compute (postorder, false, false);
2719       for (unsigned int i = 0; i < postorder_num; i++)
2720         {
2721           unsigned int indx = postorder[i];
2722           if (bitmap_bit_p (threaded_blocks, indx))
2723             {
2724               basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
2725               retval |= thread_block (bb, true);
2726             }
2727         }
2728       free (postorder);
2729     }
2730
2731   /* Then perform the threading through loop headers.  We start with the
2732      innermost loop, so that the changes in cfg we perform won't affect
2733      further threading.  */
2734   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
2735     {
2736       if (!loop->header
2737           || !bitmap_bit_p (threaded_blocks, loop->header->index))
2738         continue;
2739
2740       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2741     }
2742
2743   /* All jump threading paths should have been resolved at this
2744      point.  Verify that is the case.  */
2745   basic_block bb;
2746   FOR_EACH_BB_FN (bb, cfun)
2747     {
2748       edge_iterator ei;
2749       edge e;
2750       FOR_EACH_EDGE (e, ei, bb->preds)
2751         gcc_assert (e->aux == NULL);
2752     }
2753
2754   free_original_copy_tables ();
2755
2756   return retval;
2757 }
2758
2759 bool
2760 jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
2761 {
2762   gcc_checking_assert (!path.is_empty ());
2763   edge entry = path[0]->e;
2764   edge exit = path[path.length () - 1]->e;
2765   bool seen_latch = false;
2766   int loops_crossed = 0;
2767   bool crossed_latch = false;
2768   bool crossed_loop_header = false;
2769   // Use ->dest here instead of ->src to ignore the first block.  The
2770   // first block is allowed to be in a different loop, since it'll be
2771   // redirected.  See similar comment in profitable_path_p: "we don't
2772   // care about that block...".
2773   loop_p loop = entry->dest->loop_father;
2774   loop_p curr_loop = loop;
2775
2776   for (unsigned int i = 0; i < path.length (); i++)
2777     {
2778       edge e = path[i]->e;
2779
2780       if (e == NULL)
2781         {
2782           // NULL outgoing edges on a path can happen for jumping to a
2783           // constant address.
2784           cancel_thread (&path, "Found NULL edge in jump threading path");
2785           return true;
2786         }
2787
2788       if (loop->latch == e->src || loop->latch == e->dest)
2789         {
2790           seen_latch = true;
2791           // Like seen_latch, but excludes the first block.
2792           if (e->src != entry->src)
2793             crossed_latch = true;
2794         }
2795
2796       if (e->dest->loop_father != curr_loop)
2797         {
2798           curr_loop = e->dest->loop_father;
2799           ++loops_crossed;
2800         }
2801
2802       // ?? Avoid threading through loop headers that remain in the
2803       // loop, as such threadings tend to create sub-loops which
2804       // _might_ be OK ??.
2805       if (e->dest->loop_father->header == e->dest
2806           && !flow_loop_nested_p (exit->dest->loop_father,
2807                                   e->dest->loop_father))
2808         crossed_loop_header = true;
2809
2810       if (flag_checking && !m_backedge_threads)
2811         gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
2812     }
2813
2814   // If we crossed a loop into an outer loop without crossing the
2815   // latch, this is just an early exit from the loop.
2816   if (loops_crossed == 1
2817       && !crossed_latch
2818       && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
2819     return false;
2820
2821   if (cfun->curr_properties & PROP_loop_opts_done)
2822     return false;
2823
2824   if (seen_latch && empty_block_p (loop->latch))
2825     {
2826       cancel_thread (&path, "Threading through latch before loop opts "
2827                      "would create non-empty latch");
2828       return true;
2829     }
2830   if (loops_crossed)
2831     {
2832       cancel_thread (&path, "Path crosses loops");
2833       return true;
2834     }
2835   // The path should either start and end in the same loop or exit the
2836   // loop it starts in but never enter a loop.  This also catches
2837   // creating irreducible loops, not only rotation.
2838   if (entry->src->loop_father != exit->dest->loop_father
2839       && !flow_loop_nested_p (exit->src->loop_father,
2840                               entry->dest->loop_father))
2841     {
2842       cancel_thread (&path, "Path rotates loop");
2843       return true;
2844     }
2845   if (crossed_loop_header)
2846     {
2847       cancel_thread (&path, "Path crosses loop header but does not exit it");
2848       return true;
2849     }
2850   return false;
2851 }
2852
2853 /* Register a jump threading opportunity.  We queue up all the jump
2854    threading opportunities discovered by a pass and update the CFG
2855    and SSA form all at once.
2856
2857    E is the edge we can thread, E2 is the new target edge, i.e., we
2858    are effectively recording that E->dest can be changed to E2->dest
2859    after fixing the SSA graph.
2860
2861    Return TRUE if PATH was successfully threaded.  */
2862
2863 bool
2864 jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
2865 {
2866   gcc_checking_assert (flag_thread_jumps);
2867
2868   if (!dbg_cnt (registered_jump_thread))
2869     {
2870       path->release ();
2871       return false;
2872     }
2873
2874   if (cancel_invalid_paths (*path))
2875     return false;
2876
2877   if (dump_file && (dump_flags & TDF_DETAILS))
2878     dump_jump_thread_path (dump_file, *path, true);
2879
2880   m_paths.safe_push (path);
2881   return true;
2882 }
2883
2884 /* Return how many uses of T there are within BB, as long as there
2885    aren't any uses outside BB.  If there are any uses outside BB,
2886    return -1 if there's at most one use within BB, or -2 if there is
2887    more than one use within BB.  */
2888
2889 static int
2890 uses_in_bb (tree t, basic_block bb)
2891 {
2892   int uses = 0;
2893   bool outside_bb = false;
2894
2895   imm_use_iterator iter;
2896   use_operand_p use_p;
2897   FOR_EACH_IMM_USE_FAST (use_p, iter, t)
2898     {
2899       if (is_gimple_debug (USE_STMT (use_p)))
2900         continue;
2901
2902       if (gimple_bb (USE_STMT (use_p)) != bb)
2903         outside_bb = true;
2904       else
2905         uses++;
2906
2907       if (outside_bb && uses > 1)
2908         return -2;
2909     }
2910
2911   if (outside_bb)
2912     return -1;
2913
2914   return uses;
2915 }
2916
2917 /* Starting from the final control flow stmt in BB, assuming it will
2918    be removed, follow uses in to-be-removed stmts back to their defs
2919    and count how many defs are to become dead and be removed as
2920    well.  */
2921
2922 unsigned int
2923 estimate_threading_killed_stmts (basic_block bb)
2924 {
2925   int killed_stmts = 0;
2926   hash_map<tree, int> ssa_remaining_uses;
2927   auto_vec<gimple *, 4> dead_worklist;
2928
2929   /* If the block has only two predecessors, threading will turn phi
2930      dsts into either src, so count them as dead stmts.  */
2931   bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;
2932
2933   if (drop_all_phis)
2934     for (gphi_iterator gsi = gsi_start_phis (bb);
2935          !gsi_end_p (gsi); gsi_next (&gsi))
2936       {
2937         gphi *phi = gsi.phi ();
2938         tree dst = gimple_phi_result (phi);
2939
2940         /* We don't count virtual PHIs as stmts in
2941            record_temporary_equivalences_from_phis.  */
2942         if (virtual_operand_p (dst))
2943           continue;
2944
2945         killed_stmts++;
2946       }
2947
2948   if (gsi_end_p (gsi_last_bb (bb)))
2949     return killed_stmts;
2950
2951   gimple *stmt = gsi_stmt (gsi_last_bb (bb));
2952   if (gimple_code (stmt) != GIMPLE_COND
2953       && gimple_code (stmt) != GIMPLE_GOTO
2954       && gimple_code (stmt) != GIMPLE_SWITCH)
2955     return killed_stmts;
2956
2957   /* The control statement is always dead.  */
2958   killed_stmts++;
2959   dead_worklist.quick_push (stmt);
2960   while (!dead_worklist.is_empty ())
2961     {
2962       stmt = dead_worklist.pop ();
2963
2964       ssa_op_iter iter;
2965       use_operand_p use_p;
2966       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2967         {
2968           tree t = USE_FROM_PTR (use_p);
2969           gimple *def = SSA_NAME_DEF_STMT (t);
2970
2971           if (gimple_bb (def) == bb
2972               && (gimple_code (def) != GIMPLE_PHI
2973                   || !drop_all_phis)
2974               && !gimple_has_side_effects (def))
2975             {
2976               int *usesp = ssa_remaining_uses.get (t);
2977               int uses;
2978
2979               if (usesp)
2980                 uses = *usesp;
2981               else
2982                 uses = uses_in_bb (t, bb);
2983
2984               gcc_assert (uses);
2985
2986               /* Don't bother recording the expected use count if we
2987                  won't find any further uses within BB.  */
2988               if (!usesp && (uses < -1 || uses > 1))
2989                 {
2990                   usesp = &ssa_remaining_uses.get_or_insert (t);
2991                   *usesp = uses;
2992                 }
2993
2994               if (uses < 0)
2995                 continue;
2996
2997               --uses;
2998               if (usesp)
2999                 *usesp = uses;
3000
3001               if (!uses)
3002                 {
3003                   killed_stmts++;
3004                   if (usesp)
3005                     ssa_remaining_uses.remove (t);
3006                   if (gimple_code (def) != GIMPLE_PHI)
3007                     dead_worklist.safe_push (def);
3008                 }
3009             }
3010         }
3011     }
3012
3013   if (dump_file)
3014     fprintf (dump_file, "threading bb %i kills %i stmts\n",
3015              bb->index, killed_stmts);
3016
3017   return killed_stmts;
3018 }