gcc/tree-ssa-threadupdate.c
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "backend.h"
25 #include "cfghooks.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "hard-reg-set.h"
29 #include "ssa.h"
30 #include "options.h"
31 #include "fold-const.h"
32 #include "flags.h"
33 #include "cfganal.h"
34 #include "internal-fn.h"
35 #include "gimple-iterator.h"
36 #include "tree-ssa.h"
37 #include "tree-ssa-threadupdate.h"
38 #include "dumpfile.h"
39 #include "cfgloop.h"
40 #include "dbgcnt.h"
41 #include "tree-cfg.h"
42 #include "tree-pass.h"
44 /* Given a block B, update the CFG and SSA graph to reflect redirecting
45 one or more in-edges to B to instead reach the destination of an
46 out-edge from B while preserving any side effects in B.
48 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
49 side effects of executing B.
51 1. Make a copy of B (including its outgoing edges and statements). Call
52 the copy B'. Note B' has no incoming edges or PHIs at this time.
54 2. Remove the control statement at the end of B' and all outgoing edges
55 except B'->C.
57 3. Add a new argument to each PHI in C with the same value as the existing
58 argument associated with edge B->C. Associate the new PHI arguments
59 with the edge B'->C.
61 4. For each PHI in B, find or create a PHI in B' with an identical
62 PHI_RESULT. Add an argument to the PHI in B' which has the same
63 value as the PHI in B associated with the edge A->B. Associate
64 the new argument in the PHI in B' with the edge A->B.
66 5. Change the edge A->B to A->B'.
68 5a. This automatically deletes any PHI arguments associated with the
69 edge A->B in B.
71 5b. This automatically associates each new argument added in step 4
72 with the edge A->B'.
74 6. Repeat for other incoming edges into B.
76 7. Put the duplicated resources in B and all the B' blocks into SSA form.
78 Note that block duplication can be minimized by first collecting the
79 set of unique destination blocks that the incoming edges should
80 be threaded to.
82 We reduce the number of edges and statements we create by not copying all
83 the outgoing edges and the control statement in step #1. We instead create
84 a template block without the outgoing edges and duplicate the template.
86 Another case this code handles is threading through a "joiner" block. In
87 this case, we do not know the destination of the joiner block, but one
88 of the outgoing edges from the joiner block leads to a threadable path. This
89 case largely works as outlined above, except the duplicate of the joiner
90 block still contains a full set of outgoing edges and its control statement.
91 We just redirect one of its outgoing edges to our jump threading path. */
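/* As a purely illustrative sketch (the block names follow the A/B/C
   example above and are not taken from any particular test case):
   before threading, A unconditionally reaches B, and B conditionally
   branches to C or D:

       A -> B -> C
            B -> D

   After registering the jump thread A->B->C and updating the CFG, A
   reaches a copy B' of B whose control statement and extra outgoing
   edges have been removed, while the original B survives for its
   remaining predecessors:

       A -> B' -> C
            B  -> C
            B  -> D

   B' still contains B's side effects, so the work done along the
   threaded path is unchanged; only the conditional branch in B is
   bypassed.  */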
94 /* Steps #5 and #6 of the above algorithm are best implemented by walking
95 all the incoming edges which thread to the same destination edge at
96 the same time. That avoids lots of table lookups to get information
97 for the destination edge.
99 To realize that implementation we create a list of incoming edges
100 which thread to the same outgoing edge. Thus to implement steps
101 #5 and #6 we traverse our hash table of outgoing edge information.
102 For each entry we walk the list of incoming edges which thread to
103 the current outgoing edge. */
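/* Schematically, the traversal described above looks like the following
   simplified sketch (not the exact code; see ssa_redirect_edges below
   for the real version):

     for each redirection_data *rd in the hash table:
       for (struct el *el = rd->incoming_edges; el; el = el->next)
         redirect_edge_and_branch (el->e, rd->dup_blocks[0]);

   so the information for each outgoing edge is looked up once, no
   matter how many incoming edges thread to it.  */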
105 struct el
107 edge e;
108 struct el *next;
111 /* Main data structure recording information regarding B's duplicate
112 blocks. */
114 /* We need to efficiently record the unique thread destinations of this
115 block and specific information associated with those destinations. We
116 may have many incoming edges threaded to the same outgoing edge. This
117 can be naturally implemented with a hash table. */
119 struct redirection_data : free_ptr_hash<redirection_data>
121 /* We support wiring up two block duplicates in a jump threading path.
123 One is a normal block copy where we remove the control statement
124 and wire up its single remaining outgoing edge to the thread path.
126 The other is a joiner block where we leave the control statement
127 in place, but wire one of the outgoing edges to a thread path.
129 In theory we could have multiple block duplicates in a jump
130 threading path, but I haven't tried that.
132 The duplicate blocks appear in this array in the same order in
133 which they appear in the jump thread path. */
134 basic_block dup_blocks[2];
136 /* The jump threading path. */
137 vec<jump_thread_edge *> *path;
139 /* A list of incoming edges which we want to thread to the
140 same path. */
141 struct el *incoming_edges;
143 /* hash_table support. */
144 static inline hashval_t hash (const redirection_data *);
145 static inline int equal (const redirection_data *, const redirection_data *);
148 /* Dump a jump threading path, including annotations about each
149 edge in the path. */
151 static void
152 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
153 bool registering)
155 fprintf (dump_file,
156 " %s%s jump thread: (%d, %d) incoming edge; ",
157 (registering ? "Registering" : "Cancelling"),
158 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
159 path[0]->e->src->index, path[0]->e->dest->index);
161 for (unsigned int i = 1; i < path.length (); i++)
163 /* We can get paths with a NULL edge when the final destination
164 of a jump thread turns out to be a constant address. We dump
165 those paths when debugging, so we have to be prepared for that
166 possibility here. */
167 if (path[i]->e == NULL)
168 continue;
170 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
171 fprintf (dump_file, " (%d, %d) joiner; ",
172 path[i]->e->src->index, path[i]->e->dest->index);
173 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
174 fprintf (dump_file, " (%d, %d) normal;",
175 path[i]->e->src->index, path[i]->e->dest->index);
176 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
177 fprintf (dump_file, " (%d, %d) nocopy;",
178 path[i]->e->src->index, path[i]->e->dest->index);
179 if (path[0]->type == EDGE_FSM_THREAD)
180 fprintf (dump_file, " (%d, %d) ",
181 path[i]->e->src->index, path[i]->e->dest->index);
183 fputc ('\n', dump_file);
186 /* Simple hashing function. For any given incoming edge E, we're going
187 to be most concerned with the final destination of its jump thread
188 path. So hash on the block index of the final edge in the path. */
190 inline hashval_t
191 redirection_data::hash (const redirection_data *p)
193 vec<jump_thread_edge *> *path = p->path;
194 return path->last ()->e->dest->index;
197 /* Given two hash table entries, return true if they have the same
198 jump threading path. */
199 inline int
200 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
202 vec<jump_thread_edge *> *path1 = p1->path;
203 vec<jump_thread_edge *> *path2 = p2->path;
205 if (path1->length () != path2->length ())
206 return false;
208 for (unsigned int i = 1; i < path1->length (); i++)
210 if ((*path1)[i]->type != (*path2)[i]->type
211 || (*path1)[i]->e != (*path2)[i]->e)
212 return false;
215 return true;
218 /* Data structure of information to pass to hash table traversal routines. */
219 struct ssa_local_info_t
221 /* The current block we are working on. */
222 basic_block bb;
224 /* We only create a template block for the first duplicated block in a
225 jump threading path as we may need many duplicates of that block.
227 The second duplicate block in a path is specific to that path. Creating
228 and sharing a template for that block is considerably more difficult. */
229 basic_block template_block;
231 /* TRUE if we thread one or more jumps, FALSE otherwise. */
232 bool jumps_threaded;
234 /* Blocks duplicated for the thread. */
235 bitmap duplicate_blocks;
238 /* Passes which use the jump threading code register jump threading
239 opportunities as they are discovered. We keep the registered
240 jump threading opportunities in this vector as edge pairs
241 (original_edge, target_edge). */
242 static vec<vec<jump_thread_edge *> *> paths;
244 /* When we start updating the CFG for threading, data necessary for jump
245 threading is attached to the AUX field for the incoming edge. Use these
246 macros to access the underlying structure attached to the AUX field. */
247 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
249 /* Jump threading statistics. */
251 struct thread_stats_d
253 unsigned long num_threaded_edges;
256 struct thread_stats_d thread_stats;
259 /* Remove the last statement in block BB if it is a control statement.
260 Also remove all outgoing edges except the edge which reaches DEST_BB.
261 If DEST_BB is NULL, then remove all outgoing edges. */
263 static void
264 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
266 gimple_stmt_iterator gsi;
267 edge e;
268 edge_iterator ei;
270 gsi = gsi_last_bb (bb);
272 /* If the duplicate ends with a control statement, then remove it.
274 Note that if we are duplicating the template block rather than the
275 original basic block, then the duplicate might not have any real
276 statements in it. */
277 if (!gsi_end_p (gsi)
278 && gsi_stmt (gsi)
279 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
280 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
282 gsi_remove (&gsi, true);
284 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
286 if (e->dest != dest_bb)
287 remove_edge (e);
288 else
289 ei_next (&ei);
293 /* Create a duplicate of BB. Record the duplicate block in an array
294 indexed by COUNT stored in RD. */
296 static void
297 create_block_for_threading (basic_block bb,
298 struct redirection_data *rd,
299 unsigned int count,
300 bitmap *duplicate_blocks)
302 edge_iterator ei;
303 edge e;
305 /* We can use the generic block duplication code and simply remove
306 the stuff we do not need. */
307 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
309 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
310 e->aux = NULL;
312 /* Zero out the profile, since the block is unreachable for now. */
313 rd->dup_blocks[count]->frequency = 0;
314 rd->dup_blocks[count]->count = 0;
315 if (duplicate_blocks)
316 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
319 /* Main data structure to hold information for duplicates of BB. */
321 static hash_table<redirection_data> *redirection_data;
323 /* Given an outgoing edge E, look up and return its entry in our hash table.
325 If INSERT is true, then we insert the entry into the hash table if
326 it is not already present. INCOMING_EDGE is added to the list of incoming
327 edges associated with E in the hash table. */
329 static struct redirection_data *
330 lookup_redirection_data (edge e, enum insert_option insert)
332 struct redirection_data **slot;
333 struct redirection_data *elt;
334 vec<jump_thread_edge *> *path = THREAD_PATH (e);
336 /* Build a hash table element so we can see if E is already
337 in the table. */
338 elt = XNEW (struct redirection_data);
339 elt->path = path;
340 elt->dup_blocks[0] = NULL;
341 elt->dup_blocks[1] = NULL;
342 elt->incoming_edges = NULL;
344 slot = redirection_data->find_slot (elt, insert);
346 /* This will only happen if INSERT is false and the entry is not
347 in the hash table. */
348 if (slot == NULL)
350 free (elt);
351 return NULL;
354 /* This will only happen if E was not in the hash table and
355 INSERT is true. */
356 if (*slot == NULL)
358 *slot = elt;
359 elt->incoming_edges = XNEW (struct el);
360 elt->incoming_edges->e = e;
361 elt->incoming_edges->next = NULL;
362 return elt;
364 /* E was in the hash table. */
365 else
367 /* Free ELT as we do not need it anymore, we will extract the
368 relevant entry from the hash table itself. */
369 free (elt);
371 /* Get the entry stored in the hash table. */
372 elt = *slot;
374 /* If insertion was requested, then we need to add INCOMING_EDGE
375 to the list of incoming edges associated with E. */
376 if (insert)
378 struct el *el = XNEW (struct el);
379 el->next = elt->incoming_edges;
380 el->e = e;
381 elt->incoming_edges = el;
384 return elt;
388 /* Similar to copy_phi_args, except that the PHI arg already exists; it
389 just does not have a value associated with it. */
391 static void
392 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
394 int src_idx = src_e->dest_idx;
395 int tgt_idx = tgt_e->dest_idx;
397 /* Iterate over each PHI in e->dest. */
398 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
399 gsi2 = gsi_start_phis (tgt_e->dest);
400 !gsi_end_p (gsi);
401 gsi_next (&gsi), gsi_next (&gsi2))
403 gphi *src_phi = gsi.phi ();
404 gphi *dest_phi = gsi2.phi ();
405 tree val = gimple_phi_arg_def (src_phi, src_idx);
406 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
408 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
409 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
413 /* Given ssa_name DEF, backtrack along jump threading PATH from node IDX
414 to see if it has a constant value in a flow-sensitive manner. Set
415 LOCUS to the location of the constant PHI arg and return the value.
416 Return DEF directly if PATH is NULL or IDX is zero. */
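/* For example (the SSA names and block numbers here are hypothetical):
   if DEF is x_1 defined by x_1 = PHI <7(bb2), y_3(bb4)> in block bb5,
   and the portion of PATH before IDX enters bb5 via the edge from bb2,
   then the constant 7 and that PHI argument's location are returned
   instead of x_1.  */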
418 static tree
419 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
420 basic_block bb, int idx, source_location *locus)
422 tree arg;
423 gphi *def_phi;
424 basic_block def_bb;
426 if (path == NULL || idx == 0)
427 return def;
429 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
430 if (!def_phi)
431 return def;
433 def_bb = gimple_bb (def_phi);
434 /* Don't propagate loop invariants into deeper loops. */
435 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
436 return def;
438 /* Backtrack jump threading path from IDX to see if def has constant
439 value. */
440 for (int j = idx - 1; j >= 0; j--)
442 edge e = (*path)[j]->e;
443 if (e->dest == def_bb)
445 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
446 if (is_gimple_min_invariant (arg))
448 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
449 return arg;
451 break;
455 return def;
458 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
459 Try to backtrack along jump threading PATH from node IDX to see if the
460 arg has a constant value; if so, copy the constant value instead of the
461 argument itself. */
463 static void
464 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
465 vec<jump_thread_edge *> *path, int idx)
467 gphi_iterator gsi;
468 int src_indx = src_e->dest_idx;
470 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
472 gphi *phi = gsi.phi ();
473 tree def = gimple_phi_arg_def (phi, src_indx);
474 source_location locus = gimple_phi_arg_location (phi, src_indx);
476 if (TREE_CODE (def) == SSA_NAME
477 && !virtual_operand_p (gimple_phi_result (phi)))
478 def = get_value_locus_in_path (def, path, bb, idx, &locus);
480 add_phi_arg (phi, def, tgt_e, locus);
484 /* We have recently made a copy of ORIG_BB, including its outgoing
485 edges. The copy is NEW_BB. Every PHI node in every direct successor of
486 ORIG_BB has a new argument associated with the edge from NEW_BB to the
487 successor. Initialize the PHI argument so that it is equal to the PHI
488 argument associated with the edge from ORIG_BB to the successor.
489 PATH and IDX are used to check if the new PHI argument has a constant
490 value in a flow-sensitive manner. */
492 static void
493 update_destination_phis (basic_block orig_bb, basic_block new_bb,
494 vec<jump_thread_edge *> *path, int idx)
496 edge_iterator ei;
497 edge e;
499 FOR_EACH_EDGE (e, ei, orig_bb->succs)
501 edge e2 = find_edge (new_bb, e->dest);
502 copy_phi_args (e->dest, e, e2, path, idx);
506 /* Given a duplicate block and its single destination (both stored
507 in RD), create an edge between the duplicate and its single
508 destination.
510 Add an additional argument to any PHI nodes at the single
511 destination. IDX is the node in the jump threading path at
512 which we start checking whether the new PHI argument has a
513 constant value along the jump threading path. */
515 static void
516 create_edge_and_update_destination_phis (struct redirection_data *rd,
517 basic_block bb, int idx)
519 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
521 rescan_loop_exit (e, true, false);
522 e->probability = REG_BR_PROB_BASE;
523 e->count = bb->count;
525 /* We used to copy the thread path here. That was added in 2007
526 and dutifully updated through the representation changes in 2013.
528 In 2013 we added code to thread from an interior node through
529 the backedge to another interior node. That runs after the code
530 to thread through loop headers from outside the loop.
532 The latter may delete edges in the CFG, including those
533 which appeared in the jump threading path we copied here. Thus
534 we'd end up using a dangling pointer.
536 After reviewing the 2007/2011 code, I can't see how anything
537 depended on copying the AUX field and clearly copying the jump
538 threading path is problematical due to embedded edge pointers.
539 It has been removed. */
540 e->aux = NULL;
542 /* If there are any PHI nodes at the destination of the outgoing edge
543 from the duplicate block, then we will need to add a new argument
544 to them. The argument should have the same value as the argument
545 associated with the outgoing edge stored in RD. */
546 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
549 /* Look through PATH beginning at START and return TRUE if there are
550 any additional blocks that need to be duplicated. Otherwise,
551 return FALSE. */
552 static bool
553 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
554 unsigned int start)
556 for (unsigned int i = start + 1; i < path->length (); i++)
558 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
559 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
560 return true;
562 return false;
566 /* Compute the amount of profile count/frequency coming into the jump threading
567 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
568 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
569 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
570 identify blocks duplicated for jump threading, which have duplicated
571 edges that need to be ignored in the analysis. Return true if path contains
572 a joiner, false otherwise.
574 In the non-joiner case, this is straightforward - all the counts/frequency
575 flowing into the jump threading path should flow through the duplicated
576 block and out of the duplicated path.
578 In the joiner case, it is very tricky. Some of the counts flowing into
579 the original path go offpath at the joiner. The problem is that while
580 we know how much total count goes off-path in the original control flow,
581 we don't know how many of the counts corresponding to just the jump
582 threading path go offpath at the joiner.
584 For example, assume we have the following control flow and identified
585 jump threading paths:
             A       B       C
              \      |      /
             Ea\     |Eb   /Ec
                \    |    /
                 v   v   v
                     J       <-- Joiner
                    / \
               Eoff/   \Eon
                  v     v
               Soff     Son  <--- Normal
                       /  \
                    Ed/    \Ee
                     v      v
                     D      E
604 Jump threading paths: A -> J -> Son -> D (path 1)
605 C -> J -> Son -> E (path 2)
607 Note that the control flow could be more complicated:
608 - Each jump threading path may have more than one incoming edge. I.e. A and
609 Ea could represent multiple incoming blocks/edges that are included in
610 path 1.
611 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
612 before or after the "normal" copy block). These are not duplicated onto
613 the jump threading path, as they are single-successor.
614 - Any of the blocks along the path may have other incoming edges that
615 are not part of any jump threading path, but add profile counts along
616 the path.
618 In the above example, after all jump threading is complete, we will
619 end up with the following control flow:
           A          B            C
           |          |            |
         Ea|          |Eb          |Ec
           |          |            |
           v          v            v
          Ja          J           Jc
          / \        / \Eon'      / \
     Eona/   \   ---/---\--------    \Eonc
        /     \ /  /     \            \
       v       v  v       v            v
     Sona     Soff       Son          Sonc
        \                 /\          /
         \___________    /  \    ____/
                     \  /    \  /
                      vv      v
                      D       E
638 The main issue to notice here is that when we are processing path 1
639 (A->J->Son->D) we need to figure out the outgoing edge weights to
640 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
641 sum of the incoming weights to D remains Ed. The problem with simply
642 assuming that Ja (and Jc when processing path 2) has the same outgoing
643 probabilities to its successors as the original block J is that after
644 all paths are processed and other edges/counts removed (e.g. none
645 of Ec will reach D after processing path 2), we may end up with not
646 enough count flowing along the duplicated edge Sona->D.
648 Therefore, in the case of a joiner, we keep track of all counts
649 coming in along the current path, as well as from predecessors not
650 on any jump threading path (Eb in the above example). While we
651 first assume that the duplicated Eona for Ja->Sona has the same
652 probability as the original, we later compensate for other jump
653 threading paths that may eliminate edges. We do that by keeping track
654 of all counts coming into the original path that are not in a jump
655 thread (Eb in the above example, but as noted earlier, there could
656 be other predecessors incoming to the path at various points, such
657 as at Son). Call this cumulative non-path count coming into the path
658 before D Enonpath. We then ensure that the count from Sona->D is at
659 least as big as (Ed - Enonpath), but no bigger than the minimum
660 weight along the jump threading path. The probabilities of both the
661 original and duplicated joiner blocks J and Ja will be adjusted
662 accordingly after the updates. */
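/* A purely illustrative numeric walk-through (the numbers are invented,
   not taken from a real profile): suppose the path's first block has a
   total incoming count of 100, of which 60 arrives along edges on this
   threading path, so path_in_count is 60 and onpath_scale is roughly
   60%.  If the joiner's on-path out-edge originally carried a count of
   80, we initially estimate 0.6 * 80 = 48 of it as belonging to this
   path.  If elast carries a count of 70 and non-path predecessors
   contribute 10, path_out_count is then raised to at least
   70 - 10 = 60, but never above the minimum count seen along the path
   (60 here), giving a final path_out_count of 60.  */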
664 static bool
665 compute_path_counts (struct redirection_data *rd,
666 ssa_local_info_t *local_info,
667 gcov_type *path_in_count_ptr,
668 gcov_type *path_out_count_ptr,
669 int *path_in_freq_ptr)
671 edge e = rd->incoming_edges->e;
672 vec<jump_thread_edge *> *path = THREAD_PATH (e);
673 edge elast = path->last ()->e;
674 gcov_type nonpath_count = 0;
675 bool has_joiner = false;
676 gcov_type path_in_count = 0;
677 int path_in_freq = 0;
679 /* Start by accumulating incoming edge counts to the path's first bb
680 into a couple of buckets:
681 path_in_count: total count of incoming edges that flow into the
682 current path.
683 nonpath_count: total count of incoming edges that are not
684 flowing along *any* path. These are the counts
685 that will still flow along the original path after
686 all path duplication is done by potentially multiple
687 calls to this routine.
688 (any other incoming edge counts are for a different jump threading
689 path that will be handled by a later call to this routine.)
690 To make this easier, start by recording all incoming edges that flow into
691 the current path in a bitmap. We could add up the path's incoming edge
692 counts here, but we still need to walk all the first bb's incoming edges
693 below to add up the counts of the other edges not included in this jump
694 threading path. */
695 struct el *next, *el;
696 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
697 for (el = rd->incoming_edges; el; el = next)
699 next = el->next;
700 bitmap_set_bit (in_edge_srcs, el->e->src->index);
702 edge ein;
703 edge_iterator ei;
704 FOR_EACH_EDGE (ein, ei, e->dest->preds)
706 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
707 /* Simply check the incoming edge src against the set captured above. */
708 if (ein_path
709 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
711 /* It is necessary but not sufficient that the last path edges
712 are identical. There may be different paths that share the
713 same last path edge in the case where the last edge has a nocopy
714 source block. */
715 gcc_assert (ein_path->last ()->e == elast);
716 path_in_count += ein->count;
717 path_in_freq += EDGE_FREQUENCY (ein);
719 else if (!ein_path)
721 /* Keep track of the incoming edges that are not on any jump-threading
722 path. These counts will still flow out of the original path after all
723 jump threading is complete. */
724 nonpath_count += ein->count;
728 /* This is needed due to insane incoming frequencies. */
729 if (path_in_freq > BB_FREQ_MAX)
730 path_in_freq = BB_FREQ_MAX;
732 BITMAP_FREE (in_edge_srcs);
734 /* Now compute the fraction of the total count coming into the first
735 path bb that is from the current threading path. */
736 gcov_type total_count = e->dest->count;
737 /* Handle incoming profile insanities. */
738 if (total_count < path_in_count)
739 path_in_count = total_count;
740 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
742 /* Walk the entire path to do some more computation in order to estimate
743 how much of the path_in_count will flow out of the duplicated threading
744 path. In the non-joiner case this is straightforward (it should be
745 the same as path_in_count, although we will handle incoming profile
746 insanities by setting it equal to the minimum count along the path).
748 In the joiner case, we need to estimate how much of the path_in_count
749 will stay on the threading path after the joiner's conditional branch.
750 We don't really know for sure how much of the counts
751 associated with this path go to each successor of the joiner, but we'll
752 estimate based on the fraction of the total count coming into the path
753 bb that was from the threading paths (computed above in onpath_scale).
754 Afterwards, we will need to do some fixup to account for other threading
755 paths and possible profile insanities.
757 In order to estimate the joiner case's counts we also need to update
758 nonpath_count with any additional counts coming into the path. Other
759 blocks along the path may have additional predecessors from outside
760 the path. */
761 gcov_type path_out_count = path_in_count;
762 gcov_type min_path_count = path_in_count;
763 for (unsigned int i = 1; i < path->length (); i++)
765 edge epath = (*path)[i]->e;
766 gcov_type cur_count = epath->count;
767 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
769 has_joiner = true;
770 cur_count = apply_probability (cur_count, onpath_scale);
772 /* In the joiner case we need to update nonpath_count for any edges
773 coming into the path that will contribute to the count flowing
774 into the path successor. */
775 if (has_joiner && epath != elast)
777 /* Look for other incoming edges after joiner. */
778 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
780 if (ein != epath
781 /* Ignore in edges from blocks we have duplicated for a
782 threading path, which have duplicated edge counts until
783 they are redirected by an invocation of this routine. */
784 && !bitmap_bit_p (local_info->duplicate_blocks,
785 ein->src->index))
786 nonpath_count += ein->count;
789 if (cur_count < path_out_count)
790 path_out_count = cur_count;
791 if (epath->count < min_path_count)
792 min_path_count = epath->count;
795 /* We computed path_out_count above assuming that this path targeted
796 the joiner's on-path successor with the same likelihood as it
797 reached the joiner. However, other thread paths through the joiner
798 may take a different path through the normal copy source block
799 (i.e. they have a different elast), meaning that they do not
800 contribute any counts to this path's elast. As a result, it may
801 turn out that this path must have more count flowing to the on-path
802 successor of the joiner. Essentially, all of this path's elast
803 count must be contributed by this path and any nonpath counts
804 (since any path through the joiner with a different elast will not
805 include a copy of this elast in its duplicated path).
806 So ensure that this path's path_out_count is at least the
807 difference between elast->count and nonpath_count. Otherwise the edge
808 counts after threading will not be sane. */
809 if (has_joiner && path_out_count < elast->count - nonpath_count)
811 path_out_count = elast->count - nonpath_count;
812 /* But neither can we go above the minimum count along the path
813 we are duplicating. This can be an issue due to profile
814 insanities coming in to this pass. */
815 if (path_out_count > min_path_count)
816 path_out_count = min_path_count;
819 *path_in_count_ptr = path_in_count;
820 *path_out_count_ptr = path_out_count;
821 *path_in_freq_ptr = path_in_freq;
822 return has_joiner;
826 /* Update the counts and frequencies for both an original path
827 edge EPATH and its duplicate EDUP. The duplicate source block
828 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
829 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
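/* For instance (hypothetical numbers): with PATH_IN_COUNT = 60 and
   PATH_OUT_COUNT = 60, the duplicate source block's count becomes 60
   while the original block's count is reduced by 60 (clamped at zero
   on underflow), and EDUP's count becomes 60 while that same amount is
   subtracted from EPATH's count.  */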
830 static void
831 update_profile (edge epath, edge edup, gcov_type path_in_count,
832 gcov_type path_out_count, int path_in_freq)
835 /* First update the duplicated block's count / frequency. */
836 if (edup)
838 basic_block dup_block = edup->src;
839 gcc_assert (dup_block->count == 0);
840 gcc_assert (dup_block->frequency == 0);
841 dup_block->count = path_in_count;
842 dup_block->frequency = path_in_freq;
845 /* Now update the original block's count and frequency in the
846 opposite manner - remove the counts/freq that will flow
847 into the duplicated block. Handle underflow due to precision/
848 rounding issues. */
849 epath->src->count -= path_in_count;
850 if (epath->src->count < 0)
851 epath->src->count = 0;
852 epath->src->frequency -= path_in_freq;
853 if (epath->src->frequency < 0)
854 epath->src->frequency = 0;
856 /* Next update this path edge's original and duplicated counts. We know
857 that the duplicated path will have path_out_count flowing
858 out of it (in the joiner case this is the count along the duplicated path
859 out of the duplicated joiner). This count can then be removed from the
860 original path edge. */
861 if (edup)
862 edup->count = path_out_count;
863 epath->count -= path_out_count;
864 gcc_assert (epath->count >= 0);
868 /* The duplicate and original joiner blocks may end up with different
869 probabilities (different from both the original and from each other).
870 Recompute the probabilities here once we have updated the edge
871 counts and frequencies. */
873 static void
874 recompute_probabilities (basic_block bb)
876 edge esucc;
877 edge_iterator ei;
878 FOR_EACH_EDGE (esucc, ei, bb->succs)
880 if (!bb->count)
881 continue;
883 /* Prevent overflow computation due to insane profiles. */
884 if (esucc->count < bb->count)
885 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
886 bb->count);
887 else
888 /* This can happen with missing/guessed probabilities, since we
889 may determine that more is flowing along the duplicated
890 path than the joiner succ probabilities allowed.
891 Counts and freqs will be insane after jump threading;
892 at least make sure the probability is sane or we will
893 get a flow verification error.
894 Not much we can do to make counts/freqs sane without
895 redoing the profile estimation. */
896 esucc->probability = REG_BR_PROB_BASE;
901 /* Update the counts of the original and duplicated edges from a joiner
902 that go off path, given that we have already determined that the
903 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
904 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
905 edge from joiner is EPATH. */
907 static void
908 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
909 gcov_type path_in_count,
910 gcov_type path_out_count)
912 /* Compute the count that currently flows off path from the joiner.
913 In other words, the total count of joiner's out edges other than
914 epath. Compute this by walking the successors instead of
915 subtracting epath's count from the joiner bb count, since there
916 are sometimes slight insanities where the total out edge count is
917 larger than the bb count (possibly due to rounding/truncation
918 errors). */
919 gcov_type total_orig_off_path_count = 0;
920 edge enonpath;
921 edge_iterator ei;
922 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
924 if (enonpath == epath)
925 continue;
926 total_orig_off_path_count += enonpath->count;
929 /* For the path that we are duplicating, the amount that will flow
930 off path from the duplicated joiner is the delta between the
931 path's cumulative in count and the portion of that count we
932 estimated above as flowing from the joiner along the duplicated
933 path. */
934 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
936 /* Now do the actual updates of the off-path edges. */
937 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
939 /* Look for edges going off of the threading path. */
940 if (enonpath == epath)
941 continue;
943 /* Find the corresponding edge out of the duplicated joiner. */
944 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
945 gcc_assert (enonpathdup);
947 /* We can't use the original probability of the joiner's out
948 edges, since the probabilities of the original branch
949 and the duplicated branches may vary after all threading is
950 complete. But apportion the duplicated joiner's off-path
951 total edge count computed earlier (total_dup_off_path_count)
952 among the duplicated off-path edges based on their original
952 ratio to the full off-path count (total_orig_off_path_count). */
955 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
956 total_orig_off_path_count);
957 /* Give the duplicated offpath edge a portion of the duplicated
958 total. */
959 enonpathdup->count = apply_scale (scale,
960 total_dup_off_path_count);
961 /* Now update the original offpath edge count, handling underflow
962 due to rounding errors. */
963 enonpath->count -= enonpathdup->count;
964 if (enonpath->count < 0)
965 enonpath->count = 0;
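/* A small made-up example: if the joiner originally sent a count of 40
   off-path, split 30/10 between two off-path edges, and the duplicated
   joiner keeps path_in_count - path_out_count = 20 off-path, then the
   duplicated off-path edges receive roughly 15 and 5 (preserving the
   original 3:1 ratio), and those amounts are subtracted from the
   corresponding original off-path edges.  */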
970 /* Check if the paths through RD all have estimated frequencies but zero
971 profile counts. This is more accurate than checking the entry block
972 for a zero profile count, since profile insanities sometimes creep in. */
974 static bool
975 estimated_freqs_path (struct redirection_data *rd)
977 edge e = rd->incoming_edges->e;
978 vec<jump_thread_edge *> *path = THREAD_PATH (e);
979 edge ein;
980 edge_iterator ei;
981 bool non_zero_freq = false;
982 FOR_EACH_EDGE (ein, ei, e->dest->preds)
984 if (ein->count)
985 return false;
986 non_zero_freq |= ein->src->frequency != 0;
989 for (unsigned int i = 1; i < path->length (); i++)
991 edge epath = (*path)[i]->e;
992 if (epath->src->count)
993 return false;
994 non_zero_freq |= epath->src->frequency != 0;
995 edge esucc;
996 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
998 if (esucc->count)
999 return false;
1000 non_zero_freq |= esucc->src->frequency != 0;
1003 return non_zero_freq;
1007 /* Invoked for routines that have guessed frequencies and no profile
1008 counts to record the block and edge frequencies for paths through RD
1009 in the profile count fields of those blocks and edges. This is because
1010 ssa_fix_duplicate_block_edges incrementally updates the block and
1011 edge counts as edges are redirected, and it is difficult to do that
1012 for edge frequencies which are computed on the fly from the source
1013 block frequency and probability. When a block frequency is updated
1014 its outgoing edge frequencies are affected and become difficult to
1015 adjust. */
1017 static void
1018 freqs_to_counts_path (struct redirection_data *rd)
1020 edge e = rd->incoming_edges->e;
1021 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1022 edge ein;
1023 edge_iterator ei;
1024 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1026 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1027 errors applying the probability when the frequencies are very
1028 small. */
1029 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1030 ein->probability);
1033 for (unsigned int i = 1; i < path->length (); i++)
1035 edge epath = (*path)[i]->e;
1036 edge esucc;
1037 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1038 errors applying the edge probability when the frequencies are very
1039 small. */
1040 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1041 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1042 esucc->count = apply_probability (esucc->src->count,
1043 esucc->probability);
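/* For example (invented numbers): a source block with frequency 1000
   gets a temporary count of 1000 * REG_BR_PROB_BASE, and an outgoing
   edge with probability 30% then receives roughly 0.3 of that, so the
   relative weights survive the integer arithmetic with far less
   rounding error than scaling the raw frequencies directly.  */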
1048 /* For routines that have guessed frequencies and no profile counts, where we
1049 used freqs_to_counts_path to record block and edge frequencies for paths
1050 through RD, we clear the counts after completing all updates for RD.
1051 The updates in ssa_fix_duplicate_block_edges are based on the count fields,
1052 but the block frequencies and edge probabilities were updated as well,
1053 so we can simply clear the count fields. */
1055 static void
1056 clear_counts_path (struct redirection_data *rd)
1058 edge e = rd->incoming_edges->e;
1059 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1060 edge ein, esucc;
1061 edge_iterator ei;
1062 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1063 ein->count = 0;
1065 /* First clear counts along original path. */
1066 for (unsigned int i = 1; i < path->length (); i++)
1068 edge epath = (*path)[i]->e;
1069 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1070 esucc->count = 0;
1071 epath->src->count = 0;
1073 /* Also need to clear the counts along duplicated path. */
1074 for (unsigned int i = 0; i < 2; i++)
1076 basic_block dup = rd->dup_blocks[i];
1077 if (!dup)
1078 continue;
1079 FOR_EACH_EDGE (esucc, ei, dup->succs)
1080 esucc->count = 0;
1081 dup->count = 0;
1085 /* Wire up the outgoing edges from the duplicate blocks and
1086 update any PHIs as needed. Also update the profile counts
1087 on the original and duplicate blocks and edges. */
1088 void
1089 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1090 ssa_local_info_t *local_info)
1092 bool multi_incomings = (rd->incoming_edges->next != NULL);
1093 edge e = rd->incoming_edges->e;
1094 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1095 edge elast = path->last ()->e;
1096 gcov_type path_in_count = 0;
1097 gcov_type path_out_count = 0;
1098 int path_in_freq = 0;
1100 /* This routine updates profile counts, frequencies, and probabilities
1101 incrementally. Since it is difficult to do the incremental updates
1102 using frequencies/probabilities alone, for routines without profile
1103 data we first take a snapshot of the existing block and edge frequencies
1104 by copying them into the empty profile count fields. These counts are
1105 then used to do the incremental updates, and cleared at the end of this
1106 routine. If the function is marked as having a profile, we still check
1107 to see if the paths through RD are using estimated frequencies because
1108 the routine had zero profile counts. */
1109 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1110 || estimated_freqs_path (rd));
1111 if (do_freqs_to_counts)
1112 freqs_to_counts_path (rd);
1114 /* First determine how much profile count to move from original
1115 path to the duplicate path. This is tricky in the presence of
1116 a joiner (see comments for compute_path_counts), where some portion
1117 of the path's counts will flow off-path from the joiner. In the
1118 non-joiner case the path_in_count and path_out_count should be the
1119 same. */
1120 bool has_joiner = compute_path_counts (rd, local_info,
1121 &path_in_count, &path_out_count,
1122 &path_in_freq);
1124 int cur_path_freq = path_in_freq;
1125 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1127 edge epath = (*path)[i]->e;
1129 /* If we were threading through a joiner block, then we want
1130 to keep its control statement and redirect an outgoing edge.
1131 Else we want to remove the control statement & edges, then create
1132 a new outgoing edge. In both cases we may need to update PHIs. */
1133 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1135 edge victim;
1136 edge e2;
1138 gcc_assert (has_joiner);
1140 /* This updates the PHIs at the destination of the duplicate
1141 block. Pass 0 instead of i if we are threading a path which
1142 has multiple incoming edges. */
1143 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1144 path, multi_incomings ? 0 : i);
1146 /* Find the edge from the duplicate block to the block we're
1147 threading through. That's the edge we want to redirect. */
1148 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1150 /* If there are no remaining blocks on the path to duplicate,
1151 then redirect VICTIM to the final destination of the jump
1152 threading path. */
1153 if (!any_remaining_duplicated_blocks (path, i))
1155 e2 = redirect_edge_and_branch (victim, elast->dest);
1156 /* If we redirected the edge, then we need to copy PHI arguments
1157 at the target. If the edge already existed (e2 != victim
1158 case), then the PHIs in the target already have the correct
1159 arguments. */
1160 if (e2 == victim)
1161 copy_phi_args (e2->dest, elast, e2,
1162 path, multi_incomings ? 0 : i);
1164 else
1166 /* Redirect VICTIM to the next duplicated block in the path. */
1167 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1169 /* We need to update the PHIs in the next duplicated block. We
1170 want the new PHI args to have the same value as they had
1171 in the source of the next duplicate block.
1173 Thus, we need to know which edge we traversed into the
1174 source of the duplicate. Furthermore, we may have
1175 traversed many edges to reach the source of the duplicate.
1177 Walk through the path starting at element I until we
1178 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1179 the edge from the prior element. */
1180 for (unsigned int j = i + 1; j < path->length (); j++)
1182 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1184 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1185 break;
1190 /* Update the counts and frequency of both the original block
1191 and path edge, and the duplicates. The path duplicate's
1192 incoming count and frequency are the totals for all edges
1193 incoming to this jump threading path computed earlier.
1194 And we know that the duplicated path will have path_out_count
1195 flowing out of it (i.e. along the duplicated path out of the
1196 duplicated joiner). */
1197 update_profile (epath, e2, path_in_count, path_out_count,
1198 path_in_freq);
1200 /* Next we need to update the counts of the original and duplicated
1201 edges from the joiner that go off path. */
1202 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1203 path_out_count);
1205 /* Finally, we need to set the probabilities on the duplicated
1206 edges out of the duplicated joiner (e2->src). The probabilities
1207 along the original path will all be updated below after we finish
1208 processing the whole path. */
1209 recompute_probabilities (e2->src);
1211 /* Record the frequency flowing to the downstream duplicated
1212 path blocks. */
1213 cur_path_freq = EDGE_FREQUENCY (e2);
1215 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1217 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1218 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1219 multi_incomings ? 0 : i);
1220 if (count == 1)
1221 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1223 /* Update the counts and frequency of both the original block
1224 and path edge, and the duplicates. Since we are now after
1225 any joiner that may have existed on the path, the count
1226 flowing along the duplicated threaded path is path_out_count.
1227 If we didn't have a joiner, then cur_path_freq was the sum
1228 of the total frequencies along all incoming edges to the
1229 thread path (path_in_freq). If we had a joiner, it would have
1230 been updated at the end of that handling to the edge frequency
1231 along the duplicated joiner path edge. */
1232 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1233 path_out_count, path_out_count,
1234 cur_path_freq);
1236 else
1238 /* No copy case. In this case we don't have an equivalent block
1239 on the duplicated thread path to update, but we do need
1240 to remove the portion of the counts/freqs that were moved
1241 to the duplicated path from the counts/freqs flowing through
1242 this block on the original path. Since all the no-copy edges
1243 are after any joiner, the removed count is the same as
1244 path_out_count.
1246 If we didn't have a joiner, then cur_path_freq was the sum
1247 of the total frequencies along all incoming edges to the
1248 thread path (path_in_freq). If we had a joiner, it would have
1249 been updated at the end of that handling to the edge frequency
1250 along the duplicated joiner path edge. */
1251 update_profile (epath, NULL, path_out_count, path_out_count,
1252 cur_path_freq);
1255 /* Increment the index into the duplicated path when we processed
1256 a duplicated block. */
1257 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1258 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1260 count++;
1264 /* Now walk orig blocks and update their probabilities, since the
1265 counts and freqs should be updated properly by the above loop. */
1266 for (unsigned int i = 1; i < path->length (); i++)
1268 edge epath = (*path)[i]->e;
1269 recompute_probabilities (epath->src);
1272 /* Done with all profile and frequency updates, clear counts if they
1273 were copied. */
1274 if (do_freqs_to_counts)
1275 clear_counts_path (rd);
1278 /* Hash table traversal callback routine to create duplicate blocks. */
1280 int
1281 ssa_create_duplicates (struct redirection_data **slot,
1282 ssa_local_info_t *local_info)
1284 struct redirection_data *rd = *slot;
1286 /* The second duplicated block in a jump threading path is specific
1287 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1289 Each time we're called, we have to look through the path and see
1290 if a second block needs to be duplicated.
1292 Note the search starts with the third edge on the path. The first
1293 edge is the incoming edge, the second edge always has its source
1294 duplicated. Thus we start our search with the third edge. */
1295 vec<jump_thread_edge *> *path = rd->path;
1296 for (unsigned int i = 2; i < path->length (); i++)
1298 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1299 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1301 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1302 &local_info->duplicate_blocks);
1303 break;
1307 /* Create a template block if we have not done so already. Otherwise
1308 use the template to create a new block. */
1309 if (local_info->template_block == NULL)
1311 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1312 &local_info->duplicate_blocks);
1313 local_info->template_block = rd->dup_blocks[0];
1315 /* We do not create any outgoing edges for the template. We will
1316 take care of that in a later traversal. That way we do not
1317 create edges that are going to just be deleted. */
1319 else
1321 create_block_for_threading (local_info->template_block, rd, 0,
1322 &local_info->duplicate_blocks);
1324 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1325 block. */
1326 ssa_fix_duplicate_block_edges (rd, local_info);
1329 /* Keep walking the hash table. */
1330 return 1;
1333 /* We did not create any outgoing edges for the template block during
1334 block creation. This hash table traversal callback creates the
1335 outgoing edge for the template block. */
1337 inline int
1338 ssa_fixup_template_block (struct redirection_data **slot,
1339 ssa_local_info_t *local_info)
1341 struct redirection_data *rd = *slot;
1343 /* If this is the template block, halt the traversal after updating
1344 it appropriately.
1346 If we were threading through a joiner block, then we want
1347 to keep its control statement and redirect an outgoing edge.
1348 Else we want to remove the control statement & edges, then create
1349 a new outgoing edge. In both cases we may need to update PHIs. */
1350 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1352 ssa_fix_duplicate_block_edges (rd, local_info);
1353 return 0;
1356 return 1;
1359 /* Hash table traversal callback to redirect each incoming edge
1360 associated with this hash table element to its new destination. */
1362 int
1363 ssa_redirect_edges (struct redirection_data **slot,
1364 ssa_local_info_t *local_info)
1366 struct redirection_data *rd = *slot;
1367 struct el *next, *el;
1369 /* Walk over all the incoming edges associated with this hash table
1370 entry. */
1371 for (el = rd->incoming_edges; el; el = next)
1373 edge e = el->e;
1374 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1376 /* Go ahead and free this element from the list. Doing this now
1377 avoids the need for another list walk when we destroy the hash
1378 table. */
1379 next = el->next;
1380 free (el);
1382 thread_stats.num_threaded_edges++;
1384 if (rd->dup_blocks[0])
1386 edge e2;
1388 if (dump_file && (dump_flags & TDF_DETAILS))
1389 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1390 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1392 /* If we redirect a loop latch edge cancel its loop. */
1393 if (e->src == e->src->loop_father->latch)
1394 mark_loop_for_removal (e->src->loop_father);
1396 /* Redirect the incoming edge (possibly to the joiner block) to the
1397 appropriate duplicate block. */
1398 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1399 gcc_assert (e == e2);
1400 flush_pending_stmts (e2);
1403 /* Go ahead and clear E->aux. It's not needed anymore and failure
1404 to clear it will cause all kinds of unpleasant problems later. */
1405 delete_jump_thread_path (path);
1406 e->aux = NULL;
1410 /* Indicate that we actually threaded one or more jumps. */
1411 if (rd->incoming_edges)
1412 local_info->jumps_threaded = true;
1414 return 1;
1417 /* Return true if this block has no executable statements other than
1418 a simple ctrl flow instruction. When the number of outgoing edges
1419 is one, this is equivalent to a "forwarder" block. */
1421 static bool
1422 redirection_block_p (basic_block bb)
1424 gimple_stmt_iterator gsi;
1426 /* Advance to the first executable statement. */
1427 gsi = gsi_start_bb (bb);
1428 while (!gsi_end_p (gsi)
1429 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1430 || is_gimple_debug (gsi_stmt (gsi))
1431 || gimple_nop_p (gsi_stmt (gsi))
1432 || gimple_clobber_p (gsi_stmt (gsi))))
1433 gsi_next (&gsi);
1435 /* Check if this is an empty block. */
1436 if (gsi_end_p (gsi))
1437 return true;
1439 /* Test that we've reached the terminating control statement. */
1440 return gsi_stmt (gsi)
1441 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1442 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1443 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1446 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1447 is reached via one or more specific incoming edges, we know which
1448 outgoing edge from BB will be traversed.
1450 We want to redirect those incoming edges to the target of the
1451 appropriate outgoing edge. Doing so avoids a conditional branch
1452 and may expose new optimization opportunities. Note that we have
1453 to update dominator tree and SSA graph after such changes.
1455 The key to keeping the SSA graph update manageable is to duplicate
1456 the side effects occurring in BB so that those side effects still
1457 occur on the paths which bypass BB after redirecting edges.
1459 We accomplish this by creating duplicates of BB and arranging for
1460 the duplicates to unconditionally pass control to one specific
1461 successor of BB. We then revector the incoming edges into BB to
1462 the appropriate duplicate of BB.
1464 If NOLOOP_ONLY is true, we only perform the threading as long as it
1465 does not affect the structure of the loops in a nontrivial way.
1467 If JOINERS is true, then thread through joiner blocks as well. */
1469 static bool
1470 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1472 /* E is an incoming edge into BB that we may or may not want to
1473 redirect to a duplicate of BB. */
1474 edge e, e2;
1475 edge_iterator ei;
1476 ssa_local_info_t local_info;
1478 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1480 /* To avoid scanning a linear array for the element we need we instead
1481 use a hash table. For normal code there should be no noticeable
1482 difference. However, if we have a block with a large number of
1483 incoming and outgoing edges such linear searches can get expensive. */
1484 redirection_data
1485 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1487 /* Record each unique threaded destination into a hash table for
1488 efficient lookups. */
1489 FOR_EACH_EDGE (e, ei, bb->preds)
1491 if (e->aux == NULL)
1492 continue;
1494 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1496 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1497 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1498 continue;
1500 e2 = path->last ()->e;
1501 if (!e2 || noloop_only)
1503 /* If NOLOOP_ONLY is true, we only allow threading through the
1504 header of a loop to exit edges. */
1506 /* One case occurs when there was a loop header buried in a jump
1507 threading path that crosses loop boundaries. We do not try
1508 and thread this elsewhere, so just cancel the jump threading
1509 request by clearing the AUX field now. */
1510 if ((bb->loop_father != e2->src->loop_father
1511 && !loop_exit_edge_p (e2->src->loop_father, e2))
1512 || (e2->src->loop_father != e2->dest->loop_father
1513 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1515 /* Since this case is not handled by our special code
1516 to thread through a loop header, we must explicitly
1517 cancel the threading request here. */
1518 delete_jump_thread_path (path);
1519 e->aux = NULL;
1520 continue;
1523 /* Another case occurs when trying to thread through our
1524 own loop header, possibly from inside the loop. We will
1525 thread these later. */
1526 unsigned int i;
1527 for (i = 1; i < path->length (); i++)
1529 if ((*path)[i]->e->src == bb->loop_father->header
1530 && (!loop_exit_edge_p (bb->loop_father, e2)
1531 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1532 break;
1535 if (i != path->length ())
1536 continue;
1539 /* Insert the outgoing edge into the hash table if it is not
1540 already in the hash table. */
1541 lookup_redirection_data (e, INSERT);
1544 /* We do not update dominance info. */
1545 free_dominance_info (CDI_DOMINATORS);
1547 /* We know we only thread through the loop header to loop exits.
1548 Let the basic block duplication hook know we are not creating
1549 a multiple entry loop. */
1550 if (noloop_only
1551 && bb == bb->loop_father->header)
1552 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1554 /* Now create duplicates of BB.
1556 Note that for a block with a high outgoing degree we can waste
1557 a lot of time and memory creating and destroying useless edges.
1559 So we first duplicate BB and remove the control structure at the
1560 tail of the duplicate as well as all outgoing edges from the
1561 duplicate. We then use that duplicate block as a template for
1562 the rest of the duplicates. */
1563 local_info.template_block = NULL;
1564 local_info.bb = bb;
1565 local_info.jumps_threaded = false;
1566 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1567 (&local_info);
1569 /* The template does not have an outgoing edge. Create that outgoing
1570 edge and update PHI nodes at the edge's target as necessary.
1572 We do this after creating all the duplicates to avoid creating
1573 unnecessary edges. */
1574 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1575 (&local_info);
1577 /* The hash table traversals above created the duplicate blocks (and the
1578 statements within the duplicate blocks). This loop creates PHI nodes for
1579 the duplicated blocks and redirects the incoming edges into BB to reach
1580 the duplicates of BB. */
1581 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1582 (&local_info);
1584 /* Done with this block. Clear REDIRECTION_DATA. */
1585 delete redirection_data;
1586 redirection_data = NULL;
1588 if (noloop_only
1589 && bb == bb->loop_father->header)
1590 set_loop_copy (bb->loop_father, NULL);
1592 BITMAP_FREE (local_info.duplicate_blocks);
1593 local_info.duplicate_blocks = NULL;
1595 /* Indicate to our caller whether or not any jumps were threaded. */
1596 return local_info.jumps_threaded;
1599 /* Wrapper for thread_block_1 so that we can first handle jump
1600 thread paths which do not involve copying joiner blocks, then
1601 handle jump thread paths which have joiner blocks.
1603 By doing things this way we can be as aggressive as possible and
1604 not worry that copying a joiner block will create a jump threading
1605 opportunity. */
1607 static bool
1608 thread_block (basic_block bb, bool noloop_only)
1610 bool retval;
1611 retval = thread_block_1 (bb, noloop_only, false);
1612 retval |= thread_block_1 (bb, noloop_only, true);
1613 return retval;
1617 /* Threads edge E through E->dest to the next edge on E's jump thread path.
1618 Returns the copy of E->dest created during threading, or E->dest if it was
1619 not necessary to copy it (E is its single predecessor). */
1621 static basic_block
1622 thread_single_edge (edge e)
1624 basic_block bb = e->dest;
1625 struct redirection_data rd;
1626 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1627 edge eto = (*path)[1]->e;
1629 delete_jump_thread_path (path);
1630 e->aux = NULL;
1632 thread_stats.num_threaded_edges++;
1634 if (single_pred_p (bb))
1636 /* If BB has just a single predecessor, we need only remove the
1637 control statement at its end and the successor edges other than ETO. */
1638 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1640 /* And fixup the flags on the single remaining edge. */
1641 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1642 eto->flags |= EDGE_FALLTHRU;
1644 return bb;
1647 /* Otherwise, we need to create a copy. */
1648 if (e->dest == eto->src)
1649 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1651 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1652 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1653 npath->safe_push (x);
1655 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1656 npath->safe_push (x);
1657 rd.path = npath;
1659 create_block_for_threading (bb, &rd, 0, NULL);
1660 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1661 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1663 if (dump_file && (dump_flags & TDF_DETAILS))
1664 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1665 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1667 rd.dup_blocks[0]->count = e->count;
1668 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1669 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1670 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1671 flush_pending_stmts (e);
1673 delete_jump_thread_path (npath);
1674 return rd.dup_blocks[0];
1677 /* Callback for dfs_enumerate_from. Returns true if BB is different
1678 from STOP and DBDS_CE_STOP. */
1680 static basic_block dbds_ce_stop;
1681 static bool
1682 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1684 return (bb != (const_basic_block) stop
1685 && bb != dbds_ce_stop);
1688 /* Evaluates the dominance relationship between BB and the latch of LOOP,
1689 and returns the state. */
1691 enum bb_dom_status
1693 /* BB does not dominate latch of the LOOP. */
1694 DOMST_NONDOMINATING,
1695 /* The LOOP is broken (there is no path from the header to its latch). */
1696 DOMST_LOOP_BROKEN,
1697 /* BB dominates the latch of the LOOP. */
1698 DOMST_DOMINATING
1701 static enum bb_dom_status
1702 determine_bb_domination_status (struct loop *loop, basic_block bb)
1704 basic_block *bblocks;
1705 unsigned nblocks, i;
1706 bool bb_reachable = false;
1707 edge_iterator ei;
1708 edge e;
1710 /* This function assumes BB is a successor of LOOP->header.
1711 If that is not the case, return DOMST_NONDOMINATING, which
1712 is always safe. */
1714 bool ok = false;
1716 FOR_EACH_EDGE (e, ei, bb->preds)
1718 if (e->src == loop->header)
1720 ok = true;
1721 break;
1725 if (!ok)
1726 return DOMST_NONDOMINATING;
1729 if (bb == loop->latch)
1730 return DOMST_DOMINATING;
1732 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1733 from it. */
1735 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1736 dbds_ce_stop = loop->header;
1737 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1738 bblocks, loop->num_nodes, bb);
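/* The reverse DFS above collects the blocks from which the latch is
   reachable without expanding past BB or the loop header.  The loop
   below inspects the predecessors of those blocks: an edge from the
   header means the latch can be reached while bypassing BB
   (DOMST_NONDOMINATING); otherwise, an edge from BB means BB lies on
   every header-to-latch path (DOMST_DOMINATING); if neither is seen,
   there is no path from the header to the latch at all
   (DOMST_LOOP_BROKEN).  */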
1739 for (i = 0; i < nblocks; i++)
1740 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1742 if (e->src == loop->header)
1744 free (bblocks);
1745 return DOMST_NONDOMINATING;
1747 if (e->src == bb)
1748 bb_reachable = true;
1751 free (bblocks);
1752 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1755 /* Return true if BB is part of the new pre-header that is created
1756 when threading the latch to DATA. */
1758 static bool
1759 def_split_header_continue_p (const_basic_block bb, const void *data)
1761 const_basic_block new_header = (const_basic_block) data;
1762 const struct loop *l;
1764 if (bb == new_header
1765 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1766 return false;
1767 for (l = bb->loop_father; l; l = loop_outer (l))
1768 if (l == new_header->loop_father)
1769 return true;
1770 return false;
1773 /* Thread jumps through the header of LOOP. Returns true if the CFG changes.
1774 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1775 to the inside of the loop. */
1777 static bool
1778 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1780 basic_block header = loop->header;
1781 edge e, tgt_edge, latch = loop_latch_edge (loop);
1782 edge_iterator ei;
1783 basic_block tgt_bb, atgt_bb;
1784 enum bb_dom_status domst;
1786 /* We have already threaded through headers to exits, so all the threading
1787 requests now are to the inside of the loop. We need to avoid creating
1788 irreducible regions (i.e., loops with more than one entry block), and
1789 also loops with several latch edges, or new subloops of the loop (although
1790 there are cases where it might be appropriate, it is difficult to decide,
1791 and doing it wrongly may confuse other optimizers).
1793 We could handle more general cases here. However, the intention is to
1794 preserve some information about the loop, which is impossible if its
1795 structure changes significantly, in a way that is not well understood.
1796 Thus we only handle a few important special cases, in which updating
1797 the loop-carried information should also be feasible:
1799 1) Propagation of latch edge to a block that dominates the latch block
1800 of a loop. This aims to handle the following idiom:
1802 first = 1;
1803 while (1)
1805 if (first)
1806 initialize;
1807 first = 0;
1808 body;
1811 After threading the latch edge, this becomes
1813 first = 1;
1814 if (first)
1815 initialize;
1816 while (1)
1818 first = 0;
1819 body;
1822 The original header of the loop is moved out of it, and we may thread
1823 the remaining edges through it without further constraints.
1825 2) All entry edges are propagated to a single basic block that dominates
1826 the latch block of the loop. This aims to handle the following idiom
1827 (normally created for "for" loops):
1829 i = 0;
1830 while (1)
1832 if (i >= 100)
1833 break;
1834 body;
1835 i++;
1838 This becomes
1840 i = 0;
1841 while (1)
1843 body;
1844 i++;
1845 if (i >= 100)
1846 break;
1850 /* Threading through the header won't improve the code if the header has just
1851 one successor. */
1852 if (single_succ_p (header))
1853 goto fail;
1855 /* If we threaded the latch using a joiner block, we cancel the
1856 threading opportunity out of an abundance of caution. However,
1857 still allow threading from outside to inside the loop. */
1858 if (latch->aux)
1860 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1861 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1863 delete_jump_thread_path (path);
1864 latch->aux = NULL;
1868 if (latch->aux)
1870 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1871 tgt_edge = (*path)[1]->e;
1872 tgt_bb = tgt_edge->dest;
1874 else if (!may_peel_loop_headers
1875 && !redirection_block_p (loop->header))
1876 goto fail;
1877 else
1879 tgt_bb = NULL;
1880 tgt_edge = NULL;
1881 FOR_EACH_EDGE (e, ei, header->preds)
1883 if (!e->aux)
1885 if (e == latch)
1886 continue;
1888 /* If the latch is not threaded, and there is a header
1889 edge that is not threaded, we would create a loop
1890 with multiple entries. */
1891 goto fail;
1894 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1896 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1897 goto fail;
1898 tgt_edge = (*path)[1]->e;
1899 atgt_bb = tgt_edge->dest;
1900 if (!tgt_bb)
1901 tgt_bb = atgt_bb;
1902 /* Two targets of threading would make us create a loop
1903 with multiple entries. */
1904 else if (tgt_bb != atgt_bb)
1905 goto fail;
1908 if (!tgt_bb)
1910 /* There are no threading requests. */
1911 return false;
1914 /* Redirecting to an empty loop latch is useless. */
1915 if (tgt_bb == loop->latch
1916 && empty_block_p (loop->latch))
1917 goto fail;
1920 /* The target block must dominate the loop latch, otherwise we would be
1921 creating a subloop. */
1922 domst = determine_bb_domination_status (loop, tgt_bb);
1923 if (domst == DOMST_NONDOMINATING)
1924 goto fail;
1925 if (domst == DOMST_LOOP_BROKEN)
1927 /* If the loop ceased to exist, mark it as such, and thread through its
1928 original header. */
1929 mark_loop_for_removal (loop);
1930 return thread_block (header, false);
1933 if (tgt_bb->loop_father->header == tgt_bb)
1935 /* If the target of the threading is a header of a subloop, we need
1936 to create a preheader for it, so that the headers of the two loops
1937 do not merge. */
1938 if (EDGE_COUNT (tgt_bb->preds) > 2)
1940 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1941 gcc_assert (tgt_bb != NULL);
1943 else
1944 tgt_bb = split_edge (tgt_edge);
1947 if (latch->aux)
1949 basic_block *bblocks;
1950 unsigned nblocks, i;
1952 /* First handle the case where the latch edge is redirected. We are copying
1953 the loop header but not creating a multiple entry loop. Make the
1954 cfg manipulation code aware of that fact. */
1955 set_loop_copy (loop, loop);
1956 loop->latch = thread_single_edge (latch);
1957 set_loop_copy (loop, NULL);
1958 gcc_assert (single_succ (loop->latch) == tgt_bb);
1959 loop->header = tgt_bb;
1961 /* Remove the new pre-header blocks from our loop. */
1962 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1963 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1964 bblocks, loop->num_nodes, tgt_bb);
1965 for (i = 0; i < nblocks; i++)
1966 if (bblocks[i]->loop_father == loop)
1968 remove_bb_from_loops (bblocks[i]);
1969 add_bb_to_loop (bblocks[i], loop_outer (loop));
1971 free (bblocks);
1973 /* If the new header has multiple latches, mark it so. */
1974 FOR_EACH_EDGE (e, ei, loop->header->preds)
1975 if (e->src->loop_father == loop
1976 && e->src != loop->latch)
1978 loop->latch = NULL;
1979 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1982 /* Cancel remaining threading requests that would make the
1983 loop a multiple entry loop. */
1984 FOR_EACH_EDGE (e, ei, header->preds)
1986 edge e2;
1988 if (e->aux == NULL)
1989 continue;
1991 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1992 e2 = path->last ()->e;
1994 if (e->src->loop_father != e2->dest->loop_father
1995 && e2->dest != loop->header)
1997 delete_jump_thread_path (path);
1998 e->aux = NULL;
2002 /* Thread the remaining edges through the former header. */
2003 thread_block (header, false);
2005 else
2007 basic_block new_preheader;
2009 /* Now consider the case where entry edges are redirected to the new entry
2010 block. Remember one entry edge, so that we can find the new
2011 preheader (its destination after threading). */
2012 FOR_EACH_EDGE (e, ei, header->preds)
2014 if (e->aux)
2015 break;
2018 /* The duplicate of the header is the new preheader of the loop. Ensure
2019 that it is placed correctly in the loop hierarchy. */
2020 set_loop_copy (loop, loop_outer (loop));
2022 thread_block (header, false);
2023 set_loop_copy (loop, NULL);
2024 new_preheader = e->dest;
2026 /* Create the new latch block. This is always necessary, as the latch
2027 must have only a single successor, but the original header had at
2028 least two successors. */
2029 loop->latch = NULL;
2030 mfb_kj_edge = single_succ_edge (new_preheader);
2031 loop->header = mfb_kj_edge->dest;
2032 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2033 loop->header = latch->dest;
2034 loop->latch = latch->src;
2037 return true;
2039 fail:
2040 /* We failed to thread anything. Cancel the requests. */
2041 FOR_EACH_EDGE (e, ei, header->preds)
2043 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2045 if (path)
2047 delete_jump_thread_path (path);
2048 e->aux = NULL;
2051 return false;
2054 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2055 PHI arguments associated with those edges are equal or there are no
2056 PHI arguments, otherwise return FALSE. */
2058 static bool
2059 phi_args_equal_on_edges (edge e1, edge e2)
2061 gphi_iterator gsi;
2062 int indx1 = e1->dest_idx;
2063 int indx2 = e2->dest_idx;
2065 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2067 gphi *phi = gsi.phi ();
2069 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2070 gimple_phi_arg_def (phi, indx2), 0))
2071 return false;
2073 return true;
2076 /* Walk through the registered jump threads and convert them into a
2077 form convenient for this pass.
2079 Any block which has incoming edges threaded to outgoing edges
2080 will have its entry in THREADED_BLOCKS set.
2082 Any threaded edge will have its new outgoing edge stored in the
2083 original edge's AUX field.
2085 This form avoids the need to walk all the edges in the CFG to
2086 discover blocks which need processing and avoids unnecessary
2087 hash table lookups to map from threaded edge to new target. */
2089 static void
2090 mark_threaded_blocks (bitmap threaded_blocks)
2092 unsigned int i;
2093 bitmap_iterator bi;
2094 bitmap tmp = BITMAP_ALLOC (NULL);
2095 basic_block bb;
2096 edge e;
2097 edge_iterator ei;
2099 /* It is possible to have jump threads in which one is a subpath
2100 of the other. I.e., (A, B), (B, C), (C, D) where B is a joiner
2101 block and (B, C), (C, D) where no joiner block exists.
2103 When this occurs ignore the jump thread request with the joiner
2104 block. It's totally subsumed by the simpler jump thread request.
2106 This results in less block copying and simpler CFGs. More importantly,
2107 if we were to duplicate the joiner block B in this case, we would create
2108 a new threading opportunity that we wouldn't be able to optimize
2109 until the next jump threading iteration.
2111 So first convert the jump thread requests which do not require a
2112 joiner block. */
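/* To make the example above concrete: the simpler request (B, C), (C, D)
   is attached to edge B->C here, and the joiner request starting at A->B
   is cancelled further below because one of its edges (B->C) already
   carries a thread.  */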
2113 for (i = 0; i < paths.length (); i++)
2115 vec<jump_thread_edge *> *path = paths[i];
2117 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2119 edge e = (*path)[0]->e;
2120 e->aux = (void *)path;
2121 bitmap_set_bit (tmp, e->dest->index);
2125 /* Now iterate again, converting cases where we want to thread
2126 through a joiner block, but only if no other edge on the path
2127 already has a jump thread attached to it. We do this in two passes,
2128 to avoid situations where the order in the paths vec can hide overlapping
2129 threads (the path is recorded on the incoming edge, so we would miss
2130 cases where the second path starts at a downstream edge on the same
2131 path). First record all joiner paths, deleting any in the unexpected
2132 case where there is already a path for that incoming edge. */
2133 for (i = 0; i < paths.length ();)
2135 vec<jump_thread_edge *> *path = paths[i];
2137 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2139 /* Attach the path to the starting edge if none is yet recorded. */
2140 if ((*path)[0]->e->aux == NULL)
2142 (*path)[0]->e->aux = path;
2143 i++;
2145 else
2147 paths.unordered_remove (i);
2148 if (dump_file && (dump_flags & TDF_DETAILS))
2149 dump_jump_thread_path (dump_file, *path, false);
2150 delete_jump_thread_path (path);
2153 else
2155 i++;
2159 /* Second, look for paths that have any other jump thread attached to
2160 them, and either finish converting them or cancel them. */
2161 for (i = 0; i < paths.length ();)
2163 vec<jump_thread_edge *> *path = paths[i];
2164 edge e = (*path)[0]->e;
2166 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2168 unsigned int j;
2169 for (j = 1; j < path->length (); j++)
2170 if ((*path)[j]->e->aux != NULL)
2171 break;
2173 /* If we iterated through the entire path without exiting the loop,
2174 then we are good to go, record it. */
2175 if (j == path->length ())
2177 bitmap_set_bit (tmp, e->dest->index);
2178 i++;
2180 else
2182 e->aux = NULL;
2183 paths.unordered_remove (i);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 dump_jump_thread_path (dump_file, *path, false);
2186 delete_jump_thread_path (path);
2189 else
2191 i++;
2195 /* If optimizing for size, only thread through a block if we don't have
2196 to duplicate it or it is an otherwise empty redirection block. */
2197 if (optimize_function_for_size_p (cfun))
2199 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2201 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2202 if (EDGE_COUNT (bb->preds) > 1
2203 && !redirection_block_p (bb))
2205 FOR_EACH_EDGE (e, ei, bb->preds)
2207 if (e->aux)
2209 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2210 delete_jump_thread_path (path);
2211 e->aux = NULL;
2215 else
2216 bitmap_set_bit (threaded_blocks, i);
2219 else
2220 bitmap_copy (threaded_blocks, tmp);
2222 /* Look for jump threading paths which cross multiple loop headers.
2224 The code to thread through loop headers will change the CFG in ways
2225 that break assumptions made by the loop optimization code.
2227 We don't want to blindly cancel the requests. We can instead do better
2228 by trimming off the end of the jump thread path. */
2229 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2231 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2232 FOR_EACH_EDGE (e, ei, bb->preds)
2234 if (e->aux)
2236 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2238 for (unsigned int i = 0, crossed_headers = 0;
2239 i < path->length ();
2240 i++)
2242 basic_block dest = (*path)[i]->e->dest;
2243 crossed_headers += (dest == dest->loop_father->header);
2244 if (crossed_headers > 1)
2246 /* Trim from entry I onwards. */
2247 for (unsigned int j = i; j < path->length (); j++)
2248 delete (*path)[j];
2249 path->truncate (i);
2251 /* Now that we've truncated the path, make sure
2252 what's left is still valid. We need at least
2253 two edges on the path and the last edge cannot
2254 be a joiner. This should never happen, but let's
2255 be safe. */
2256 if (path->length () < 2
2257 || (path->last ()->type
2258 == EDGE_COPY_SRC_JOINER_BLOCK))
2260 delete_jump_thread_path (path);
2261 e->aux = NULL;
2263 break;
2270 /* If we have a joiner block (J) which has two successors S1 and S2 and
2271 we are threading through S1 and the final destination of the thread
2272 is S2, then we must verify that any PHI nodes in S2 have the same
2273 PHI arguments for the edge J->S2 and J->S1->...->S2.
2275 We used to detect this prior to registering the jump thread, but
2276 that prohibits propagation of edge equivalences into non-dominated
2277 PHI nodes as the equivalency test might occur before propagation.
2279 This must also occur after we truncate any jump threading paths
2280 as this scenario may only show up after truncation.
2282 This works for now, but will need improvement as part of the FSA
2283 optimization.
2285 Note since we've moved the thread request data to the edges,
2286 we have to iterate on those rather than the threaded_edges vector. */
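/* Illustrative sketch (hypothetical names, for the simplest case where
   the path is just J->S1->S2): if S2 contains

     x_3 = PHI <0 (J), 0 (S1)>

   the arguments on J->S2 and S1->S2 agree, so threading J->S1->S2 cannot
   change the value x_3 receives and the request may be kept.  If the
   arguments differed, the check below would cancel the request.  */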
2287 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2289 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2290 FOR_EACH_EDGE (e, ei, bb->preds)
2292 if (e->aux)
2294 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2295 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2297 if (have_joiner)
2299 basic_block joiner = e->dest;
2300 edge final_edge = path->last ()->e;
2301 basic_block final_dest = final_edge->dest;
2302 edge e2 = find_edge (joiner, final_dest);
2304 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2306 delete_jump_thread_path (path);
2307 e->aux = NULL;
2314 BITMAP_FREE (tmp);
2318 /* Return TRUE if BB ends with a switch statement or a computed goto.
2319 Otherwise return FALSE. */
2320 static bool
2321 bb_ends_with_multiway_branch (basic_block bb)
2323 gimple *stmt = last_stmt (bb);
2324 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2325 return true;
2326 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2327 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2328 return true;
2329 return false;
2332 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2333 case of a SEME (Single Entry Multiple Exits) region in which all nodes in
2334 the REGION have exactly one incoming edge. The only exception is the first
2335 block, which may not have been connected to the rest of the CFG yet. */
2337 DEBUG_FUNCTION void
2338 verify_jump_thread (basic_block *region, unsigned n_region)
2340 for (unsigned i = 0; i < n_region; i++)
2341 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2344 /* Return true when BB is one of the first N items in BBS. */
2346 static inline bool
2347 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2349 for (int i = 0; i < n; i++)
2350 if (bb == bbs[i])
2351 return true;
2353 return false;
2356 /* Duplicates a jump-thread path of N_REGION basic blocks.
2357 The ENTRY edge is redirected to the duplicate of the region.
2359 Remove the last conditional statement in the last basic block in the REGION,
2360 and create a single fallthru edge pointing to the same destination as the
2361 EXIT edge.
2363 The new basic blocks are stored to REGION_COPY in the same order as they had
2364 in REGION, provided that REGION_COPY is not NULL.
2366 Returns false if it is unable to copy the region, true otherwise. */
2368 static bool
2369 duplicate_thread_path (edge entry, edge exit,
2370 basic_block *region, unsigned n_region,
2371 basic_block *region_copy)
2373 unsigned i;
2374 bool free_region_copy = false;
2375 struct loop *loop = entry->dest->loop_father;
2376 edge exit_copy;
2377 edge redirected;
2378 int total_freq = 0, entry_freq = 0;
2379 gcov_type total_count = 0, entry_count = 0;
2381 if (!can_copy_bbs_p (region, n_region))
2382 return false;
2384 /* Some sanity checking. Note that we do not check for all possible
2385 misuses of the function. I.e., if you ask to copy something weird,
2386 it will work, but the state of structures probably will not be
2387 correct. */
2388 for (i = 0; i < n_region; i++)
2390 /* We do not handle subloops, i.e. all the blocks must belong to the
2391 same loop. */
2392 if (region[i]->loop_father != loop)
2393 return false;
2396 initialize_original_copy_tables ();
2398 set_loop_copy (loop, loop);
2400 if (!region_copy)
2402 region_copy = XNEWVEC (basic_block, n_region);
2403 free_region_copy = true;
2406 if (entry->dest->count)
2408 total_count = entry->dest->count;
2409 entry_count = entry->count;
2410 /* Fix up corner cases, to avoid division by zero or creation of negative
2411 frequencies. */
2412 if (entry_count > total_count)
2413 entry_count = total_count;
2415 else
2417 total_freq = entry->dest->frequency;
2418 entry_freq = EDGE_FREQUENCY (entry);
2419 /* Fix up corner cases, to avoid division by zero or creation of negative
2420 frequencies. */
2421 if (total_freq == 0)
2422 total_freq = 1;
2423 else if (entry_freq > total_freq)
2424 entry_freq = total_freq;
2427 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2428 split_edge_bb_loc (entry), false);
2430 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2431 following code ensures that all the edges exiting the jump-thread path are
2432 redirected back to the original code: these edges are exceptions
2433 invalidating the property that is propagated by executing all the blocks of
2434 the jump-thread path in order. */
2436 for (i = 0; i < n_region; i++)
2438 edge e;
2439 edge_iterator ei;
2440 basic_block bb = region_copy[i];
2442 if (single_succ_p (bb))
2444 /* Make sure the successor is the next node in the path. */
2445 gcc_assert (i + 1 == n_region
2446 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2447 continue;
2450 /* Special case the last block on the path: make sure that it does not
2451 jump back on the copied path. */
2452 if (i + 1 == n_region)
2454 FOR_EACH_EDGE (e, ei, bb->succs)
2455 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2457 basic_block orig = get_bb_original (e->dest);
2458 if (orig)
2459 redirect_edge_and_branch_force (e, orig);
2461 continue;
2464 /* Redirect all other edges jumping to non-adjacent blocks back to the
2465 original code. */
2466 FOR_EACH_EDGE (e, ei, bb->succs)
2467 if (region_copy[i + 1] != e->dest)
2469 basic_block orig = get_bb_original (e->dest);
2470 if (orig)
2471 redirect_edge_and_branch_force (e, orig);
2475 if (total_count)
2477 scale_bbs_frequencies_gcov_type (region, n_region,
2478 total_count - entry_count,
2479 total_count);
2480 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2481 total_count);
2483 else
2485 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2486 total_freq);
2487 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
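/* Worked example with illustrative numbers: if ENTRY carries 30 of its
   destination's 100 counts, the original REGION blocks are scaled by
   70/100 and the REGION_COPY blocks by 30/100, so together they still
   account for the original profile.  */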
2490 #ifdef ENABLE_CHECKING
2491 verify_jump_thread (region_copy, n_region);
2492 #endif
2494 /* Remove the last branch in the jump thread path. */
2495 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2497 /* And fixup the flags on the single remaining edge. */
2498 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2499 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2500 fix_e->flags |= EDGE_FALLTHRU;
2502 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2504 if (e) {
2505 rescan_loop_exit (e, true, false);
2506 e->probability = REG_BR_PROB_BASE;
2507 e->count = region_copy[n_region - 1]->count;
2510 /* Redirect the entry and add the phi node arguments. */
2511 if (entry->dest == loop->header)
2512 mark_loop_for_removal (loop);
2513 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2514 gcc_assert (redirected != NULL);
2515 flush_pending_stmts (entry);
2517 /* Add the other PHI node arguments. */
2518 add_phi_args_after_copy (region_copy, n_region, NULL);
2520 if (free_region_copy)
2521 free (region_copy);
2523 free_original_copy_tables ();
2524 return true;
2527 /* Return true when PATH is a valid jump-thread path. */
2529 static bool
2530 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2532 unsigned len = path->length ();
2534 /* Check that the path is connected. */
2535 for (unsigned int j = 0; j < len - 1; j++)
2536 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2537 return false;
2539 return true;
2542 /* Walk through all blocks and thread incoming edges to the appropriate
2543 outgoing edge for each recorded jump thread path.
2545 It is the caller's responsibility to fix the dominance information
2546 and rewrite duplicated SSA_NAMEs back into SSA form.
2548 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2549 loop headers if it does not simplify the loop.
2551 Returns true if one or more edges were threaded, false otherwise. */
2553 bool
2554 thread_through_all_blocks (bool may_peel_loop_headers)
2556 bool retval = false;
2557 unsigned int i;
2558 bitmap_iterator bi;
2559 bitmap threaded_blocks;
2560 struct loop *loop;
2562 if (!paths.exists ())
2563 return false;
2565 threaded_blocks = BITMAP_ALLOC (NULL);
2566 memset (&thread_stats, 0, sizeof (thread_stats));
2568 /* Jump-thread all FSM threads before other jump-threads. */
2569 for (i = 0; i < paths.length ();)
2571 vec<jump_thread_edge *> *path = paths[i];
2572 edge entry = (*path)[0]->e;
2574 /* Only code-generate FSM jump-threads in this loop. */
2575 if ((*path)[0]->type != EDGE_FSM_THREAD)
2577 i++;
2578 continue;
2581 /* Do not jump-thread twice from the same block. */
2582 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2583 /* Verify that the jump thread path is still valid: a
2584 previous jump-thread may have changed the CFG, and
2585 invalidated the current path. */
2586 || !valid_jump_thread_path (path))
2588 /* Remove invalid FSM jump-thread paths. */
2589 delete_jump_thread_path (path);
2590 paths.unordered_remove (i);
2591 continue;
2594 unsigned len = path->length ();
2595 edge exit = (*path)[len - 1]->e;
2596 basic_block *region = XNEWVEC (basic_block, len - 1);
2598 for (unsigned int j = 0; j < len - 1; j++)
2599 region[j] = (*path)[j]->e->dest;
2601 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2603 /* We do not update dominance info. */
2604 free_dominance_info (CDI_DOMINATORS);
2605 bitmap_set_bit (threaded_blocks, entry->src->index);
2606 retval = true;
2609 delete_jump_thread_path (path);
2610 paths.unordered_remove (i);
2613 /* Remove from PATHS all the jump-threads starting with an edge already
2614 jump-threaded. */
2615 for (i = 0; i < paths.length ();)
2617 vec<jump_thread_edge *> *path = paths[i];
2618 edge entry = (*path)[0]->e;
2620 /* Do not jump-thread twice from the same block. */
2621 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2623 delete_jump_thread_path (path);
2624 paths.unordered_remove (i);
2626 else
2627 i++;
2630 bitmap_clear (threaded_blocks);
2632 mark_threaded_blocks (threaded_blocks);
2634 initialize_original_copy_tables ();
2636 /* First perform the threading requests that do not affect
2637 loop structure. */
2638 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2640 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2642 if (EDGE_COUNT (bb->preds) > 0)
2643 retval |= thread_block (bb, true);
2646 /* Then perform the threading through loop headers. We start with the
2647 innermost loop, so that the changes in the CFG we perform won't affect
2648 further threading. */
2649 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2651 if (!loop->header
2652 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2653 continue;
2655 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2658 /* Any jump threading paths that are still attached to edges at this
2659 point must fall into one of two cases.
2661 First, we could have a jump threading path which went from outside
2662 a loop to inside a loop that was ignored because a prior jump thread
2663 across a backedge was realized (which indirectly causes the loop
2664 above to ignore the latter thread). We can detect these because the
2665 loop structures will be different and we do not currently try to
2666 optimize this case.
2668 Second, we could be threading across a backedge to a point within the
2669 same loop. This occurs for the FSA/FSM optimization and we would
2670 like to optimize it. However, we have to be very careful as this
2671 may completely scramble the loop structures, with the result being
2672 irreducible loops causing us to throw away our loop structure.
2674 As a compromise for the latter case, if the thread path ends in
2675 a block where the last statement is a multiway branch, then go
2676 ahead and thread it, else ignore it. */
2677 basic_block bb;
2678 edge e;
2679 FOR_EACH_BB_FN (bb, cfun)
2681 /* If we do end up threading here, we can remove elements from
2682 BB->preds. Thus we cannot use the FOR_EACH_EDGE iterator. */
2683 for (edge_iterator ei = ei_start (bb->preds);
2684 (e = ei_safe_edge (ei));)
2685 if (e->aux)
2687 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2689 /* Case 1, threading from outside to inside the loop
2690 after we'd already threaded through the header. */
2691 if ((*path)[0]->e->dest->loop_father
2692 != path->last ()->e->src->loop_father)
2694 delete_jump_thread_path (path);
2695 e->aux = NULL;
2696 ei_next (&ei);
2698 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2700 /* The code to thread through loop headers may have
2701 split a block with jump threads attached to it.
2703 We can identify this with a disjoint jump threading
2704 path. If found, just remove it. */
2705 for (unsigned int i = 0; i < path->length () - 1; i++)
2706 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2708 delete_jump_thread_path (path);
2709 e->aux = NULL;
2710 ei_next (&ei);
2711 break;
2714 /* Our path is still valid, thread it. */
2715 if (e->aux)
2717 if (thread_block ((*path)[0]->e->dest, false))
2718 e->aux = NULL;
2719 else
2721 delete_jump_thread_path (path);
2722 e->aux = NULL;
2723 ei_next (&ei);
2727 else
2729 delete_jump_thread_path (path);
2730 e->aux = NULL;
2731 ei_next (&ei);
2734 else
2735 ei_next (&ei);
2738 statistics_counter_event (cfun, "Jumps threaded",
2739 thread_stats.num_threaded_edges);
2741 free_original_copy_tables ();
2743 BITMAP_FREE (threaded_blocks);
2744 threaded_blocks = NULL;
2745 paths.release ();
2747 if (retval)
2748 loops_state_set (LOOPS_NEED_FIXUP);
2750 return retval;
2753 /* Delete the jump threading path PATH. We have to explicitly delete
2754 each entry in the vector, then the container. */
2756 void
2757 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2759 for (unsigned int i = 0; i < path->length (); i++)
2760 delete (*path)[i];
2761 path->release ();
2762 delete path;
2765 /* Register a jump threading opportunity. We queue up all the jump
2766 threading opportunities discovered by a pass and update the CFG
2767 and SSA form all at once.
2769 PATH describes the thread: its first edge is the edge we can thread,
2770 and its remaining edges record the blocks to duplicate and the final
2771 destination to be reached after fixing the CFG and SSA graph. */
2773 void
2774 register_jump_thread (vec<jump_thread_edge *> *path)
2776 if (!dbg_cnt (registered_jump_thread))
2778 delete_jump_thread_path (path);
2779 return;
2782 /* First make sure there are no NULL outgoing edges on the jump threading
2783 path. That can happen for jumping to a constant address. */
2784 for (unsigned int i = 0; i < path->length (); i++)
2785 if ((*path)[i]->e == NULL)
2787 if (dump_file && (dump_flags & TDF_DETAILS))
2789 fprintf (dump_file,
2790 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2791 dump_jump_thread_path (dump_file, *path, false);
2794 delete_jump_thread_path (path);
2795 return;
2798 if (dump_file && (dump_flags & TDF_DETAILS))
2799 dump_jump_thread_path (dump_file, *path, true);
2801 if (!paths.exists ())
2802 paths.create (5);
2804 paths.safe_push (path);
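/* Illustrative sketch of a caller (E and E2 are hypothetical edges,
   following the same pattern thread_single_edge uses above): a client
   pass might register a simple two-edge jump thread like so:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   Ownership of PATH passes to this file: the path is either queued in
   PATHS for thread_through_all_blocks or released immediately via
   delete_jump_thread_path.  */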