gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "backend.h"
  24 #include "tree.h"
  25 #include "gimple.h"
  26 #include "cfghooks.h"
  27 #include "tree-pass.h"
  28 #include "ssa.h"
  29 #include "fold-const.h"
  30 #include "cfganal.h"
  31 #include "gimple-iterator.h"
  32 #include "tree-ssa.h"
  33 #include "tree-ssa-threadupdate.h"
  34 #include "cfgloop.h"
  35 #include "dbgcnt.h"
  36 #include "tree-cfg.h"
  37 #include "tree-vectorizer.h"
  38
  39 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  40    one or more in-edges to B to instead reach the destination of an
  41    out-edge from B while preserving any side effects in B.
  42
  43    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  44    side effects of executing B.
  45
  46      1. Make a copy of B (including its outgoing edges and statements).  Call
  47         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  48
  49      2. Remove the control statement at the end of B' and all outgoing edges
  50         except B'->C.
  51
  52      3. Add a new argument to each PHI in C with the same value as the existing
  53         argument associated with edge B->C.  Associate the new PHI arguments
  54         with the edge B'->C.
  55
  56      4. For each PHI in B, find or create a PHI in B' with an identical
  57         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  58         value as the PHI in B associated with the edge A->B.  Associate
  59         the new argument in the PHI in B' with the edge A->B.
  60
  61      5. Change the edge A->B to A->B'.
  62
  63         5a. This automatically deletes any PHI arguments associated with the
  64             edge A->B in B.
  65
  66         5b. This automatically associates each new argument added in step 4
  67             with the edge A->B'.
  68
  69      6. Repeat for other incoming edges into B.
  70
  71      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  72
  73    Note that block duplication can be minimized by first collecting the
  74    set of unique destination blocks that the incoming edges should
  75    be threaded to.
  76
  77    We reduce the number of edges and statements we create by not copying all
  78    the outgoing edges and the control statement in step #1.  We instead create
  79    a template block without the outgoing edges and duplicate the template.
  80
  81    Another case this code handles is threading through a "joiner" block.  In
  82    this case, we do not know the destination of the joiner block, but one
  83    of the outgoing edges from the joiner block leads to a threadable path.  This
  84    case largely works as outlined above, except the duplicate of the joiner
  85    block still contains a full set of outgoing edges and its control statement.
  86    We just redirect one of its outgoing edges to our jump threading path.  */
  87
  88
  89 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  90    all the incoming edges which thread to the same destination edge at
  91    the same time.  That avoids lots of table lookups to get information
  92    for the destination edge.
  93
  94    To realize that implementation we create a list of incoming edges
  95    which thread to the same outgoing edge.  Thus to implement steps
  96    #5 and #6 we traverse our hash table of outgoing edge information.
  97    For each entry we walk the list of incoming edges which thread to
  98    the current outgoing edge.  */
  99
struct el
{
  /* One incoming edge that threads to the destination this list
     belongs to.  */
  edge e;

  /* The next entry in the singly linked list, or NULL for the last
     entry.  */
  struct el *next;
};
 105
 106 /* Main data structure recording information regarding B's duplicate
 107    blocks.  */
 108
 109 /* We need to efficiently record the unique thread destinations of this
 110    block and specific information associated with those destinations.  We
 111    may have many incoming edges threaded to the same outgoing edge.  This
 112    can be naturally implemented with a hash table.  */
 113
struct redirection_data : free_ptr_hash<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  /* The jump threading path.  This is the key of the hash table entry:
     it is hashed on the destination block of its last edge, and two
     entries compare equal when their paths have the same length and
     agree (edge and type) at every index from 1 onwards.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  lookup_redirection_data creates the list and pushes
     further edges onto its front.  */
  struct el *incoming_edges;

  /* hash_table support.  HASH computes the hash value of an entry and
     EQUAL returns nonzero when two entries describe the same path.  */
  static inline hashval_t hash (const redirection_data *);
  static inline int equal (const redirection_data *, const redirection_data *);
};
 142
 143 /* Dump a jump threading path, including annotations about each
 144    edge in the path.  */
 145
 146 static void
 147 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 148                        bool registering)
 149 {
 150   fprintf (dump_file,
 151            "  %s%s jump thread: (%d, %d) incoming edge; ",
 152            (registering ? "Registering" : "Cancelling"),
 153            (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
 154            path[0]->e->src->index, path[0]->e->dest->index);
 155
 156   for (unsigned int i = 1; i < path.length (); i++)
 157     {
 158       /* We can get paths with a NULL edge when the final destination
 159          of a jump thread turns out to be a constant address.  We dump
 160          those paths when debugging, so we have to be prepared for that
 161          possibility here.  */
 162       if (path[i]->e == NULL)
 163         continue;
 164
 165       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 166         fprintf (dump_file, " (%d, %d) joiner; ",
 167                  path[i]->e->src->index, path[i]->e->dest->index);
 168       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
 169        fprintf (dump_file, " (%d, %d) normal;",
 170                  path[i]->e->src->index, path[i]->e->dest->index);
 171       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
 172        fprintf (dump_file, " (%d, %d) nocopy;",
 173                  path[i]->e->src->index, path[i]->e->dest->index);
 174       if (path[0]->type == EDGE_FSM_THREAD)
 175         fprintf (dump_file, " (%d, %d) ",
 176                  path[i]->e->src->index, path[i]->e->dest->index);
 177     }
 178   fputc ('\n', dump_file);
 179 }
 180
 181 /* Simple hashing function.  For any given incoming edge E, we're going
 182    to be most concerned with the final destination of its jump thread
 183    path.  So hash on the block index of the final edge in the path.  */
 184
 185 inline hashval_t
 186 redirection_data::hash (const redirection_data *p)
 187 {
 188   vec<jump_thread_edge *> *path = p->path;
 189   return path->last ()->e->dest->index;
 190 }
 191
 192 /* Given two hash table entries, return true if they have the same
 193    jump threading path.  */
 194 inline int
 195 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
 196 {
 197   vec<jump_thread_edge *> *path1 = p1->path;
 198   vec<jump_thread_edge *> *path2 = p2->path;
 199
 200   if (path1->length () != path2->length ())
 201     return false;
 202
 203   for (unsigned int i = 1; i < path1->length (); i++)
 204     {
 205       if ((*path1)[i]->type != (*path2)[i]->type
 206           || (*path1)[i]->e != (*path2)[i]->e)
 207         return false;
 208     }
 209
 210   return true;
 211 }
 212
/* Rather than search all the edges in jump thread paths each time
   DOM is able to simplify a control statement, we build a hash table
   with the deleted edges.  We only care about the address of the edge,
   not its contents.  */
struct removed_edges : nofree_ptr_hash<edge_def>
{
  /* Hash an edge by its address.  */
  static hashval_t hash (edge e) { return htab_hash_pointer (e); }
  /* Two entries are equal only if they are the very same edge object.  */
  static bool equal (edge e1, edge e2) { return e1 == e2; }
};

/* The table of deleted edges itself.  It is a pointer; where it is
   allocated and released is not visible in this part of the file.  */
static hash_table<removed_edges> *removed_edges;
 224
 225 /* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* Blocks duplicated for the thread.
     NOTE(review): presumably this is the bitmap handed to
     create_block_for_threading, which sets the bit for the index of
     each duplicate it creates -- confirm against the callers.  */
  bitmap duplicate_blocks;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* When we have multiple paths through a joiner which reach different
     final destinations, then we may need to correct for potential
     profile insanities.  */
  bool need_profile_correction;
};
 249
/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector.  Each element is a
   pointer to one jump threading path, which is itself a vector of
   jump_thread_edge pointers.  */
static vec<vec<jump_thread_edge *> *> paths;
 255
/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use this
   macro to access the AUX field of edge E as a pointer to a jump
   threading path.  The macro is a plain cast; it does not check that
   the AUX field really holds a path.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 260
/* Jump threading statistics.  */

struct thread_stats_d
{
  /* NOTE(review): judging by the name, the number of edges that have
     been threaded; the code updating it is not visible here.  */
  unsigned long num_threaded_edges;
};

/* The statistics object.  It is not declared static, so it has
   external linkage.  */
struct thread_stats_d thread_stats;
 269
 270
 271 /* Remove the last statement in block BB if it is a control statement
 272    Also remove all outgoing edges except the edge which reaches DEST_BB.
 273    If DEST_BB is NULL, then remove all outgoing edges.  */
 274
 275 void
 276 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 277 {
 278   gimple_stmt_iterator gsi;
 279   edge e;
 280   edge_iterator ei;
 281
 282   gsi = gsi_last_bb (bb);
 283
 284   /* If the duplicate ends with a control statement, then remove it.
 285
 286      Note that if we are duplicating the template block rather than the
 287      original basic block, then the duplicate might not have any real
 288      statements in it.  */
 289   if (!gsi_end_p (gsi)
 290       && gsi_stmt (gsi)
 291       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 292           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 293           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 294     gsi_remove (&gsi, true);
 295
 296   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 297     {
 298       if (e->dest != dest_bb)
 299         {
 300           free_dom_edge_info (e);
 301           remove_edge (e);
 302         }
 303       else
 304         {
 305           e->probability = profile_probability::always ();
 306           ei_next (&ei);
 307         }
 308     }
 309
 310   /* If the remaining edge is a loop exit, there must have
 311      a removed edge that was not a loop exit.
 312
 313      In that case BB and possibly other blocks were previously
 314      in the loop, but are now outside the loop.  Thus, we need
 315      to update the loop structures.  */
 316   if (single_succ_p (bb)
 317       && loop_outer (bb->loop_father)
 318       && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
 319     loops_state_set (LOOPS_NEED_FIXUP);
 320 }
 321
 322 /* Create a duplicate of BB.  Record the duplicate block in an array
 323    indexed by COUNT stored in RD.  */
 324
 325 static void
 326 create_block_for_threading (basic_block bb,
 327                             struct redirection_data *rd,
 328                             unsigned int count,
 329                             bitmap *duplicate_blocks)
 330 {
 331   edge_iterator ei;
 332   edge e;
 333
 334   /* We can use the generic block duplication code and simply remove
 335      the stuff we do not need.  */
 336   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 337
 338   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 339     e->aux = NULL;
 340
 341   /* Zero out the profile, since the block is unreachable for now.  */
 342   rd->dup_blocks[count]->count = profile_count::uninitialized ();
 343   if (duplicate_blocks)
 344     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
 345 }
 346
/* Main data structure to hold information for duplicates of BB.
   Entries are keyed by jump threading path (see redirection_data::hash
   and redirection_data::equal) and are looked up and inserted through
   lookup_redirection_data.  */

static hash_table<redirection_data> *redirection_data;
 350
 351 /* Given an outgoing edge E lookup and return its entry in our hash table.
 352
 353    If INSERT is true, then we insert the entry into the hash table if
 354    it is not already present.  INCOMING_EDGE is added to the list of incoming
 355    edges associated with E in the hash table.  */
 356
 357 static struct redirection_data *
 358 lookup_redirection_data (edge e, enum insert_option insert)
 359 {
 360   struct redirection_data **slot;
 361   struct redirection_data *elt;
 362   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 363
 364   /* Build a hash table element so we can see if E is already
 365      in the table.  */
 366   elt = XNEW (struct redirection_data);
 367   elt->path = path;
 368   elt->dup_blocks[0] = NULL;
 369   elt->dup_blocks[1] = NULL;
 370   elt->incoming_edges = NULL;
 371
 372   slot = redirection_data->find_slot (elt, insert);
 373
 374   /* This will only happen if INSERT is false and the entry is not
 375      in the hash table.  */
 376   if (slot == NULL)
 377     {
 378       free (elt);
 379       return NULL;
 380     }
 381
 382   /* This will only happen if E was not in the hash table and
 383      INSERT is true.  */
 384   if (*slot == NULL)
 385     {
 386       *slot = elt;
 387       elt->incoming_edges = XNEW (struct el);
 388       elt->incoming_edges->e = e;
 389       elt->incoming_edges->next = NULL;
 390       return elt;
 391     }
 392   /* E was in the hash table.  */
 393   else
 394     {
 395       /* Free ELT as we do not need it anymore, we will extract the
 396          relevant entry from the hash table itself.  */
 397       free (elt);
 398
 399       /* Get the entry stored in the hash table.  */
 400       elt = *slot;
 401
 402       /* If insertion was requested, then we need to add INCOMING_EDGE
 403          to the list of incoming edges associated with E.  */
 404       if (insert)
 405         {
 406           struct el *el = XNEW (struct el);
 407           el->next = elt->incoming_edges;
 408           el->e = e;
 409           elt->incoming_edges = el;
 410         }
 411
 412       return elt;
 413     }
 414 }
 415
 416 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 417    does not have a value associated with it.  */
 418
 419 static void
 420 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 421 {
 422   int src_idx = src_e->dest_idx;
 423   int tgt_idx = tgt_e->dest_idx;
 424
 425   /* Iterate over each PHI in e->dest.  */
 426   for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
 427                            gsi2 = gsi_start_phis (tgt_e->dest);
 428        !gsi_end_p (gsi);
 429        gsi_next (&gsi), gsi_next (&gsi2))
 430     {
 431       gphi *src_phi = gsi.phi ();
 432       gphi *dest_phi = gsi2.phi ();
 433       tree val = gimple_phi_arg_def (src_phi, src_idx);
 434       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
 435
 436       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 437       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 438     }
 439 }
 440
 441 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
 442    to see if it has constant value in a flow sensitive manner.  Set
 443    LOCUS to location of the constant phi arg and return the value.
 444    Return DEF directly if either PATH or idx is ZERO.  */
 445
 446 static tree
 447 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
 448                          basic_block bb, int idx, source_location *locus)
 449 {
 450   tree arg;
 451   gphi *def_phi;
 452   basic_block def_bb;
 453
 454   if (path == NULL || idx == 0)
 455     return def;
 456
 457   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
 458   if (!def_phi)
 459     return def;
 460
 461   def_bb = gimple_bb (def_phi);
 462   /* Don't propagate loop invariants into deeper loops.  */
 463   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
 464     return def;
 465
 466   /* Backtrack jump threading path from IDX to see if def has constant
 467      value.  */
 468   for (int j = idx - 1; j >= 0; j--)
 469     {
 470       edge e = (*path)[j]->e;
 471       if (e->dest == def_bb)
 472         {
 473           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
 474           if (is_gimple_min_invariant (arg))
 475             {
 476               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
 477               return arg;
 478             }
 479           break;
 480         }
 481     }
 482
 483   return def;
 484 }
 485
 486 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 487    Try to backtrack jump threading PATH from node IDX to see if the arg
 488    has constant value, copy constant value instead of argument itself
 489    if yes.  */
 490
 491 static void
 492 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
 493                vec<jump_thread_edge *> *path, int idx)
 494 {
 495   gphi_iterator gsi;
 496   int src_indx = src_e->dest_idx;
 497
 498   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 499     {
 500       gphi *phi = gsi.phi ();
 501       tree def = gimple_phi_arg_def (phi, src_indx);
 502       source_location locus = gimple_phi_arg_location (phi, src_indx);
 503
 504       if (TREE_CODE (def) == SSA_NAME
 505           && !virtual_operand_p (gimple_phi_result (phi)))
 506         def = get_value_locus_in_path (def, path, bb, idx, &locus);
 507
 508       add_phi_arg (phi, def, tgt_e, locus);
 509     }
 510 }
 511
 512 /* We have recently made a copy of ORIG_BB, including its outgoing
 513    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 514    ORIG_BB has a new argument associated with edge from NEW_BB to the
 515    successor.  Initialize the PHI argument so that it is equal to the PHI
 516    argument associated with the edge from ORIG_BB to the successor.
 517    PATH and IDX are used to check if the new PHI argument has constant
 518    value in a flow sensitive manner.  */
 519
 520 static void
 521 update_destination_phis (basic_block orig_bb, basic_block new_bb,
 522                          vec<jump_thread_edge *> *path, int idx)
 523 {
 524   edge_iterator ei;
 525   edge e;
 526
 527   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 528     {
 529       edge e2 = find_edge (new_bb, e->dest);
 530       copy_phi_args (e->dest, e, e2, path, idx);
 531     }
 532 }
 533
 534 /* Given a duplicate block and its single destination (both stored
 535    in RD).  Create an edge between the duplicate and its single
 536    destination.
 537
 538    Add an additional argument to any PHI nodes at the single
 539    destination.  IDX is the start node in jump threading path
 540    we start to check to see if the new PHI argument has constant
 541    value along the jump threading path.  */
 542
 543 static void
 544 create_edge_and_update_destination_phis (struct redirection_data *rd,
 545                                          basic_block bb, int idx)
 546 {
 547   edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 548
 549   rescan_loop_exit (e, true, false);
 550
 551   /* We used to copy the thread path here.  That was added in 2007
 552      and dutifully updated through the representation changes in 2013.
 553
 554      In 2013 we added code to thread from an interior node through
 555      the backedge to another interior node.  That runs after the code
 556      to thread through loop headers from outside the loop.
 557
 558      The latter may delete edges in the CFG, including those
 559      which appeared in the jump threading path we copied here.  Thus
 560      we'd end up using a dangling pointer.
 561
 562      After reviewing the 2007/2011 code, I can't see how anything
 563      depended on copying the AUX field and clearly copying the jump
 564      threading path is problematical due to embedded edge pointers.
 565      It has been removed.  */
 566   e->aux = NULL;
 567
 568   /* If there are any PHI nodes at the destination of the outgoing edge
 569      from the duplicate block, then we will need to add a new argument
 570      to them.  The argument should have the same value as the argument
 571      associated with the outgoing edge stored in RD.  */
 572   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
 573 }
 574
 575 /* Look through PATH beginning at START and return TRUE if there are
 576    any additional blocks that need to be duplicated.  Otherwise,
 577    return FALSE.  */
 578 static bool
 579 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 580                                  unsigned int start)
 581 {
 582   for (unsigned int i = start + 1; i < path->length (); i++)
 583     {
 584       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 585           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 586         return true;
 587     }
 588   return false;
 589 }
 590
 591
 592 /* Compute the amount of profile count coming into the jump threading
 593    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
 594    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
 595    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
 596    identify blocks duplicated for jump threading, which have duplicated
 597    edges that need to be ignored in the analysis.  Return true if path contains
 598    a joiner, false otherwise.
 599
 600    In the non-joiner case, this is straightforward - all the counts
 601    flowing into the jump threading path should flow through the duplicated
 602    block and out of the duplicated path.
 603
 604    In the joiner case, it is very tricky.  Some of the counts flowing into
 605    the original path go offpath at the joiner.  The problem is that while
 606    we know how much total count goes off-path in the original control flow,
 607    we don't know how many of the counts corresponding to just the jump
 608    threading path go offpath at the joiner.
 609
 610    For example, assume we have the following control flow and identified
 611    jump threading paths:
 612
 613                 A     B     C
 614                  \    |    /
 615                Ea \   |Eb / Ec
 616                    \  |  /
 617                     v v v
 618                       J       <-- Joiner
 619                      / \
 620                 Eoff/   \Eon
 621                    /     \
 622                   v       v
 623                 Soff     Son  <--- Normal
 624                          /\
 625                       Ed/  \ Ee
 626                        /    \
 627                       v     v
 628                       D      E
 629
 630             Jump threading paths: A -> J -> Son -> D (path 1)
 631                                   C -> J -> Son -> E (path 2)
 632
 633    Note that the control flow could be more complicated:
 634    - Each jump threading path may have more than one incoming edge.  I.e. A and
 635    Ea could represent multiple incoming blocks/edges that are included in
 636    path 1.
 637    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
 638    before or after the "normal" copy block).  These are not duplicated onto
 639    the jump threading path, as they are single-successor.
 640    - Any of the blocks along the path may have other incoming edges that
 641    are not part of any jump threading path, but add profile counts along
 642    the path.
 643
 644    In the above example, after all jump threading is complete, we will
 645    end up with the following control flow:
 646
 647                 A          B           C
 648                 |          |           |
 649               Ea|          |Eb         |Ec
 650                 |          |           |
 651                 v          v           v
 652                Ja          J          Jc
 653                / \        / \Eon'     / \
 654           Eona/   \   ---/---\--------   \Eonc
 655              /     \ /  /     \           \
 656             v       v  v       v          v
 657            Sona     Soff      Son       Sonc
 658              \                 /\         /
 659               \___________    /  \  _____/
 660                           \  /    \/
 661                            vv      v
 662                             D      E
 663
 664    The main issue to notice here is that when we are processing path 1
 665    (A->J->Son->D) we need to figure out the outgoing edge weights to
 666    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
 667    sum of the incoming weights to D remain Ed.  The problem with simply
 668    assuming that Ja (and Jc when processing path 2) has the same outgoing
 669    probabilities to its successors as the original block J, is that after
 670    all paths are processed and other edges/counts removed (e.g. none
 671    of Ec will reach D after processing path 2), we may end up with not
 672    enough count flowing along duplicated edge Sona->D.
 673
 674    Therefore, in the case of a joiner, we keep track of all counts
 675    coming in along the current path, as well as from predecessors not
 676    on any jump threading path (Eb in the above example).  While we
 677    first assume that the duplicated Eona for Ja->Sona has the same
 678    probability as the original, we later compensate for other jump
   threading paths that may eliminate edges.  We do that by keeping track
 680    of all counts coming into the original path that are not in a jump
 681    thread (Eb in the above example, but as noted earlier, there could
 682    be other predecessors incoming to the path at various points, such
 683    as at Son).  Call this cumulative non-path count coming into the path
   before D as Enonpath.  We then ensure that the count from Sona->D is at
 685    least as big as (Ed - Enonpath), but no bigger than the minimum
 686    weight along the jump threading path.  The probabilities of both the
 687    original and duplicated joiner block J and Ja will be adjusted
 688    accordingly after the updates.  */
 689
 690 static bool
 691 compute_path_counts (struct redirection_data *rd,
 692                      ssa_local_info_t *local_info,
 693                      profile_count *path_in_count_ptr,
 694                      profile_count *path_out_count_ptr,
 695                      int *path_in_freq_ptr)
 696 {
 697   edge e = rd->incoming_edges->e;
 698   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 699   edge elast = path->last ()->e;
 700   profile_count nonpath_count = profile_count::zero ();
 701   bool has_joiner = false;
 702   profile_count path_in_count = profile_count::zero ();
 703   int path_in_freq = 0;
 704
 705   /* Start by accumulating incoming edge counts to the path's first bb
 706      into a couple buckets:
 707         path_in_count: total count of incoming edges that flow into the
 708                   current path.
 709         nonpath_count: total count of incoming edges that are not
 710                   flowing along *any* path.  These are the counts
 711                   that will still flow along the original path after
 712                   all path duplication is done by potentially multiple
 713                   calls to this routine.
 714      (any other incoming edge counts are for a different jump threading
 715      path that will be handled by a later call to this routine.)
 716      To make this easier, start by recording all incoming edges that flow into
 717      the current path in a bitmap.  We could add up the path's incoming edge
 718      counts here, but we still need to walk all the first bb's incoming edges
 719      below to add up the counts of the other edges not included in this jump
 720      threading path.  */
 721   struct el *next, *el;
 722   auto_bitmap in_edge_srcs;
 723   for (el = rd->incoming_edges; el; el = next)
 724     {
 725       next = el->next;
 726       bitmap_set_bit (in_edge_srcs, el->e->src->index);
 727     }
 728   edge ein;
 729   edge_iterator ei;
 730   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 731     {
 732       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
 733       /* Simply check the incoming edge src against the set captured above.  */
 734       if (ein_path
 735           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
 736         {
 737           /* It is necessary but not sufficient that the last path edges
 738              are identical.  There may be different paths that share the
 739              same last path edge in the case where the last edge has a nocopy
 740              source block.  */
 741           gcc_assert (ein_path->last ()->e == elast);
 742           path_in_count += ein->count ();
 743           path_in_freq += EDGE_FREQUENCY (ein);
 744         }
 745       else if (!ein_path)
 746         {
 747           /* Keep track of the incoming edges that are not on any jump-threading
 748              path.  These counts will still flow out of original path after all
 749              jump threading is complete.  */
 750             nonpath_count += ein->count ();
 751         }
 752     }
 753
 754   /* This is needed due to insane incoming frequencies.  */
 755   if (path_in_freq > BB_FREQ_MAX)
 756     path_in_freq = BB_FREQ_MAX;
 757
 758   /* Now compute the fraction of the total count coming into the first
 759      path bb that is from the current threading path.  */
 760   profile_count total_count = e->dest->count;
 761   /* Handle incoming profile insanities.  */
 762   if (total_count < path_in_count)
 763     path_in_count = total_count;
 764   profile_probability onpath_scale = path_in_count.probability_in (total_count);
 765
 766   /* Walk the entire path to do some more computation in order to estimate
 767      how much of the path_in_count will flow out of the duplicated threading
 768      path.  In the non-joiner case this is straightforward (it should be
 769      the same as path_in_count, although we will handle incoming profile
 770      insanities by setting it equal to the minimum count along the path).
 771
 772      In the joiner case, we need to estimate how much of the path_in_count
 773      will stay on the threading path after the joiner's conditional branch.
 774      We don't really know for sure how much of the counts
 775      associated with this path go to each successor of the joiner, but we'll
 776      estimate based on the fraction of the total count coming into the path
 777      bb was from the threading paths (computed above in onpath_scale).
 778      Afterwards, we will need to do some fixup to account for other threading
 779      paths and possible profile insanities.
 780
 781      In order to estimate the joiner case's counts we also need to update
 782      nonpath_count with any additional counts coming into the path.  Other
 783      blocks along the path may have additional predecessors from outside
 784      the path.  */
 785   profile_count path_out_count = path_in_count;
 786   profile_count min_path_count = path_in_count;
 787   for (unsigned int i = 1; i < path->length (); i++)
 788     {
 789       edge epath = (*path)[i]->e;
 790       profile_count cur_count = epath->count ();
 791       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 792         {
 793           has_joiner = true;
 794           cur_count = cur_count.apply_probability (onpath_scale);
 795         }
 796       /* In the joiner case we need to update nonpath_count for any edges
 797          coming into the path that will contribute to the count flowing
 798          into the path successor.  */
 799       if (has_joiner && epath != elast)
 800         {
 801           /* Look for other incoming edges after joiner.  */
 802           FOR_EACH_EDGE (ein, ei, epath->dest->preds)
 803             {
 804               if (ein != epath
 805                   /* Ignore in edges from blocks we have duplicated for a
 806                      threading path, which have duplicated edge counts until
 807                      they are redirected by an invocation of this routine.  */
 808                   && !bitmap_bit_p (local_info->duplicate_blocks,
 809                                     ein->src->index))
 810                 nonpath_count += ein->count ();
 811             }
 812         }
 813       if (cur_count < path_out_count)
 814         path_out_count = cur_count;
 815       if (epath->count () < min_path_count)
 816         min_path_count = epath->count ();
 817     }
 818
 819   /* We computed path_out_count above assuming that this path targeted
 820      the joiner's on-path successor with the same likelihood as it
 821      reached the joiner.  However, other thread paths through the joiner
 822      may take a different path through the normal copy source block
 823      (i.e. they have a different elast), meaning that they do not
 824      contribute any counts to this path's elast.  As a result, it may
 825      turn out that this path must have more count flowing to the on-path
 826      successor of the joiner.  Essentially, all of this path's elast
 827      count must be contributed by this path and any nonpath counts
 828      (since any path through the joiner with a different elast will not
 829      include a copy of this elast in its duplicated path).
 830      So ensure that this path's path_out_count is at least the
 831      difference between elast->count () and nonpath_count.  Otherwise the edge
 832      counts after threading will not be sane.  */
 833   if (local_info->need_profile_correction
 834       && has_joiner && path_out_count < elast->count () - nonpath_count)
 835     {
 836       path_out_count = elast->count () - nonpath_count;
 837       /* But neither can we go above the minimum count along the path
 838          we are duplicating.  This can be an issue due to profile
 839          insanities coming in to this pass.  */
 840       if (path_out_count > min_path_count)
 841         path_out_count = min_path_count;
 842     }
 843
 844   *path_in_count_ptr = path_in_count;
 845   *path_out_count_ptr = path_out_count;
 846   *path_in_freq_ptr = path_in_freq;
 847   return has_joiner;
 848 }
 849
 850
 851 /* Update the counts and frequencies for both an original path
 852    edge EPATH and its duplicate EDUP.  The duplicate source block
 853    will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
 854    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
 855 static void
 856 update_profile (edge epath, edge edup, profile_count path_in_count,
 857                 profile_count path_out_count)
 858 {
 859
 860   /* First update the duplicated block's count.  */
 861   if (edup)
 862     {
 863       basic_block dup_block = edup->src;
 864
 865       /* Edup's count is reduced by path_out_count.  We need to redistribute
 866          probabilities to the remaining edges.  */
 867
 868       edge esucc;
 869       edge_iterator ei;
 870       profile_probability edup_prob
 871          = path_out_count.probability_in (path_in_count);
 872
 873       /* Either scale up or down the remaining edges.
 874          probabilities are always in range <0,1> and thus we can't do
 875          both by same loop.  */
 876       if (edup->probability > edup_prob)
 877         {
 878            profile_probability rev_scale
 879              = (profile_probability::always () - edup->probability)
 880                / (profile_probability::always () - edup_prob);
 881            FOR_EACH_EDGE (esucc, ei, dup_block->succs)
 882              if (esucc != edup)
 883                esucc->probability /= rev_scale;
 884         }
 885       else if (edup->probability < edup_prob)
 886         {
 887            profile_probability scale
 888              = (profile_probability::always () - edup_prob)
 889                / (profile_probability::always () - edup->probability);
 890           FOR_EACH_EDGE (esucc, ei, dup_block->succs)
 891             if (esucc != edup)
 892               esucc->probability *= scale;
 893         }
 894       if (edup_prob.initialized_p ())
 895         edup->probability = edup_prob;
 896
 897       gcc_assert (!dup_block->count.initialized_p ());
 898       dup_block->count = path_in_count;
 899     }
 900
 901   if (path_in_count == profile_count::zero ())
 902     return;
 903
 904   profile_count final_count = epath->count () - path_out_count;
 905
 906   /* Now update the original block's count in the
 907      opposite manner - remove the counts/freq that will flow
 908      into the duplicated block.  Handle underflow due to precision/
 909      rounding issues.  */
 910   epath->src->count -= path_in_count;
 911
 912   /* Next update this path edge's original and duplicated counts.  We know
 913      that the duplicated path will have path_out_count flowing
 914      out of it (in the joiner case this is the count along the duplicated path
 915      out of the duplicated joiner).  This count can then be removed from the
 916      original path edge.  */
 917
 918   edge esucc;
 919   edge_iterator ei;
 920   profile_probability epath_prob = final_count.probability_in (epath->src->count);
 921
 922   if (epath->probability > epath_prob)
 923     {
 924        profile_probability rev_scale
 925          = (profile_probability::always () - epath->probability)
 926            / (profile_probability::always () - epath_prob);
 927        FOR_EACH_EDGE (esucc, ei, epath->src->succs)
 928          if (esucc != epath)
 929            esucc->probability /= rev_scale;
 930     }
 931   else if (epath->probability < epath_prob)
 932     {
 933        profile_probability scale
 934          = (profile_probability::always () - epath_prob)
 935            / (profile_probability::always () - epath->probability);
 936       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
 937         if (esucc != epath)
 938           esucc->probability *= scale;
 939     }
 940   if (epath_prob.initialized_p ())
 941     epath->probability = epath_prob;
 942 }
 943
 944 /* Wire up the outgoing edges from the duplicate blocks and
 945    update any PHIs as needed.  Also update the profile counts
 946    on the original and duplicate blocks and edges.  */
 947 void
 948 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
 949                                ssa_local_info_t *local_info)
 950 {
 951   bool multi_incomings = (rd->incoming_edges->next != NULL);
 952   edge e = rd->incoming_edges->e;
 953   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 954   edge elast = path->last ()->e;
 955   profile_count path_in_count = profile_count::zero ();
 956   profile_count path_out_count = profile_count::zero ();
 957   int path_in_freq = 0;
 958
 959   /* First determine how much profile count to move from original
 960      path to the duplicate path.  This is tricky in the presence of
 961      a joiner (see comments for compute_path_counts), where some portion
 962      of the path's counts will flow off-path from the joiner.  In the
 963      non-joiner case the path_in_count and path_out_count should be the
 964      same.  */
 965   bool has_joiner = compute_path_counts (rd, local_info,
 966                                          &path_in_count, &path_out_count,
 967                                          &path_in_freq);
 968
 969   for (unsigned int count = 0, i = 1; i < path->length (); i++)
 970     {
 971       edge epath = (*path)[i]->e;
 972
 973       /* If we were threading through an joiner block, then we want
 974          to keep its control statement and redirect an outgoing edge.
 975          Else we want to remove the control statement & edges, then create
 976          a new outgoing edge.  In both cases we may need to update PHIs.  */
 977       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 978         {
 979           edge victim;
 980           edge e2;
 981
 982           gcc_assert (has_joiner);
 983
 984           /* This updates the PHIs at the destination of the duplicate
 985              block.  Pass 0 instead of i if we are threading a path which
 986              has multiple incoming edges.  */
 987           update_destination_phis (local_info->bb, rd->dup_blocks[count],
 988                                    path, multi_incomings ? 0 : i);
 989
 990           /* Find the edge from the duplicate block to the block we're
 991              threading through.  That's the edge we want to redirect.  */
 992           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
 993
 994           /* If there are no remaining blocks on the path to duplicate,
 995              then redirect VICTIM to the final destination of the jump
 996              threading path.  */
 997           if (!any_remaining_duplicated_blocks (path, i))
 998             {
 999               e2 = redirect_edge_and_branch (victim, elast->dest);
1000               /* If we redirected the edge, then we need to copy PHI arguments
1001                  at the target.  If the edge already existed (e2 != victim
1002                  case), then the PHIs in the target already have the correct
1003                  arguments.  */
1004               if (e2 == victim)
1005                 copy_phi_args (e2->dest, elast, e2,
1006                                path, multi_incomings ? 0 : i);
1007             }
1008           else
1009             {
1010               /* Redirect VICTIM to the next duplicated block in the path.  */
1011               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1012
1013               /* We need to update the PHIs in the next duplicated block.  We
1014                  want the new PHI args to have the same value as they had
1015                  in the source of the next duplicate block.
1016
1017                  Thus, we need to know which edge we traversed into the
1018                  source of the duplicate.  Furthermore, we may have
1019                  traversed many edges to reach the source of the duplicate.
1020
1021                  Walk through the path starting at element I until we
1022                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1023                  the edge from the prior element.  */
1024               for (unsigned int j = i + 1; j < path->length (); j++)
1025                 {
1026                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1027                     {
1028                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1029                       break;
1030                     }
1031                 }
1032             }
1033
1034           /* Update the counts of both the original block
1035              and path edge, and the duplicates.  The path duplicate's
1036              incoming count are the totals for all edges
1037              incoming to this jump threading path computed earlier.
1038              And we know that the duplicated path will have path_out_count
1039              flowing out of it (i.e. along the duplicated path out of the
1040              duplicated joiner).  */
1041           update_profile (epath, e2, path_in_count, path_out_count);
1042         }
1043       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1044         {
1045           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1046           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1047                                                    multi_incomings ? 0 : i);
1048           if (count == 1)
1049             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1050
1051           /* Update the counts of both the original block
1052              and path edge, and the duplicates.  Since we are now after
1053              any joiner that may have existed on the path, the count
1054              flowing along the duplicated threaded path is path_out_count.
1055              If we didn't have a joiner, then cur_path_freq was the sum
1056              of the total frequencies along all incoming edges to the
1057              thread path (path_in_freq).  If we had a joiner, it would have
1058              been updated at the end of that handling to the edge frequency
1059              along the duplicated joiner path edge.  */
1060           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1061                           path_out_count, path_out_count);
1062         }
1063       else
1064         {
1065           /* No copy case.  In this case we don't have an equivalent block
1066              on the duplicated thread path to update, but we do need
1067              to remove the portion of the counts/freqs that were moved
1068              to the duplicated path from the counts/freqs flowing through
1069              this block on the original path.  Since all the no-copy edges
1070              are after any joiner, the removed count is the same as
1071              path_out_count.
1072
1073              If we didn't have a joiner, then cur_path_freq was the sum
1074              of the total frequencies along all incoming edges to the
1075              thread path (path_in_freq).  If we had a joiner, it would have
1076              been updated at the end of that handling to the edge frequency
1077              along the duplicated joiner path edge.  */
1078            update_profile (epath, NULL, path_out_count, path_out_count);
1079         }
1080
1081       /* Increment the index into the duplicated path when we processed
1082          a duplicated block.  */
1083       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1084           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1085         {
1086           count++;
1087         }
1088     }
1089 }
1090
1091 /* Hash table traversal callback routine to create duplicate blocks.  */
1092
1093 int
1094 ssa_create_duplicates (struct redirection_data **slot,
1095                        ssa_local_info_t *local_info)
1096 {
1097   struct redirection_data *rd = *slot;
1098
1099   /* The second duplicated block in a jump threading path is specific
1100      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
1101
1102      Each time we're called, we have to look through the path and see
1103      if a second block needs to be duplicated.
1104
1105      Note the search starts with the third edge on the path.  The first
1106      edge is the incoming edge, the second edge always has its source
1107      duplicated.  Thus we start our search with the third edge.  */
1108   vec<jump_thread_edge *> *path = rd->path;
1109   for (unsigned int i = 2; i < path->length (); i++)
1110     {
1111       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1112           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1113         {
1114           create_block_for_threading ((*path)[i]->e->src, rd, 1,
1115                                       &local_info->duplicate_blocks);
1116           break;
1117         }
1118     }
1119
1120   /* Create a template block if we have not done so already.  Otherwise
1121      use the template to create a new block.  */
1122   if (local_info->template_block == NULL)
1123     {
1124       create_block_for_threading ((*path)[1]->e->src, rd, 0,
1125                                   &local_info->duplicate_blocks);
1126       local_info->template_block = rd->dup_blocks[0];
1127
1128       /* We do not create any outgoing edges for the template.  We will
1129          take care of that in a later traversal.  That way we do not
1130          create edges that are going to just be deleted.  */
1131     }
1132   else
1133     {
1134       create_block_for_threading (local_info->template_block, rd, 0,
1135                                   &local_info->duplicate_blocks);
1136
1137       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1138          block.   */
1139       ssa_fix_duplicate_block_edges (rd, local_info);
1140     }
1141
1142   /* Keep walking the hash table.  */
1143   return 1;
1144 }
1145
1146 /* We did not create any outgoing edges for the template block during
1147    block creation.  This hash table traversal callback creates the
1148    outgoing edge for the template block.  */
1149
1150 inline int
1151 ssa_fixup_template_block (struct redirection_data **slot,
1152                           ssa_local_info_t *local_info)
1153 {
1154   struct redirection_data *rd = *slot;
1155
1156   /* If this is the template block halt the traversal after updating
1157      it appropriately.
1158
1159      If we were threading through an joiner block, then we want
1160      to keep its control statement and redirect an outgoing edge.
1161      Else we want to remove the control statement & edges, then create
1162      a new outgoing edge.  In both cases we may need to update PHIs.  */
1163   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1164     {
1165       ssa_fix_duplicate_block_edges (rd, local_info);
1166       return 0;
1167     }
1168
1169   return 1;
1170 }
1171
1172 /* Hash table traversal callback to redirect each incoming edge
1173    associated with this hash table element to its new destination.  */
1174
1175 int
1176 ssa_redirect_edges (struct redirection_data **slot,
1177                     ssa_local_info_t *local_info)
1178 {
1179   struct redirection_data *rd = *slot;
1180   struct el *next, *el;
1181
1182   /* Walk over all the incoming edges associated with this hash table
1183      entry.  */
1184   for (el = rd->incoming_edges; el; el = next)
1185     {
1186       edge e = el->e;
1187       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1188
1189       /* Go ahead and free this element from the list.  Doing this now
1190          avoids the need for another list walk when we destroy the hash
1191          table.  */
1192       next = el->next;
1193       free (el);
1194
1195       thread_stats.num_threaded_edges++;
1196
1197       if (rd->dup_blocks[0])
1198         {
1199           edge e2;
1200
1201           if (dump_file && (dump_flags & TDF_DETAILS))
1202             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1203                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1204
1205           /* Redirect the incoming edge (possibly to the joiner block) to the
1206              appropriate duplicate block.  */
1207           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1208           gcc_assert (e == e2);
1209           flush_pending_stmts (e2);
1210         }
1211
1212       /* Go ahead and clear E->aux.  It's not needed anymore and failure
1213          to clear it will cause all kinds of unpleasant problems later.  */
1214       delete_jump_thread_path (path);
1215       e->aux = NULL;
1216
1217     }
1218
1219   /* Indicate that we actually threaded one or more jumps.  */
1220   if (rd->incoming_edges)
1221     local_info->jumps_threaded = true;
1222
1223   return 1;
1224 }
1225
1226 /* Return true if this block has no executable statements other than
1227    a simple ctrl flow instruction.  When the number of outgoing edges
1228    is one, this is equivalent to a "forwarder" block.  */
1229
1230 static bool
1231 redirection_block_p (basic_block bb)
1232 {
1233   gimple_stmt_iterator gsi;
1234
1235   /* Advance to the first executable statement.  */
1236   gsi = gsi_start_bb (bb);
1237   while (!gsi_end_p (gsi)
1238          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1239              || is_gimple_debug (gsi_stmt (gsi))
1240              || gimple_nop_p (gsi_stmt (gsi))
1241              || gimple_clobber_p (gsi_stmt (gsi))))
1242     gsi_next (&gsi);
1243
1244   /* Check if this is an empty block.  */
1245   if (gsi_end_p (gsi))
1246     return true;
1247
1248   /* Test that we've reached the terminating control statement.  */
1249   return gsi_stmt (gsi)
1250          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1251              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1252              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1253 }
1254
1255 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1256    is reached via one or more specific incoming edges, we know which
1257    outgoing edge from BB will be traversed.
1258
1259    We want to redirect those incoming edges to the target of the
1260    appropriate outgoing edge.  Doing so avoids a conditional branch
1261    and may expose new optimization opportunities.  Note that we have
1262    to update dominator tree and SSA graph after such changes.
1263
1264    The key to keeping the SSA graph update manageable is to duplicate
1265    the side effects occurring in BB so that those side effects still
1266    occur on the paths which bypass BB after redirecting edges.
1267
1268    We accomplish this by creating duplicates of BB and arranging for
1269    the duplicates to unconditionally pass control to one specific
1270    successor of BB.  We then revector the incoming edges into BB to
1271    the appropriate duplicate of BB.
1272
1273    If NOLOOP_ONLY is true, we only perform the threading as long as it
1274    does not affect the structure of the loops in a nontrivial way.
1275
1276    If JOINERS is true, then thread through joiner blocks as well.  */
1277
1278 static bool
1279 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1280 {
1281   /* E is an incoming edge into BB that we may or may not want to
1282      redirect to a duplicate of BB.  */
1283   edge e, e2;
1284   edge_iterator ei;
1285   ssa_local_info_t local_info;
1286
1287   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1288   local_info.need_profile_correction = false;
1289
1290   /* To avoid scanning a linear array for the element we need we instead
1291      use a hash table.  For normal code there should be no noticeable
1292      difference.  However, if we have a block with a large number of
1293      incoming and outgoing edges such linear searches can get expensive.  */
1294   redirection_data
1295     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1296
1297   /* Record each unique threaded destination into a hash table for
1298      efficient lookups.  */
1299   edge last = NULL;
1300   FOR_EACH_EDGE (e, ei, bb->preds)
1301     {
1302       if (e->aux == NULL)
1303         continue;
1304
1305       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1306
1307       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1308           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1309         continue;
1310
1311       e2 = path->last ()->e;
1312       if (!e2 || noloop_only)
1313         {
1314           /* If NOLOOP_ONLY is true, we only allow threading through the
1315              header of a loop to exit edges.  */
1316
1317           /* One case occurs when there was loop header buried in a jump
1318              threading path that crosses loop boundaries.  We do not try
1319              and thread this elsewhere, so just cancel the jump threading
1320              request by clearing the AUX field now.  */
1321           if (bb->loop_father != e2->src->loop_father
1322               && !loop_exit_edge_p (e2->src->loop_father, e2))
1323             {
1324               /* Since this case is not handled by our special code
1325                  to thread through a loop header, we must explicitly
1326                  cancel the threading request here.  */
1327               delete_jump_thread_path (path);
1328               e->aux = NULL;
1329               continue;
1330             }
1331
1332           /* Another case occurs when trying to thread through our
1333              own loop header, possibly from inside the loop.  We will
1334              thread these later.  */
1335           unsigned int i;
1336           for (i = 1; i < path->length (); i++)
1337             {
1338               if ((*path)[i]->e->src == bb->loop_father->header
1339                   && (!loop_exit_edge_p (bb->loop_father, e2)
1340                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1341                 break;
1342             }
1343
1344           if (i != path->length ())
1345             continue;
1346         }
1347
1348       /* Insert the outgoing edge into the hash table if it is not
1349          already in the hash table.  */
1350       lookup_redirection_data (e, INSERT);
1351
1352       /* When we have thread paths through a common joiner with different
1353          final destinations, then we may need corrections to deal with
1354          profile insanities.  See the big comment before compute_path_counts.  */
1355       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1356         {
1357           if (!last)
1358             last = e2;
1359           else if (e2 != last)
1360             local_info.need_profile_correction = true;
1361         }
1362     }
1363
1364   /* We do not update dominance info.  */
1365   free_dominance_info (CDI_DOMINATORS);
1366
1367   /* We know we only thread through the loop header to loop exits.
1368      Let the basic block duplication hook know we are not creating
1369      a multiple entry loop.  */
1370   if (noloop_only
1371       && bb == bb->loop_father->header)
1372     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1373
1374   /* Now create duplicates of BB.
1375
1376      Note that for a block with a high outgoing degree we can waste
1377      a lot of time and memory creating and destroying useless edges.
1378
1379      So we first duplicate BB and remove the control structure at the
1380      tail of the duplicate as well as all outgoing edges from the
1381      duplicate.  We then use that duplicate block as a template for
1382      the rest of the duplicates.  */
1383   local_info.template_block = NULL;
1384   local_info.bb = bb;
1385   local_info.jumps_threaded = false;
1386   redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1387                             (&local_info);
1388
1389   /* The template does not have an outgoing edge.  Create that outgoing
1390      edge and update PHI nodes as the edge's target as necessary.
1391
1392      We do this after creating all the duplicates to avoid creating
1393      unnecessary edges.  */
1394   redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1395                             (&local_info);
1396
1397   /* The hash table traversals above created the duplicate blocks (and the
1398      statements within the duplicate blocks).  This loop creates PHI nodes for
1399      the duplicated blocks and redirects the incoming edges into BB to reach
1400      the duplicates of BB.  */
1401   redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1402                             (&local_info);
1403
1404   /* Done with this block.  Clear REDIRECTION_DATA.  */
1405   delete redirection_data;
1406   redirection_data = NULL;
1407
1408   if (noloop_only
1409       && bb == bb->loop_father->header)
1410     set_loop_copy (bb->loop_father, NULL);
1411
1412   BITMAP_FREE (local_info.duplicate_blocks);
1413   local_info.duplicate_blocks = NULL;
1414
1415   /* Indicate to our caller whether or not any jumps were threaded.  */
1416   return local_info.jumps_threaded;
1417 }
1418
1419 /* Wrapper for thread_block_1 so that we can first handle jump
1420    thread paths which do not involve copying joiner blocks, then
1421    handle jump thread paths which have joiner blocks.
1422
1423    By doing things this way we can be as aggressive as possible and
1424    not worry that copying a joiner block will create a jump threading
1425    opportunity.  */
1426
1427 static bool
1428 thread_block (basic_block bb, bool noloop_only)
1429 {
1430   bool retval;
1431   retval = thread_block_1 (bb, noloop_only, false);
1432   retval |= thread_block_1 (bb, noloop_only, true);
1433   return retval;
1434 }
1435
1436 /* Callback for dfs_enumerate_from.  Returns true if BB is different
1437    from STOP and DBDS_CE_STOP.  */
1438
1439 static basic_block dbds_ce_stop;
1440 static bool
1441 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1442 {
1443   return (bb != (const_basic_block) stop
1444           && bb != dbds_ce_stop);
1445 }
1446
1447 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1448    returns the state.  */
1449
1450 enum bb_dom_status
1451 determine_bb_domination_status (struct loop *loop, basic_block bb)
1452 {
1453   basic_block *bblocks;
1454   unsigned nblocks, i;
1455   bool bb_reachable = false;
1456   edge_iterator ei;
1457   edge e;
1458
1459   /* This function assumes BB is a successor of LOOP->header.
1460      If that is not the case return DOMST_NONDOMINATING which
1461      is always safe.  */
1462     {
1463       bool ok = false;
1464
1465       FOR_EACH_EDGE (e, ei, bb->preds)
1466         {
1467           if (e->src == loop->header)
1468             {
1469               ok = true;
1470               break;
1471             }
1472         }
1473
1474       if (!ok)
1475         return DOMST_NONDOMINATING;
1476     }
1477
1478   if (bb == loop->latch)
1479     return DOMST_DOMINATING;
1480
1481   /* Check that BB dominates LOOP->latch, and that it is back-reachable
1482      from it.  */
1483
1484   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1485   dbds_ce_stop = loop->header;
1486   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1487                                 bblocks, loop->num_nodes, bb);
1488   for (i = 0; i < nblocks; i++)
1489     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1490       {
1491         if (e->src == loop->header)
1492           {
1493             free (bblocks);
1494             return DOMST_NONDOMINATING;
1495           }
1496         if (e->src == bb)
1497           bb_reachable = true;
1498       }
1499
1500   free (bblocks);
1501   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1502 }
1503
1504 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
1505    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1506    to the inside of the loop.  */
1507
1508 static bool
1509 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1510 {
1511   basic_block header = loop->header;
1512   edge e, tgt_edge, latch = loop_latch_edge (loop);
1513   edge_iterator ei;
1514   basic_block tgt_bb, atgt_bb;
1515   enum bb_dom_status domst;
1516
1517   /* We have already threaded through headers to exits, so all the threading
1518      requests now are to the inside of the loop.  We need to avoid creating
1519      irreducible regions (i.e., loops with more than one entry block), and
1520      also loop with several latch edges, or new subloops of the loop (although
1521      there are cases where it might be appropriate, it is difficult to decide,
1522      and doing it wrongly may confuse other optimizers).
1523
1524      We could handle more general cases here.  However, the intention is to
1525      preserve some information about the loop, which is impossible if its
1526      structure changes significantly, in a way that is not well understood.
1527      Thus we only handle few important special cases, in which also updating
1528      of the loop-carried information should be feasible:
1529
1530      1) Propagation of latch edge to a block that dominates the latch block
1531         of a loop.  This aims to handle the following idiom:
1532
1533         first = 1;
1534         while (1)
1535           {
1536             if (first)
1537               initialize;
1538             first = 0;
1539             body;
1540           }
1541
1542         After threading the latch edge, this becomes
1543
1544         first = 1;
1545         if (first)
1546           initialize;
1547         while (1)
1548           {
1549             first = 0;
1550             body;
1551           }
1552
1553         The original header of the loop is moved out of it, and we may thread
1554         the remaining edges through it without further constraints.
1555
1556      2) All entry edges are propagated to a single basic block that dominates
1557         the latch block of the loop.  This aims to handle the following idiom
1558         (normally created for "for" loops):
1559
1560         i = 0;
1561         while (1)
1562           {
1563             if (i >= 100)
1564               break;
1565             body;
1566             i++;
1567           }
1568
1569         This becomes
1570
1571         i = 0;
1572         while (1)
1573           {
1574             body;
1575             i++;
1576             if (i >= 100)
1577               break;
1578           }
1579      */
1580
1581   /* Threading through the header won't improve the code if the header has just
1582      one successor.  */
1583   if (single_succ_p (header))
1584     goto fail;
1585
1586   if (!may_peel_loop_headers && !redirection_block_p (loop->header))
1587     goto fail;
1588   else
1589     {
1590       tgt_bb = NULL;
1591       tgt_edge = NULL;
1592       FOR_EACH_EDGE (e, ei, header->preds)
1593         {
1594           if (!e->aux)
1595             {
1596               if (e == latch)
1597                 continue;
1598
1599               /* If latch is not threaded, and there is a header
1600                  edge that is not threaded, we would create loop
1601                  with multiple entries.  */
1602               goto fail;
1603             }
1604
1605           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1606
1607           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1608             goto fail;
1609           tgt_edge = (*path)[1]->e;
1610           atgt_bb = tgt_edge->dest;
1611           if (!tgt_bb)
1612             tgt_bb = atgt_bb;
1613           /* Two targets of threading would make us create loop
1614              with multiple entries.  */
1615           else if (tgt_bb != atgt_bb)
1616             goto fail;
1617         }
1618
1619       if (!tgt_bb)
1620         {
1621           /* There are no threading requests.  */
1622           return false;
1623         }
1624
1625       /* Redirecting to empty loop latch is useless.  */
1626       if (tgt_bb == loop->latch
1627           && empty_block_p (loop->latch))
1628         goto fail;
1629     }
1630
1631   /* The target block must dominate the loop latch, otherwise we would be
1632      creating a subloop.  */
1633   domst = determine_bb_domination_status (loop, tgt_bb);
1634   if (domst == DOMST_NONDOMINATING)
1635     goto fail;
1636   if (domst == DOMST_LOOP_BROKEN)
1637     {
1638       /* If the loop ceased to exist, mark it as such, and thread through its
1639          original header.  */
1640       mark_loop_for_removal (loop);
1641       return thread_block (header, false);
1642     }
1643
1644   if (tgt_bb->loop_father->header == tgt_bb)
1645     {
1646       /* If the target of the threading is a header of a subloop, we need
1647          to create a preheader for it, so that the headers of the two loops
1648          do not merge.  */
1649       if (EDGE_COUNT (tgt_bb->preds) > 2)
1650         {
1651           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1652           gcc_assert (tgt_bb != NULL);
1653         }
1654       else
1655         tgt_bb = split_edge (tgt_edge);
1656     }
1657
1658   basic_block new_preheader;
1659
1660   /* Now consider the case entry edges are redirected to the new entry
1661      block.  Remember one entry edge, so that we can find the new
1662      preheader (its destination after threading).  */
1663   FOR_EACH_EDGE (e, ei, header->preds)
1664     {
1665       if (e->aux)
1666         break;
1667     }
1668
1669   /* The duplicate of the header is the new preheader of the loop.  Ensure
1670      that it is placed correctly in the loop hierarchy.  */
1671   set_loop_copy (loop, loop_outer (loop));
1672
1673   thread_block (header, false);
1674   set_loop_copy (loop, NULL);
1675   new_preheader = e->dest;
1676
1677   /* Create the new latch block.  This is always necessary, as the latch
1678      must have only a single successor, but the original header had at
1679      least two successors.  */
1680   loop->latch = NULL;
1681   mfb_kj_edge = single_succ_edge (new_preheader);
1682   loop->header = mfb_kj_edge->dest;
1683   latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1684   loop->header = latch->dest;
1685   loop->latch = latch->src;
1686   return true;
1687
1688 fail:
1689   /* We failed to thread anything.  Cancel the requests.  */
1690   FOR_EACH_EDGE (e, ei, header->preds)
1691     {
1692       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1693
1694       if (path)
1695         {
1696           delete_jump_thread_path (path);
1697           e->aux = NULL;
1698         }
1699     }
1700   return false;
1701 }
1702
1703 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
1704    PHI arguments associated with those edges are equal or there are no
1705    PHI arguments, otherwise return FALSE.  */
1706
1707 static bool
1708 phi_args_equal_on_edges (edge e1, edge e2)
1709 {
1710   gphi_iterator gsi;
1711   int indx1 = e1->dest_idx;
1712   int indx2 = e2->dest_idx;
1713
1714   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1715     {
1716       gphi *phi = gsi.phi ();
1717
1718       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1719                             gimple_phi_arg_def (phi, indx2), 0))
1720         return false;
1721     }
1722   return true;
1723 }
1724
1725 /* Walk through the registered jump threads and convert them into a
1726    form convenient for this pass.
1727
1728    Any block which has incoming edges threaded to outgoing edges
1729    will have its entry in THREADED_BLOCK set.
1730
1731    Any threaded edge will have its new outgoing edge stored in the
1732    original edge's AUX field.
1733
1734    This form avoids the need to walk all the edges in the CFG to
1735    discover blocks which need processing and avoids unnecessary
1736    hash table lookups to map from threaded edge to new target.  */
1737
1738 static void
1739 mark_threaded_blocks (bitmap threaded_blocks)
1740 {
1741   unsigned int i;
1742   bitmap_iterator bi;
1743   auto_bitmap tmp;
1744   basic_block bb;
1745   edge e;
1746   edge_iterator ei;
1747
1748   /* It is possible to have jump threads in which one is a subpath
1749      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
1750      block and (B, C), (C, D) where no joiner block exists.
1751
1752      When this occurs ignore the jump thread request with the joiner
1753      block.  It's totally subsumed by the simpler jump thread request.
1754
1755      This results in less block copying, simpler CFGs.  More importantly,
1756      when we duplicate the joiner block, B, in this case we will create
1757      a new threading opportunity that we wouldn't be able to optimize
1758      until the next jump threading iteration.
1759
1760      So first convert the jump thread requests which do not require a
1761      joiner block.  */
1762   for (i = 0; i < paths.length (); i++)
1763     {
1764       vec<jump_thread_edge *> *path = paths[i];
1765
1766       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1767         {
1768           edge e = (*path)[0]->e;
1769           e->aux = (void *)path;
1770           bitmap_set_bit (tmp, e->dest->index);
1771         }
1772     }
1773
1774   /* Now iterate again, converting cases where we want to thread
1775      through a joiner block, but only if no other edge on the path
1776      already has a jump thread attached to it.  We do this in two passes,
1777      to avoid situations where the order in the paths vec can hide overlapping
1778      threads (the path is recorded on the incoming edge, so we would miss
1779      cases where the second path starts at a downstream edge on the same
1780      path).  First record all joiner paths, deleting any in the unexpected
1781      case where there is already a path for that incoming edge.  */
1782   for (i = 0; i < paths.length ();)
1783     {
1784       vec<jump_thread_edge *> *path = paths[i];
1785
1786       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1787         {
1788           /* Attach the path to the starting edge if none is yet recorded.  */
1789           if ((*path)[0]->e->aux == NULL)
1790             {
1791               (*path)[0]->e->aux = path;
1792               i++;
1793             }
1794           else
1795             {
1796               paths.unordered_remove (i);
1797               if (dump_file && (dump_flags & TDF_DETAILS))
1798                 dump_jump_thread_path (dump_file, *path, false);
1799               delete_jump_thread_path (path);
1800             }
1801         }
1802       else
1803         {
1804           i++;
1805         }
1806     }
1807
1808   /* Second, look for paths that have any other jump thread attached to
1809      them, and either finish converting them or cancel them.  */
1810   for (i = 0; i < paths.length ();)
1811     {
1812       vec<jump_thread_edge *> *path = paths[i];
1813       edge e = (*path)[0]->e;
1814
1815       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
1816         {
1817           unsigned int j;
1818           for (j = 1; j < path->length (); j++)
1819             if ((*path)[j]->e->aux != NULL)
1820               break;
1821
1822           /* If we iterated through the entire path without exiting the loop,
1823              then we are good to go, record it.  */
1824           if (j == path->length ())
1825             {
1826               bitmap_set_bit (tmp, e->dest->index);
1827               i++;
1828             }
1829           else
1830             {
1831               e->aux = NULL;
1832               paths.unordered_remove (i);
1833               if (dump_file && (dump_flags & TDF_DETAILS))
1834                 dump_jump_thread_path (dump_file, *path, false);
1835               delete_jump_thread_path (path);
1836             }
1837         }
1838       else
1839         {
1840           i++;
1841         }
1842     }
1843
1844   /* If optimizing for size, only thread through block if we don't have
1845      to duplicate it or it's an otherwise empty redirection block.  */
1846   if (optimize_function_for_size_p (cfun))
1847     {
1848       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1849         {
1850           bb = BASIC_BLOCK_FOR_FN (cfun, i);
1851           if (EDGE_COUNT (bb->preds) > 1
1852               && !redirection_block_p (bb))
1853             {
1854               FOR_EACH_EDGE (e, ei, bb->preds)
1855                 {
1856                   if (e->aux)
1857                     {
1858                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1859                       delete_jump_thread_path (path);
1860                       e->aux = NULL;
1861                     }
1862                 }
1863             }
1864           else
1865             bitmap_set_bit (threaded_blocks, i);
1866         }
1867     }
1868   else
1869     bitmap_copy (threaded_blocks, tmp);
1870
1871   /* If we have a joiner block (J) which has two successors S1 and S2 and
1872      we are threading though S1 and the final destination of the thread
1873      is S2, then we must verify that any PHI nodes in S2 have the same
1874      PHI arguments for the edge J->S2 and J->S1->...->S2.
1875
1876      We used to detect this prior to registering the jump thread, but
1877      that prohibits propagation of edge equivalences into non-dominated
1878      PHI nodes as the equivalency test might occur before propagation.
1879
1880      This must also occur after we truncate any jump threading paths
1881      as this scenario may only show up after truncation.
1882
1883      This works for now, but will need improvement as part of the FSA
1884      optimization.
1885
1886      Note since we've moved the thread request data to the edges,
1887      we have to iterate on those rather than the threaded_edges vector.  */
1888   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1889     {
1890       bb = BASIC_BLOCK_FOR_FN (cfun, i);
1891       FOR_EACH_EDGE (e, ei, bb->preds)
1892         {
1893           if (e->aux)
1894             {
1895               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1896               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
1897
1898               if (have_joiner)
1899                 {
1900                   basic_block joiner = e->dest;
1901                   edge final_edge = path->last ()->e;
1902                   basic_block final_dest = final_edge->dest;
1903                   edge e2 = find_edge (joiner, final_dest);
1904
1905                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
1906                     {
1907                       delete_jump_thread_path (path);
1908                       e->aux = NULL;
1909                     }
1910                 }
1911             }
1912         }
1913     }
1914
1915   /* Look for jump threading paths which cross multiple loop headers.
1916
1917      The code to thread through loop headers will change the CFG in ways
1918      that invalidate the cached loop iteration information.  So we must
1919      detect that case and wipe the cached information.  */
1920   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1921     {
1922       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
1923       FOR_EACH_EDGE (e, ei, bb->preds)
1924         {
1925           if (e->aux)
1926             {
1927               vec<jump_thread_edge *> *path = THREAD_PATH (e);
1928
1929               for (unsigned int i = 0, crossed_headers = 0;
1930                    i < path->length ();
1931                    i++)
1932                 {
1933                   basic_block dest = (*path)[i]->e->dest;
1934                   basic_block src = (*path)[i]->e->src;
1935                   /* If we enter a loop.  */
1936                   if (flow_loop_nested_p (src->loop_father, dest->loop_father))
1937                     ++crossed_headers;
1938                   /* If we step from a block outside an irreducible region
1939                      to a block inside an irreducible region, then we have
1940                      crossed into a loop.  */
1941                   else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
1942                            && (dest->flags & BB_IRREDUCIBLE_LOOP))
1943                       ++crossed_headers;
1944                   if (crossed_headers > 1)
1945                     {
1946                       vect_free_loop_info_assumptions
1947                         ((*path)[path->length () - 1]->e->dest->loop_father);
1948                       break;
1949                     }
1950                 }
1951             }
1952         }
1953     }
1954 }
1955
1956
1957 /* Verify that the REGION is a valid jump thread.  A jump thread is a special
1958    case of SEME Single Entry Multiple Exits region in which all nodes in the
1959    REGION have exactly one incoming edge.  The only exception is the first block
1960    that may not have been connected to the rest of the cfg yet.  */
1961
1962 DEBUG_FUNCTION void
1963 verify_jump_thread (basic_block *region, unsigned n_region)
1964 {
1965   for (unsigned i = 0; i < n_region; i++)
1966     gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
1967 }
1968
1969 /* Return true when BB is one of the first N items in BBS.  */
1970
1971 static inline bool
1972 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
1973 {
1974   for (int i = 0; i < n; i++)
1975     if (bb == bbs[i])
1976       return true;
1977
1978   return false;
1979 }
1980
1981 /* Duplicates a jump-thread path of N_REGION basic blocks.
1982    The ENTRY edge is redirected to the duplicate of the region.
1983
1984    Remove the last conditional statement in the last basic block in the REGION,
1985    and create a single fallthru edge pointing to the same destination as the
1986    EXIT edge.
1987
1988    Returns false if it is unable to copy the region, true otherwise.  */
1989
1990 static bool
1991 duplicate_thread_path (edge entry, edge exit, basic_block *region,
1992                        unsigned n_region)
1993 {
1994   unsigned i;
1995   struct loop *loop = entry->dest->loop_father;
1996   edge exit_copy;
1997   edge redirected;
1998   profile_count curr_count;
1999
2000   if (!can_copy_bbs_p (region, n_region))
2001     return false;
2002
2003   /* Some sanity checking.  Note that we do not check for all possible
2004      missuses of the functions.  I.e. if you ask to copy something weird,
2005      it will work, but the state of structures probably will not be
2006      correct.  */
2007   for (i = 0; i < n_region; i++)
2008     {
2009       /* We do not handle subloops, i.e. all the blocks must belong to the
2010          same loop.  */
2011       if (region[i]->loop_father != loop)
2012         return false;
2013     }
2014
2015   initialize_original_copy_tables ();
2016
2017   set_loop_copy (loop, loop);
2018
2019   basic_block *region_copy = XNEWVEC (basic_block, n_region);
2020   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2021             split_edge_bb_loc (entry), false);
2022
2023   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
2024      following code ensures that all the edges exiting the jump-thread path are
2025      redirected back to the original code: these edges are exceptions
2026      invalidating the property that is propagated by executing all the blocks of
2027      the jump-thread path in order.  */
2028
2029   curr_count = entry->count ();
2030
2031   for (i = 0; i < n_region; i++)
2032     {
2033       edge e;
2034       edge_iterator ei;
2035       basic_block bb = region_copy[i];
2036
2037       /* Watch inconsistent profile.  */
2038       if (curr_count > region[i]->count)
2039         curr_count = region[i]->count;
2040       /* Scale current BB.  */
2041       if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
2042         {
2043           /* In the middle of the path we only scale the frequencies.
2044              In last BB we need to update probabilities of outgoing edges
2045              because we know which one is taken at the threaded path.  */
2046           if (i + 1 != n_region)
2047             scale_bbs_frequencies_profile_count (region + i, 1,
2048                                                  region[i]->count - curr_count,
2049                                                  region[i]->count);
2050           else
2051             update_bb_profile_for_threading (region[i],
2052                                              curr_count,
2053                                              exit);
2054           scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
2055                                                region_copy[i]->count);
2056         }
2057
2058       if (single_succ_p (bb))
2059         {
2060           /* Make sure the successor is the next node in the path.  */
2061           gcc_assert (i + 1 == n_region
2062                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
2063           if (i + 1 != n_region)
2064             {
2065               curr_count = single_succ_edge (bb)->count ();
2066             }
2067           continue;
2068         }
2069
2070       /* Special case the last block on the path: make sure that it does not
2071          jump back on the copied path, including back to itself.  */
2072       if (i + 1 == n_region)
2073         {
2074           FOR_EACH_EDGE (e, ei, bb->succs)
2075             if (bb_in_bbs (e->dest, region_copy, n_region))
2076               {
2077                 basic_block orig = get_bb_original (e->dest);
2078                 if (orig)
2079                   redirect_edge_and_branch_force (e, orig);
2080               }
2081           continue;
2082         }
2083
2084       /* Redirect all other edges jumping to non-adjacent blocks back to the
2085          original code.  */
2086       FOR_EACH_EDGE (e, ei, bb->succs)
2087         if (region_copy[i + 1] != e->dest)
2088           {
2089             basic_block orig = get_bb_original (e->dest);
2090             if (orig)
2091               redirect_edge_and_branch_force (e, orig);
2092           }
2093         else
2094           {
2095             curr_count = e->count ();
2096           }
2097     }
2098
2099
2100   if (flag_checking)
2101     verify_jump_thread (region_copy, n_region);
2102
2103   /* Remove the last branch in the jump thread path.  */
2104   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2105
2106   /* And fixup the flags on the single remaining edge.  */
2107   edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2108   fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2109   fix_e->flags |= EDGE_FALLTHRU;
2110
2111   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2112
2113   if (e)
2114     {
2115       rescan_loop_exit (e, true, false);
2116       e->probability = profile_probability::always ();
2117     }
2118
2119   /* Redirect the entry and add the phi node arguments.  */
2120   if (entry->dest == loop->header)
2121     mark_loop_for_removal (loop);
2122   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2123   gcc_assert (redirected != NULL);
2124   flush_pending_stmts (entry);
2125
2126   /* Add the other PHI node arguments.  */
2127   add_phi_args_after_copy (region_copy, n_region, NULL);
2128
2129   free (region_copy);
2130
2131   free_original_copy_tables ();
2132   return true;
2133 }
2134
2135 /* Return true when PATH is a valid jump-thread path.  */
2136
2137 static bool
2138 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2139 {
2140   unsigned len = path->length ();
2141
2142   /* Check that the path is connected.  */
2143   for (unsigned int j = 0; j < len - 1; j++)
2144     {
2145       edge e = (*path)[j]->e;
2146       if (e->dest != (*path)[j+1]->e->src)
2147         return false;
2148     }
2149   return true;
2150 }
2151
2152 /* Remove any queued jump threads that include edge E.
2153
2154    We don't actually remove them here, just record the edges into ax
2155    hash table.  That way we can do the search once per iteration of
2156    DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR.  */
2157
2158 void
2159 remove_jump_threads_including (edge_def *e)
2160 {
2161   if (!paths.exists ())
2162     return;
2163
2164   if (!removed_edges)
2165     removed_edges = new hash_table<struct removed_edges> (17);
2166
2167   edge *slot = removed_edges->find_slot (e, INSERT);
2168   *slot = e;
2169 }
2170
2171 /* Walk through all blocks and thread incoming edges to the appropriate
2172    outgoing edge for each edge pair recorded in THREADED_EDGES.
2173
2174    It is the caller's responsibility to fix the dominance information
2175    and rewrite duplicated SSA_NAMEs back into SSA form.
2176
2177    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2178    loop headers if it does not simplify the loop.
2179
2180    Returns true if one or more edges were threaded, false otherwise.  */
2181
2182 bool
2183 thread_through_all_blocks (bool may_peel_loop_headers)
2184 {
2185   bool retval = false;
2186   unsigned int i;
2187   bitmap_iterator bi;
2188   struct loop *loop;
2189   auto_bitmap threaded_blocks;
2190
2191   if (!paths.exists ())
2192     {
2193       retval = false;
2194       goto out;
2195     }
2196
2197   memset (&thread_stats, 0, sizeof (thread_stats));
2198
2199   /* Remove any paths that referenced removed edges.  */
2200   if (removed_edges)
2201     for (i = 0; i < paths.length (); )
2202       {
2203         unsigned int j;
2204         vec<jump_thread_edge *> *path = paths[i];
2205
2206         for (j = 0; j < path->length (); j++)
2207           {
2208             edge e = (*path)[j]->e;
2209             if (removed_edges->find_slot (e, NO_INSERT))
2210               break;
2211           }
2212
2213         if (j != path->length ())
2214           {
2215             delete_jump_thread_path (path);
2216             paths.unordered_remove (i);
2217             continue;
2218           }
2219         i++;
2220       }
2221
2222   /* Jump-thread all FSM threads before other jump-threads.  */
2223   for (i = 0; i < paths.length ();)
2224     {
2225       vec<jump_thread_edge *> *path = paths[i];
2226       edge entry = (*path)[0]->e;
2227
2228       /* Only code-generate FSM jump-threads in this loop.  */
2229       if ((*path)[0]->type != EDGE_FSM_THREAD)
2230         {
2231           i++;
2232           continue;
2233         }
2234
2235       /* Do not jump-thread twice from the same block.  */
2236       if (bitmap_bit_p (threaded_blocks, entry->src->index)
2237           /* We may not want to realize this jump thread path
2238              for various reasons.  So check it first.  */
2239           || !valid_jump_thread_path (path))
2240         {
2241           /* Remove invalid FSM jump-thread paths.  */
2242           delete_jump_thread_path (path);
2243           paths.unordered_remove (i);
2244           continue;
2245         }
2246
2247       unsigned len = path->length ();
2248       edge exit = (*path)[len - 1]->e;
2249       basic_block *region = XNEWVEC (basic_block, len - 1);
2250
2251       for (unsigned int j = 0; j < len - 1; j++)
2252         region[j] = (*path)[j]->e->dest;
2253
2254       if (duplicate_thread_path (entry, exit, region, len - 1))
2255         {
2256           /* We do not update dominance info.  */
2257           free_dominance_info (CDI_DOMINATORS);
2258           bitmap_set_bit (threaded_blocks, entry->src->index);
2259           retval = true;
2260           thread_stats.num_threaded_edges++;
2261         }
2262
2263       delete_jump_thread_path (path);
2264       paths.unordered_remove (i);
2265       free (region);
2266     }
2267
2268   /* Remove from PATHS all the jump-threads starting with an edge already
2269      jump-threaded.  */
2270   for (i = 0; i < paths.length ();)
2271     {
2272       vec<jump_thread_edge *> *path = paths[i];
2273       edge entry = (*path)[0]->e;
2274
2275       /* Do not jump-thread twice from the same block.  */
2276       if (bitmap_bit_p (threaded_blocks, entry->src->index))
2277         {
2278           delete_jump_thread_path (path);
2279           paths.unordered_remove (i);
2280         }
2281       else
2282         i++;
2283     }
2284
2285   bitmap_clear (threaded_blocks);
2286
2287   mark_threaded_blocks (threaded_blocks);
2288
2289   initialize_original_copy_tables ();
2290
2291   /* First perform the threading requests that do not affect
2292      loop structure.  */
2293   EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2294     {
2295       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2296
2297       if (EDGE_COUNT (bb->preds) > 0)
2298         retval |= thread_block (bb, true);
2299     }
2300
2301   /* Then perform the threading through loop headers.  We start with the
2302      innermost loop, so that the changes in cfg we perform won't affect
2303      further threading.  */
2304   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2305     {
2306       if (!loop->header
2307           || !bitmap_bit_p (threaded_blocks, loop->header->index))
2308         continue;
2309
2310       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2311     }
2312
2313   /* All jump threading paths should have been resolved at this
2314      point.  Verify that is the case.  */
2315   basic_block bb;
2316   FOR_EACH_BB_FN (bb, cfun)
2317     {
2318       edge_iterator ei;
2319       edge e;
2320       FOR_EACH_EDGE (e, ei, bb->preds)
2321         gcc_assert (e->aux == NULL);
2322     }
2323
2324   statistics_counter_event (cfun, "Jumps threaded",
2325                             thread_stats.num_threaded_edges);
2326
2327   free_original_copy_tables ();
2328
2329   paths.release ();
2330
2331   if (retval)
2332     loops_state_set (LOOPS_NEED_FIXUP);
2333
2334  out:
2335   delete removed_edges;
2336   removed_edges = NULL;
2337   return retval;
2338 }
2339
2340 /* Delete the jump threading path PATH.  We have to explicitly delete
2341    each entry in the vector, then the container.  */
2342
2343 void
2344 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2345 {
2346   for (unsigned int i = 0; i < path->length (); i++)
2347     delete (*path)[i];
2348   path->release();
2349   delete path;
2350 }
2351
2352 /* Register a jump threading opportunity.  We queue up all the jump
2353    threading opportunities discovered by a pass and update the CFG
2354    and SSA form all at once.
2355
2356    E is the edge we can thread, E2 is the new target edge, i.e., we
2357    are effectively recording that E->dest can be changed to E2->dest
2358    after fixing the SSA graph.  */
2359
2360 void
2361 register_jump_thread (vec<jump_thread_edge *> *path)
2362 {
2363   if (!dbg_cnt (registered_jump_thread))
2364     {
2365       delete_jump_thread_path (path);
2366       return;
2367     }
2368
2369   /* First make sure there are no NULL outgoing edges on the jump threading
2370      path.  That can happen for jumping to a constant address.  */
2371   for (unsigned int i = 0; i < path->length (); i++)
2372     {
2373       if ((*path)[i]->e == NULL)
2374         {
2375           if (dump_file && (dump_flags & TDF_DETAILS))
2376             {
2377               fprintf (dump_file,
2378                        "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
2379               dump_jump_thread_path (dump_file, *path, false);
2380             }
2381
2382           delete_jump_thread_path (path);
2383           return;
2384         }
2385
2386       /* Only the FSM threader is allowed to thread across
2387          backedges in the CFG.  */
2388       if (flag_checking
2389           && (*path)[0]->type != EDGE_FSM_THREAD)
2390         gcc_assert (((*path)[i]->e->flags & EDGE_DFS_BACK) == 0);
2391     }
2392
2393   if (dump_file && (dump_flags & TDF_DETAILS))
2394     dump_jump_thread_path (dump_file, *path, true);
2395
2396   if (!paths.exists ())
2397     paths.create (5);
2398
2399   paths.safe_push (path);
2400 }