gcc/tree-ssa-threadupdate.c

   1 /* Thread edges through blocks and update the control flow and SSA graphs.
   2    Copyright (C) 2004-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 3, or (at your option)
   9 any later version.
  10
  11 GCC is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 #include "config.h"
  21 #include "system.h"
  22 #include "coretypes.h"
  23 #include "tree.h"
  24 #include "flags.h"
  25 #include "basic-block.h"
  26 #include "function.h"
  27 #include "hash-table.h"
  28 #include "tree-ssa-alias.h"
  29 #include "internal-fn.h"
  30 #include "gimple-expr.h"
  31 #include "is-a.h"
  32 #include "gimple.h"
  33 #include "gimple-iterator.h"
  34 #include "gimple-ssa.h"
  35 #include "tree-phinodes.h"
  36 #include "tree-ssa.h"
  37 #include "tree-ssa-threadupdate.h"
  38 #include "ssa-iterators.h"
  39 #include "dumpfile.h"
  40 #include "cfgloop.h"
  41 #include "dbgcnt.h"
  42 #include "tree-cfg.h"
  43 #include "tree-pass.h"
  44
  45 /* Given a block B, update the CFG and SSA graph to reflect redirecting
  46    one or more in-edges to B to instead reach the destination of an
  47    out-edge from B while preserving any side effects in B.
  48
  49    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
  50    side effects of executing B.
  51
  52      1. Make a copy of B (including its outgoing edges and statements).  Call
  53         the copy B'.  Note B' has no incoming edges or PHIs at this time.
  54
  55      2. Remove the control statement at the end of B' and all outgoing edges
  56         except B'->C.
  57
  58      3. Add a new argument to each PHI in C with the same value as the existing
  59         argument associated with edge B->C.  Associate the new PHI arguments
  60         with the edge B'->C.
  61
  62      4. For each PHI in B, find or create a PHI in B' with an identical
  63         PHI_RESULT.  Add an argument to the PHI in B' which has the same
  64         value as the PHI in B associated with the edge A->B.  Associate
  65         the new argument in the PHI in B' with the edge A->B.
  66
  67      5. Change the edge A->B to A->B'.
  68
  69         5a. This automatically deletes any PHI arguments associated with the
  70             edge A->B in B.
  71
  72         5b. This automatically associates each new argument added in step 4
  73             with the edge A->B'.
  74
  75      6. Repeat for other incoming edges into B.
  76
  77      7. Put the duplicated resources in B and all the B' blocks into SSA form.
  78
  79    Note that block duplication can be minimized by first collecting the
  80    set of unique destination blocks that the incoming edges should
  81    be threaded to.
  82
  83    We reduce the number of edges and statements we create by not copying all
  84    the outgoing edges and the control statement in step #1.  We instead create
  85    a template block without the outgoing edges and duplicate the template.
  86
  87    Another case this code handles is threading through a "joiner" block.  In
  88    this case, we do not know the destination of the joiner block, but one
  89    of the outgoing edges from the joiner block leads to a threadable path.  This
  90    case largely works as outlined above, except the duplicate of the joiner
  91    block still contains a full set of outgoing edges and its control statement.
  92    We just redirect one of its outgoing edges to our jump threading path.  */
  93
  94
  95 /* Steps #5 and #6 of the above algorithm are best implemented by walking
  96    all the incoming edges which thread to the same destination edge at
  97    the same time.  That avoids lots of table lookups to get information
  98    for the destination edge.
  99
 100    To realize that implementation we create a list of incoming edges
 101    which thread to the same outgoing edge.  Thus to implement steps
 102    #5 and #6 we traverse our hash table of outgoing edge information.
 103    For each entry we walk the list of incoming edges which thread to
 104    the current outgoing edge.  */
 105
/* One node of a singly linked list of incoming edges.  */
struct el
{
  /* The incoming edge recorded by this node.  */
  edge e;

  /* The next node in the list, or NULL for the last node.  */
  struct el *next;
};
 111
 112 /* Main data structure recording information regarding B's duplicate
 113    blocks.  */
 114
 115 /* We need to efficiently record the unique thread destinations of this
 116    block and specific information associated with those destinations.  We
 117    may have many incoming edges threaded to the same outgoing edge.  This
 118    can be naturally implemented with a hash table.  */
 119
struct redirection_data : typed_free_remove<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.

     Both entries are NULL when an entry is first built by
     lookup_redirection_data; create_block_for_threading stores the
     duplicate it creates into the slot it is asked to fill.  */
  basic_block dup_blocks[2];

  /* The jump threading path.  This is the only field examined by the
     hash and equality functions below.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  HASH uses the index of the destination block
     of the last edge in PATH.  EQUAL compares two paths element by
     element starting at index 1, so the first element of each path
     does not take part in the comparison.  */
  typedef redirection_data value_type;
  typedef redirection_data compare_type;
  static inline hashval_t hash (const value_type *);
  static inline int equal (const value_type *, const compare_type *);
};
 150
 151 /* Dump a jump threading path, including annotations about each
 152    edge in the path.  */
 153
 154 static void
 155 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
 156                        bool registering)
 157 {
 158   fprintf (dump_file,
 159            "  %s jump thread: (%d, %d) incoming edge; ",
 160            (registering ? "Registering" : "Cancelling"),
 161            path[0]->e->src->index, path[0]->e->dest->index);
 162
 163   for (unsigned int i = 1; i < path.length (); i++)
 164     {
 165       /* We can get paths with a NULL edge when the final destination
 166          of a jump thread turns out to be a constant address.  We dump
 167          those paths when debugging, so we have to be prepared for that
 168          possibility here.  */
 169       if (path[i]->e == NULL)
 170         continue;
 171
 172       if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 173         fprintf (dump_file, " (%d, %d) joiner; ",
 174                  path[i]->e->src->index, path[i]->e->dest->index);
 175       if (path[i]->type == EDGE_COPY_SRC_BLOCK)
 176        fprintf (dump_file, " (%d, %d) normal;",
 177                  path[i]->e->src->index, path[i]->e->dest->index);
 178       if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
 179        fprintf (dump_file, " (%d, %d) nocopy;",
 180                  path[i]->e->src->index, path[i]->e->dest->index);
 181     }
 182   fputc ('\n', dump_file);
 183 }
 184
 185 /* Simple hashing function.  For any given incoming edge E, we're going
 186    to be most concerned with the final destination of its jump thread
 187    path.  So hash on the block index of the final edge in the path.  */
 188
 189 inline hashval_t
 190 redirection_data::hash (const value_type *p)
 191 {
 192   vec<jump_thread_edge *> *path = p->path;
 193   return path->last ()->e->dest->index;
 194 }
 195
 196 /* Given two hash table entries, return true if they have the same
 197    jump threading path.  */
 198 inline int
 199 redirection_data::equal (const value_type *p1, const compare_type *p2)
 200 {
 201   vec<jump_thread_edge *> *path1 = p1->path;
 202   vec<jump_thread_edge *> *path2 = p2->path;
 203
 204   if (path1->length () != path2->length ())
 205     return false;
 206
 207   for (unsigned int i = 1; i < path1->length (); i++)
 208     {
 209       if ((*path1)[i]->type != (*path2)[i]->type
 210           || (*path1)[i]->e != (*path2)[i]->e)
 211         return false;
 212     }
 213
 214   return true;
 215 }
 216
 217 /* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* Blocks duplicated for the thread.  This is a bitmap of basic block
     indices; compute_path_counts tests the index of an edge's source
     block against it to ignore edges leaving duplicated blocks.  */
  bitmap duplicate_blocks;
};
 236
/* Passes which use the jump threading code register jump threading
   opportunities as they are discovered.  We keep the registered
   jump threading opportunities in this vector.  Each element points
   to one jump threading path, which is itself a vector of
   jump_thread_edge entries.  */
static vec<vec<jump_thread_edge *> *> paths;
 242
/* When we start updating the CFG for threading, data necessary for jump
   threading is attached to the AUX field for the incoming edge.  Use this
   macro to access the underlying structure attached to the AUX field: it
   casts the AUX field of edge E to a pointer to a vector of
   jump_thread_edge entries, i.e. a jump threading path.  */
#define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
 247
/* Jump threading statistics.  */

struct thread_stats_d
{
  /* NOTE(review): presumably the number of edges that have been
     threaded; the code that updates it is not visible here, so
     confirm against the users of thread_stats.  */
  unsigned long num_threaded_edges;
};

/* The statistics record itself.  Note that it is not declared static,
   so it has external linkage.  */
struct thread_stats_d thread_stats;
 256
 257
 258 /* Remove the last statement in block BB if it is a control statement
 259    Also remove all outgoing edges except the edge which reaches DEST_BB.
 260    If DEST_BB is NULL, then remove all outgoing edges.  */
 261
 262 static void
 263 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
 264 {
 265   gimple_stmt_iterator gsi;
 266   edge e;
 267   edge_iterator ei;
 268
 269   gsi = gsi_last_bb (bb);
 270
 271   /* If the duplicate ends with a control statement, then remove it.
 272
 273      Note that if we are duplicating the template block rather than the
 274      original basic block, then the duplicate might not have any real
 275      statements in it.  */
 276   if (!gsi_end_p (gsi)
 277       && gsi_stmt (gsi)
 278       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
 279           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
 280           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
 281     gsi_remove (&gsi, true);
 282
 283   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
 284     {
 285       if (e->dest != dest_bb)
 286         remove_edge (e);
 287       else
 288         ei_next (&ei);
 289     }
 290 }
 291
 292 /* Create a duplicate of BB.  Record the duplicate block in an array
 293    indexed by COUNT stored in RD.  */
 294
 295 static void
 296 create_block_for_threading (basic_block bb,
 297                             struct redirection_data *rd,
 298                             unsigned int count,
 299                             bitmap *duplicate_blocks)
 300 {
 301   edge_iterator ei;
 302   edge e;
 303
 304   /* We can use the generic block duplication code and simply remove
 305      the stuff we do not need.  */
 306   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
 307
 308   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
 309     e->aux = NULL;
 310
 311   /* Zero out the profile, since the block is unreachable for now.  */
 312   rd->dup_blocks[count]->frequency = 0;
 313   rd->dup_blocks[count]->count = 0;
 314   if (duplicate_blocks)
 315     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
 316 }
 317
/* Main data structure to hold information for duplicates of BB.  This
   is the hash table of redirection_data entries that
   lookup_redirection_data searches and inserts into.

   Note that this variable has the same name as the struct it stores,
   so from here on the type has to be written as
   "struct redirection_data".  */

static hash_table<redirection_data> *redirection_data;
 321
 322 /* Given an outgoing edge E lookup and return its entry in our hash table.
 323
 324    If INSERT is true, then we insert the entry into the hash table if
 325    it is not already present.  INCOMING_EDGE is added to the list of incoming
 326    edges associated with E in the hash table.  */
 327
 328 static struct redirection_data *
 329 lookup_redirection_data (edge e, enum insert_option insert)
 330 {
 331   struct redirection_data **slot;
 332   struct redirection_data *elt;
 333   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 334
 335  /* Build a hash table element so we can see if E is already
 336      in the table.  */
 337   elt = XNEW (struct redirection_data);
 338   elt->path = path;
 339   elt->dup_blocks[0] = NULL;
 340   elt->dup_blocks[1] = NULL;
 341   elt->incoming_edges = NULL;
 342
 343   slot = redirection_data->find_slot (elt, insert);
 344
 345   /* This will only happen if INSERT is false and the entry is not
 346      in the hash table.  */
 347   if (slot == NULL)
 348     {
 349       free (elt);
 350       return NULL;
 351     }
 352
 353   /* This will only happen if E was not in the hash table and
 354      INSERT is true.  */
 355   if (*slot == NULL)
 356     {
 357       *slot = elt;
 358       elt->incoming_edges = XNEW (struct el);
 359       elt->incoming_edges->e = e;
 360       elt->incoming_edges->next = NULL;
 361       return elt;
 362     }
 363   /* E was in the hash table.  */
 364   else
 365     {
 366       /* Free ELT as we do not need it anymore, we will extract the
 367          relevant entry from the hash table itself.  */
 368       free (elt);
 369
 370       /* Get the entry stored in the hash table.  */
 371       elt = *slot;
 372
 373       /* If insertion was requested, then we need to add INCOMING_EDGE
 374          to the list of incoming edges associated with E.  */
 375       if (insert)
 376         {
 377           struct el *el = XNEW (struct el);
 378           el->next = elt->incoming_edges;
 379           el->e = e;
 380           elt->incoming_edges = el;
 381         }
 382
 383       return elt;
 384     }
 385 }
 386
 387 /* Similar to copy_phi_args, except that the PHI arg exists, it just
 388    does not have a value associated with it.  */
 389
 390 static void
 391 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
 392 {
 393   int src_idx = src_e->dest_idx;
 394   int tgt_idx = tgt_e->dest_idx;
 395
 396   /* Iterate over each PHI in e->dest.  */
 397   for (gimple_stmt_iterator gsi = gsi_start_phis (src_e->dest),
 398                             gsi2 = gsi_start_phis (tgt_e->dest);
 399        !gsi_end_p (gsi);
 400        gsi_next (&gsi), gsi_next (&gsi2))
 401     {
 402       gimple src_phi = gsi_stmt (gsi);
 403       gimple dest_phi = gsi_stmt (gsi2);
 404       tree val = gimple_phi_arg_def (src_phi, src_idx);
 405       source_location locus = gimple_phi_arg_location (src_phi, src_idx);
 406
 407       SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
 408       gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
 409     }
 410 }
 411
 412 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
 413    to see if it has constant value in a flow sensitive manner.  Set
 414    LOCUS to location of the constant phi arg and return the value.
 415    Return DEF directly if either PATH or idx is ZERO.  */
 416
 417 static tree
 418 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
 419                          basic_block bb, int idx, source_location *locus)
 420 {
 421   tree arg;
 422   gimple def_phi;
 423   basic_block def_bb;
 424
 425   if (path == NULL || idx == 0)
 426     return def;
 427
 428   def_phi = SSA_NAME_DEF_STMT (def);
 429   if (gimple_code (def_phi) != GIMPLE_PHI)
 430     return def;
 431
 432   def_bb = gimple_bb (def_phi);
 433   /* Don't propagate loop invariants into deeper loops.  */
 434   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
 435     return def;
 436
 437   /* Backtrack jump threading path from IDX to see if def has constant
 438      value.  */
 439   for (int j = idx - 1; j >= 0; j--)
 440     {
 441       edge e = (*path)[j]->e;
 442       if (e->dest == def_bb)
 443         {
 444           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
 445           if (is_gimple_min_invariant (arg))
 446             {
 447               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
 448               return arg;
 449             }
 450           break;
 451         }
 452     }
 453
 454   return def;
 455 }
 456
 457 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
 458    Try to backtrack jump threading PATH from node IDX to see if the arg
 459    has constant value, copy constant value instead of argument itself
 460    if yes.  */
 461
 462 static void
 463 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
 464                vec<jump_thread_edge *> *path, int idx)
 465 {
 466   gimple_stmt_iterator gsi;
 467   int src_indx = src_e->dest_idx;
 468
 469   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 470     {
 471       gimple phi = gsi_stmt (gsi);
 472       tree def = gimple_phi_arg_def (phi, src_indx);
 473       source_location locus = gimple_phi_arg_location (phi, src_indx);
 474
 475       if (TREE_CODE (def) == SSA_NAME
 476           && !virtual_operand_p (gimple_phi_result (phi)))
 477         def = get_value_locus_in_path (def, path, bb, idx, &locus);
 478
 479       add_phi_arg (phi, def, tgt_e, locus);
 480     }
 481 }
 482
 483 /* We have recently made a copy of ORIG_BB, including its outgoing
 484    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
 485    ORIG_BB has a new argument associated with edge from NEW_BB to the
 486    successor.  Initialize the PHI argument so that it is equal to the PHI
 487    argument associated with the edge from ORIG_BB to the successor.
 488    PATH and IDX are used to check if the new PHI argument has constant
 489    value in a flow sensitive manner.  */
 490
 491 static void
 492 update_destination_phis (basic_block orig_bb, basic_block new_bb,
 493                          vec<jump_thread_edge *> *path, int idx)
 494 {
 495   edge_iterator ei;
 496   edge e;
 497
 498   FOR_EACH_EDGE (e, ei, orig_bb->succs)
 499     {
 500       edge e2 = find_edge (new_bb, e->dest);
 501       copy_phi_args (e->dest, e, e2, path, idx);
 502     }
 503 }
 504
 505 /* Given a duplicate block and its single destination (both stored
 506    in RD).  Create an edge between the duplicate and its single
 507    destination.
 508
 509    Add an additional argument to any PHI nodes at the single
 510    destination.  IDX is the start node in jump threading path
 511    we start to check to see if the new PHI argument has constant
 512    value along the jump threading path.  */
 513
 514 static void
 515 create_edge_and_update_destination_phis (struct redirection_data *rd,
 516                                          basic_block bb, int idx)
 517 {
 518   edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
 519
 520   rescan_loop_exit (e, true, false);
 521   e->probability = REG_BR_PROB_BASE;
 522   e->count = bb->count;
 523
 524   /* We used to copy the thread path here.  That was added in 2007
 525      and dutifully updated through the representation changes in 2013.
 526
 527      In 2013 we added code to thread from an interior node through
 528      the backedge to another interior node.  That runs after the code
 529      to thread through loop headers from outside the loop.
 530
 531      The latter may delete edges in the CFG, including those
 532      which appeared in the jump threading path we copied here.  Thus
 533      we'd end up using a dangling pointer.
 534
 535      After reviewing the 2007/2011 code, I can't see how anything
 536      depended on copying the AUX field and clearly copying the jump
 537      threading path is problematical due to embedded edge pointers.
 538      It has been removed.  */
 539   e->aux = NULL;
 540
 541   /* If there are any PHI nodes at the destination of the outgoing edge
 542      from the duplicate block, then we will need to add a new argument
 543      to them.  The argument should have the same value as the argument
 544      associated with the outgoing edge stored in RD.  */
 545   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
 546 }
 547
 548 /* Look through PATH beginning at START and return TRUE if there are
 549    any additional blocks that need to be duplicated.  Otherwise,
 550    return FALSE.  */
 551 static bool
 552 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
 553                                  unsigned int start)
 554 {
 555   for (unsigned int i = start + 1; i < path->length (); i++)
 556     {
 557       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
 558           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
 559         return true;
 560     }
 561   return false;
 562 }
 563
 564
 565 /* Compute the amount of profile count/frequency coming into the jump threading
 566    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
 567    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
 568    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
 569    identify blocks duplicated for jump threading, which have duplicated
 570    edges that need to be ignored in the analysis.  Return true if path contains
 571    a joiner, false otherwise.
 572
 573    In the non-joiner case, this is straightforward - all the counts/frequency
 574    flowing into the jump threading path should flow through the duplicated
 575    block and out of the duplicated path.
 576
 577    In the joiner case, it is very tricky.  Some of the counts flowing into
 578    the original path go offpath at the joiner.  The problem is that while
 579    we know how much total count goes off-path in the original control flow,
 580    we don't know how many of the counts corresponding to just the jump
 581    threading path go offpath at the joiner.
 582
 583    For example, assume we have the following control flow and identified
 584    jump threading paths:
 585
 586                 A     B     C
 587                  \    |    /
 588                Ea \   |Eb / Ec
 589                    \  |  /
 590                     v v v
 591                       J       <-- Joiner
 592                      / \
 593                 Eoff/   \Eon
 594                    /     \
 595                   v       v
 596                 Soff     Son  <--- Normal
 597                          /\
 598                       Ed/  \ Ee
 599                        /    \
 600                       v     v
 601                       D      E
 602
 603             Jump threading paths: A -> J -> Son -> D (path 1)
 604                                   C -> J -> Son -> E (path 2)
 605
 606    Note that the control flow could be more complicated:
 607    - Each jump threading path may have more than one incoming edge.  I.e. A and
 608    Ea could represent multiple incoming blocks/edges that are included in
 609    path 1.
 610    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
 611    before or after the "normal" copy block).  These are not duplicated onto
 612    the jump threading path, as they are single-successor.
 613    - Any of the blocks along the path may have other incoming edges that
 614    are not part of any jump threading path, but add profile counts along
 615    the path.
 616
 617    In the above example, after all jump threading is complete, we will
 618    end up with the following control flow:
 619
 620                 A          B            C
 621                 |          |            |
 622               Ea|          |Eb          |Ec
 623                 |          |            |
 624                 v          v            v
 625                Ja          J           Jc
 626                / \        / \Eon'     / \
 627           Eona/   \   ---/---\--------   \Eonc
 628              /     \ /  /     \           \
 629             v       v  v       v          v
 630            Sona     Soff      Son        Sonc
 631              \                 /\         /
 632               \___________    /  \  _____/
 633                           \  /    \/
 634                            vv      v
 635                             D      E
 636
 637    The main issue to notice here is that when we are processing path 1
 638    (A->J->Son->D) we need to figure out the outgoing edge weights to
 639    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
 640    sum of the incoming weights to D remain Ed.  The problem with simply
 641    assuming that Ja (and Jc when processing path 2) has the same outgoing
 642    probabilities to its successors as the original block J, is that after
 643    all paths are processed and other edges/counts removed (e.g. none
 644    of Ec will reach D after processing path 2), we may end up with not
 645    enough count flowing along duplicated edge Sona->D.
 646
 647    Therefore, in the case of a joiner, we keep track of all counts
 648    coming in along the current path, as well as from predecessors not
 649    on any jump threading path (Eb in the above example).  While we
 650    first assume that the duplicated Eona for Ja->Sona has the same
 651    probability as the original, we later compensate for other jump
 652    threading paths that may eliminate edges.  We do that by keeping track
 653    of all counts coming into the original path that are not in a jump
 654    thread (Eb in the above example, but as noted earlier, there could
 655    be other predecessors incoming to the path at various points, such
 656    as at Son).  Call this cumulative non-path count coming into the path
 657    before D as Enonpath.  We then ensure that the count from Sona->D is at
 658    least as big as (Ed - Enonpath), but no bigger than the minimum
 659    weight along the jump threading path.  The probabilities of both the
 660    original and duplicated joiner block J and Ja will be adjusted
 661    accordingly after the updates.  */
 662
 663 static bool
 664 compute_path_counts (struct redirection_data *rd,
 665                      ssa_local_info_t *local_info,
 666                      gcov_type *path_in_count_ptr,
 667                      gcov_type *path_out_count_ptr,
 668                      int *path_in_freq_ptr)
 669 {
 670   edge e = rd->incoming_edges->e;
 671   vec<jump_thread_edge *> *path = THREAD_PATH (e);
 672   edge elast = path->last ()->e;
 673   gcov_type nonpath_count = 0;
 674   bool has_joiner = false;
 675   gcov_type path_in_count = 0;
 676   int path_in_freq = 0;
 677
 678   /* Start by accumulating incoming edge counts to the path's first bb
 679      into a couple buckets:
 680         path_in_count: total count of incoming edges that flow into the
 681                   current path.
 682         nonpath_count: total count of incoming edges that are not
 683                   flowing along *any* path.  These are the counts
 684                   that will still flow along the original path after
 685                   all path duplication is done by potentially multiple
 686                   calls to this routine.
 687      (any other incoming edge counts are for a different jump threading
 688      path that will be handled by a later call to this routine.)
 689      To make this easier, start by recording all incoming edges that flow into
 690      the current path in a bitmap.  We could add up the path's incoming edge
 691      counts here, but we still need to walk all the first bb's incoming edges
 692      below to add up the counts of the other edges not included in this jump
 693      threading path.  */
 694   struct el *next, *el;
 695   bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
 696   for (el = rd->incoming_edges; el; el = next)
 697     {
 698       next = el->next;
 699       bitmap_set_bit (in_edge_srcs, el->e->src->index);
 700     }
 701   edge ein;
 702   edge_iterator ei;
 703   FOR_EACH_EDGE (ein, ei, e->dest->preds)
 704     {
 705       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
 706       /* Simply check the incoming edge src against the set captured above.  */
 707       if (ein_path
 708           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
 709         {
 710           /* It is necessary but not sufficient that the last path edges
 711              are identical.  There may be different paths that share the
 712              same last path edge in the case where the last edge has a nocopy
 713              source block.  */
 714           gcc_assert (ein_path->last ()->e == elast);
 715           path_in_count += ein->count;
 716           path_in_freq += EDGE_FREQUENCY (ein);
 717         }
 718       else if (!ein_path)
 719         {
 720           /* Keep track of the incoming edges that are not on any jump-threading
 721              path.  These counts will still flow out of original path after all
 722              jump threading is complete.  */
 723             nonpath_count += ein->count;
 724         }
 725     }
 726   BITMAP_FREE (in_edge_srcs);
 727
 728   /* Now compute the fraction of the total count coming into the first
 729      path bb that is from the current threading path.  */
 730   gcov_type total_count = e->dest->count;
 731   /* Handle incoming profile insanities.  */
 732   if (total_count < path_in_count)
 733     path_in_count = total_count;
 734   int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
 735
 736   /* Walk the entire path to do some more computation in order to estimate
 737      how much of the path_in_count will flow out of the duplicated threading
 738      path.  In the non-joiner case this is straightforward (it should be
 739      the same as path_in_count, although we will handle incoming profile
 740      insanities by setting it equal to the minimum count along the path).
 741
 742      In the joiner case, we need to estimate how much of the path_in_count
 743      will stay on the threading path after the joiner's conditional branch.
 744      We don't really know for sure how much of the counts
 745      associated with this path go to each successor of the joiner, but we'll
 746      estimate based on the fraction of the total count coming into the path
 747      bb was from the threading paths (computed above in onpath_scale).
 748      Afterwards, we will need to do some fixup to account for other threading
 749      paths and possible profile insanities.
 750
 751      In order to estimate the joiner case's counts we also need to update
 752      nonpath_count with any additional counts coming into the path.  Other
 753      blocks along the path may have additional predecessors from outside
 754      the path.  */
 755   gcov_type path_out_count = path_in_count;
 756   gcov_type min_path_count = path_in_count;
 757   for (unsigned int i = 1; i < path->length (); i++)
 758     {
 759       edge epath = (*path)[i]->e;
 760       gcov_type cur_count = epath->count;
 761       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
 762         {
 763           has_joiner = true;
 764           cur_count = apply_probability (cur_count, onpath_scale);
 765         }
 766       /* In the joiner case we need to update nonpath_count for any edges
 767          coming into the path that will contribute to the count flowing
 768          into the path successor.  */
 769       if (has_joiner && epath != elast)
 770       {
 771         /* Look for other incoming edges after joiner.  */
 772         FOR_EACH_EDGE (ein, ei, epath->dest->preds)
 773           {
 774             if (ein != epath
 775                 /* Ignore in edges from blocks we have duplicated for a
 776                    threading path, which have duplicated edge counts until
 777                    they are redirected by an invocation of this routine.  */
 778                 && !bitmap_bit_p (local_info->duplicate_blocks,
 779                                   ein->src->index))
 780               nonpath_count += ein->count;
 781           }
 782       }
 783       if (cur_count < path_out_count)
 784         path_out_count = cur_count;
 785       if (epath->count < min_path_count)
 786         min_path_count = epath->count;
 787     }
 788
 789   /* We computed path_out_count above assuming that this path targeted
 790      the joiner's on-path successor with the same likelihood as it
 791      reached the joiner.  However, other thread paths through the joiner
 792      may take a different path through the normal copy source block
 793      (i.e. they have a different elast), meaning that they do not
 794      contribute any counts to this path's elast.  As a result, it may
 795      turn out that this path must have more count flowing to the on-path
 796      successor of the joiner.  Essentially, all of this path's elast
 797      count must be contributed by this path and any nonpath counts
 798      (since any path through the joiner with a different elast will not
 799      include a copy of this elast in its duplicated path).
 800      So ensure that this path's path_out_count is at least the
 801      difference between elast->count and nonpath_count.  Otherwise the edge
 802      counts after threading will not be sane.  */
 803   if (has_joiner && path_out_count < elast->count - nonpath_count)
 804   {
 805     path_out_count = elast->count - nonpath_count;
 806     /* But neither can we go above the minimum count along the path
 807        we are duplicating.  This can be an issue due to profile
 808        insanities coming in to this pass.  */
 809     if (path_out_count > min_path_count)
 810       path_out_count = min_path_count;
 811   }
 812
 813   *path_in_count_ptr = path_in_count;
 814   *path_out_count_ptr = path_out_count;
 815   *path_in_freq_ptr = path_in_freq;
 816   return has_joiner;
 817 }
 818
 819
 820 /* Shift profile weight from the original path edge EPATH onto its
 821    duplicate EDUP.  A non-NULL EDUP gets a count of PATH_OUT_COUNT and
 822    its source block, which must still have a zero count and frequency,
 823    gets PATH_IN_COUNT and PATH_IN_FREQ.  The same amounts are always
 824    subtracted from EPATH and from EPATH's source block.  */
 825 static void
 826 update_profile (edge epath, edge edup, gcov_type path_in_count,
 827                 gcov_type path_out_count, int path_in_freq)
 828 {
 829   basic_block orig_block = epath->src;
 830
 831   /* The duplicated block has no profile of its own yet; it simply
 832      takes on whatever enters the threading path.  */
 833   if (edup)
 834     {
 835       gcc_assert (edup->src->count == 0);
 836       gcc_assert (edup->src->frequency == 0);
 837       edup->src->count = path_in_count;
 838       edup->src->frequency = path_in_freq;
 839     }
 840
 841   /* The original block gives up what now flows into the duplicate.
 842      Precision/rounding issues could push the remainder below zero,
 843      so both the count and the frequency bottom out at zero.  */
 844   if (orig_block->count > path_in_count)
 845     orig_block->count -= path_in_count;
 846   else
 847     orig_block->count = 0;
 848   if (orig_block->frequency > path_in_freq)
 849     orig_block->frequency -= path_in_freq;
 850   else
 851     orig_block->frequency = 0;
 852
 853   /* PATH_OUT_COUNT leaves the duplicated path along EDUP (for a joiner,
 854      out of the duplicated joiner), so EPATH sheds exactly that much.  */
 855   if (edup)
 856     edup->count = path_out_count;
 857   epath->count -= path_out_count;
 858   gcc_assert (epath->count >= 0);
 859 }
 860
 861
 862 /* Refresh the outgoing edge probabilities of BB from the current
 863    block and edge counts.  The original and the duplicated joiner may
 864    end up with probabilities that differ from the pre-threading ones
 865    and from each other, so this runs after the edge counts and
 866    frequencies have been updated.  A BB with a zero count is left
 867    untouched.  */
 868
 869 static void
 870 recompute_probabilities (basic_block bb)
 871 {
 872   edge succ;
 873   edge_iterator iter;
 874
 875   if (!bb->count)
 876     return;
 877
 878   FOR_EACH_EDGE (succ, iter, bb->succs)
 879     {
 880       /* An edge holding at least the whole block count can come from
 881          missing/guessed probabilities, when more is found to flow along
 882          the duplicated path than the joiner succ probabilities allowed.
 883          Counts and freqs stay insane short of redoing the profile
 884          estimation, but a sane probability avoids a flow verification
 885          error (and an overflowing scale computation).  */
 886       if (succ->count >= bb->count)
 887         succ->probability = REG_BR_PROB_BASE;
 888       else
 889         succ->probability = GCOV_COMPUTE_SCALE (succ->count,
 890                                                 bb->count);
 891     }
 892 }
 893
 894
 895 /* Split the off-path counts of a joiner between the original and the
 896    duplicated joiner.  EPATH is the original on-path edge out of the
 897    joiner.  DUP_BB is the duplicated joiner, for which an incoming
 898    count of PATH_IN_COUNT and an outgoing count along the path of
 899    PATH_OUT_COUNT have already been determined.  Only the counts of
 900    the off-path edges of both joiners are changed here.  */
 901
 902 static void
 903 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
 904                               gcov_type path_in_count,
 905                               gcov_type path_out_count)
 906 {
 907   basic_block joiner = epath->src;
 908   edge off_edge;
 909   edge_iterator iter;
 910
 911   /* What enters the duplicated joiner without continuing along the
 912      duplicated path has to leave it on the off-path edges.  */
 913   gcov_type dup_off_total = path_in_count - path_out_count;
 914
 915   /* Count currently leaving the original joiner off path, i.e. over
 916      every out edge other than EPATH.  The edges are summed up instead
 917      of subtracting EPATH's count from the joiner's block count, since
 918      there are sometimes slight insanities where the total out edge
 919      count is larger than the bb count (possibly due to
 920      rounding/truncation errors).  */
 921   gcov_type orig_off_total = 0;
 922   FOR_EACH_EDGE (off_edge, iter, joiner->succs)
 923     {
 924       if (off_edge != epath)
 925         orig_off_total += off_edge->count;
 926     }
 927
 928   /* Give every duplicated off-path edge its share and remove that
 929      share from the matching original edge.  */
 930   FOR_EACH_EDGE (off_edge, iter, joiner->succs)
 931     {
 932       /* Only edges leaving the threading path are of interest.  */
 933       if (off_edge == epath)
 934         continue;
 935
 936       /* The duplicated joiner is expected to have an edge to the same
 937          destination.  */
 938       edge dup_off_edge = find_edge (dup_bb, off_edge->dest);
 939       gcc_assert (dup_off_edge);
 940
 941       /* The original probabilities of the joiner's out edges cannot
 942          be used, since the probabilities of the original and the
 943          duplicated branches may vary after all threading is complete.
 944          Instead the duplicate's off-path total is apportioned among
 945          the duplicated off-path edges by the ratio of each original
 946          edge count to the original off-path total.  */
 947       int share = GCOV_COMPUTE_SCALE (off_edge->count,
 948                                       orig_off_total);
 949       /* This edge's portion of the duplicated joiner's off-path total.  */
 950       dup_off_edge->count = apply_scale (share,
 951                                          dup_off_total);
 952
 953       /* Take the share away from the original edge.  Rounding errors
 954          may make the share exceed what the edge holds, in which case
 955          the edge is left with nothing.  */
 956       if (off_edge->count > dup_off_edge->count)
 957         off_edge->count -= dup_off_edge->count;
 958       else
 959         off_edge->count = 0;
 960     }
 961 }
 962
 963
 964 /* Return true if the paths through RD have estimated frequencies but
 965    zero profile counts.  Inspecting the paths is more accurate than
 966    checking the entry block, since profile insanities sometimes creep in.  */
 967
 968 static bool
 969 estimated_freqs_path (struct redirection_data *rd)
 970 {
 971   edge first_edge = rd->incoming_edges->e;
 972   vec<jump_thread_edge *> *thread_path = THREAD_PATH (first_edge);
 973   bool saw_freq = false;
 974   edge_iterator iter;
 975   edge pred, succ;
 976   /* A nonzero count anywhere -- on an edge into the path, a path block
 977      or one of its out edges -- rules out a zero-count path.  */
 978   FOR_EACH_EDGE (pred, iter, first_edge->dest->preds)
 979     {
 980       if (pred->count)
 981         return false;
 982       saw_freq = saw_freq || pred->src->frequency != 0;
 983     }
 984   for (unsigned int idx = 1; idx < thread_path->length (); idx++)
 985     {
 986       edge step = (*thread_path)[idx]->e;
 987       if (step->src->count)
 988         return false;
 989       saw_freq = saw_freq || step->src->frequency != 0;
 990       FOR_EACH_EDGE (succ, iter, step->src->succs)
 991         {
 992           if (succ->count)
 993             return false;
 994           saw_freq = saw_freq || succ->src->frequency != 0;
 995         }
 996     }
 997   return saw_freq;
 998 }
 999
1000
1001 /* Snapshot the guessed block and edge frequencies for the paths
1002    through RD into the profile count fields.  This is invoked for
1003    routines that have guessed frequencies and no profile counts.
1004    ssa_fix_duplicate_block_edges incrementally updates the block and
1005    edge counts as edges are redirected, which is difficult to do for
1006    edge frequencies: they are computed on the fly from the source
1007    block frequency and probability, so updating a block frequency
1008    affects its outgoing edge frequencies as well.
1009
1010    Frequencies are scaled up by REG_BR_PROB_BASE before an edge
1011    probability is applied, to avoid rounding errors when the
1012    frequencies are very small.  */
1013
1014 static void
1015 freqs_to_counts_path (struct redirection_data *rd)
1016 {
1017   edge first_edge = rd->incoming_edges->e;
1018   vec<jump_thread_edge *> *thread_path = THREAD_PATH (first_edge);
1019   edge_iterator iter;
1020   edge pred, succ;
1021
1022   /* Edges entering the first block of the path.  */
1023   FOR_EACH_EDGE (pred, iter, first_edge->dest->preds)
1024     pred->count
1025       = apply_probability (pred->src->frequency * REG_BR_PROB_BASE,
1026                            pred->probability);
1027
1028   /* Each block on the path, followed by all of its outgoing edges.  */
1029   for (unsigned int idx = 1; idx < thread_path->length (); idx++)
1030     {
1031       edge step = (*thread_path)[idx]->e;
1032
1033       /* Block first: edge counts derive from their source's count.  */
1034       step->src->count = step->src->frequency * REG_BR_PROB_BASE;
1035       FOR_EACH_EDGE (succ, iter, step->src->succs)
1036         succ->count
1037           = apply_probability (succ->src->count, succ->probability);
1038     }
1039 }
1040
1041
1042 /* Counterpart of freqs_to_counts_path for routines that have guessed
1043    frequencies and no profile counts: zero the count fields on the
1044    paths through RD once all updates for RD are complete.  Those
1045    updates are based off the count fields but keep the block frequencies
1046    and edge probabilities updated too, so the counts can just be cleared.  */
1047
1048 static void
1049 clear_counts_path (struct redirection_data *rd)
1050 {
1051   edge first_edge = rd->incoming_edges->e;
1052   vec<jump_thread_edge *> *thread_path = THREAD_PATH (first_edge);
1053   edge_iterator iter;
1054   edge cur;
1055
1056   /* Edges entering the first block of the path.  */
1057   FOR_EACH_EDGE (cur, iter, first_edge->dest->preds)
1058     cur->count = 0;
1059
1060   /* Blocks of the original path and their outgoing edges.  */
1061   for (unsigned int idx = 1; idx < thread_path->length (); idx++)
1062     {
1063       basic_block orig = (*thread_path)[idx]->e->src;
1064       orig->count = 0;
1065       FOR_EACH_EDGE (cur, iter, orig->succs)
1066         cur->count = 0;
1067     }
1068   /* The duplicated blocks that exist, and their outgoing edges.  */
1069   for (unsigned int slot = 0; slot < 2; slot++)
1070     if (rd->dup_blocks[slot])
1071       {
1072         basic_block dup = rd->dup_blocks[slot];
1073         dup->count = 0;
1074         FOR_EACH_EDGE (cur, iter, dup->succs)
1075           cur->count = 0;
1076       }
1077 }
1078
1079 /* Wire up the outgoing edges from the duplicate blocks and
1080    update any PHIs as needed.  Also update the profile counts
1081    on the original and duplicate blocks and edges.
        RD is the redirection data whose duplicate blocks (RD->dup_blocks)
        are being wired up; the jump threading path is taken from the first
        of RD's incoming edges.  LOCAL_INFO is handed on to
        compute_path_counts, and LOCAL_INFO->bb is handed to
        update_destination_phis.  */
1082 void
1083 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1084                                ssa_local_info_t *local_info)
1085 {
1086   bool multi_incomings = (rd->incoming_edges->next != NULL);
1087   edge e = rd->incoming_edges->e;
1088   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1089   edge elast = path->last ()->e;
1090   gcov_type path_in_count = 0;
1091   gcov_type path_out_count = 0;
1092   int path_in_freq = 0;
1093
1094   /* This routine updates profile counts, frequencies, and probabilities
1095      incrementally. Since it is difficult to do the incremental updates
1096      using frequencies/probabilities alone, for routines without profile
1097      data we first take a snapshot of the existing block and edge frequencies
1098      by copying them into the empty profile count fields.  These counts are
1099      then used to do the incremental updates, and cleared at the end of this
1100      routine.  If the function is marked as having a profile, we still check
1101      to see if the paths through RD are using estimated frequencies because
1102      the routine had zero profile counts.  */
1103   bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1104                              || estimated_freqs_path (rd));
1105   if (do_freqs_to_counts)
1106     freqs_to_counts_path (rd);
1107
1108   /* First determine how much profile count to move from original
1109      path to the duplicate path.  This is tricky in the presence of
1110      a joiner (see comments for compute_path_counts), where some portion
1111      of the path's counts will flow off-path from the joiner.  In the
1112      non-joiner case the path_in_count and path_out_count should be the
1113      same.  */
1114   bool has_joiner = compute_path_counts (rd, local_info,
1115                                          &path_in_count, &path_out_count,
1116                                          &path_in_freq);
1117
       /* CUR_PATH_FREQ is the frequency flowing along the duplicated path.
          It starts out as the frequency entering the path and is replaced,
          once a joiner has been processed, by the frequency of the
          duplicated on-path edge out of that joiner.

          In the loop below I indexes the path, starting after the incoming
          edge at element 0, while COUNT indexes RD->dup_blocks and only
          advances past path edges whose source block was duplicated.  */
1118   int cur_path_freq = path_in_freq;
1119   for (unsigned int count = 0, i = 1; i < path->length (); i++)
1120     {
1121       edge epath = (*path)[i]->e;
1122
1123       /* If we were threading through a joiner block, then we want
1124          to keep its control statement and redirect an outgoing edge.
1125          Else we want to remove the control statement & edges, then create
1126          a new outgoing edge.  In both cases we may need to update PHIs.  */
1127       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1128         {
1129           edge victim;
1130           edge e2;
1131
1132           gcc_assert (has_joiner);
1133
1134           /* This updates the PHIs at the destination of the duplicate
1135              block.  Pass 0 instead of i if we are threading a path which
1136              has multiple incoming edges.  */
1137           update_destination_phis (local_info->bb, rd->dup_blocks[count],
1138                                    path, multi_incomings ? 0 : i);
1139
1140           /* Find the edge from the duplicate block to the block we're
1141              threading through.  That's the edge we want to redirect.  */
1142           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1143
1144           /* If there are no remaining blocks on the path to duplicate,
1145              then redirect VICTIM to the final destination of the jump
1146              threading path.  */
1147           if (!any_remaining_duplicated_blocks (path, i))
1148             {
1149               e2 = redirect_edge_and_branch (victim, elast->dest);
1150               /* If we redirected the edge, then we need to copy PHI arguments
1151                  at the target.  If the edge already existed (e2 != victim
1152                  case), then the PHIs in the target already have the correct
1153                  arguments.  */
1154               if (e2 == victim)
1155                 copy_phi_args (e2->dest, elast, e2,
1156                                path, multi_incomings ? 0 : i);
1157             }
1158           else
1159             {
1160               /* Redirect VICTIM to the next duplicated block in the path.  */
1161               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1162
1163               /* We need to update the PHIs in the next duplicated block.  We
1164                  want the new PHI args to have the same value as they had
1165                  in the source of the next duplicate block.
1166
1167                  Thus, we need to know which edge we traversed into the
1168                  source of the duplicate.  Furthermore, we may have
1169                  traversed many edges to reach the source of the duplicate.
1170
1171                  Walk through the path starting at element I until we
1172                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
1173                  the edge from the prior element.  */
1174               for (unsigned int j = i + 1; j < path->length (); j++)
1175                 {
1176                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1177                     {
1178                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1179                       break;
1180                     }
1181                 }
1182             }
1183
1184           /* Update the counts and frequency of both the original block
1185              and path edge, and the duplicates.  The path duplicate's
1186              incoming count and frequency are the totals for all edges
1187              incoming to this jump threading path computed earlier.
1188              And we know that the duplicated path will have path_out_count
1189              flowing out of it (i.e. along the duplicated path out of the
1190              duplicated joiner).  */
1191           update_profile (epath, e2, path_in_count, path_out_count,
1192                           path_in_freq);
1193
1194           /* Next we need to update the counts of the original and duplicated
1195              edges from the joiner that go off path.  */
1196           update_joiner_offpath_counts (epath, e2->src, path_in_count,
1197                                         path_out_count);
1198
1199           /* Finally, we need to set the probabilities on the duplicated
1200              edges out of the duplicated joiner (e2->src).  The probabilities
1201              along the original path will all be updated below after we finish
1202              processing the whole path.  */
1203           recompute_probabilities (e2->src);
1204
1205           /* Record the frequency flowing to the downstream duplicated
1206              path blocks.  */
1207           cur_path_freq = EDGE_FREQUENCY (e2);
1208         }
1209       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1210         {
1211           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1212           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1213                                                    multi_incomings ? 0 : i);
               /* NOTE(review): only the edge out of the second duplicate has
                  its AUX field cleared here; the reason is not visible in
                  this function -- confirm against
                  create_edge_and_update_destination_phis.  */
1214           if (count == 1)
1215             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1216
1217           /* Update the counts and frequency of both the original block
1218              and path edge, and the duplicates.  Since we are now after
1219              any joiner that may have existed on the path, the count
1220              flowing along the duplicated threaded path is path_out_count.
1221              If we didn't have a joiner, then cur_path_freq was the sum
1222              of the total frequencies along all incoming edges to the
1223              thread path (path_in_freq).  If we had a joiner, it would have
1224              been updated at the end of that handling to the edge frequency
1225              along the duplicated joiner path edge.  */
1226           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1227                           path_out_count, path_out_count,
1228                           cur_path_freq);
1229         }
1230       else
1231         {
1232           /* No copy case.  In this case we don't have an equivalent block
1233              on the duplicated thread path to update, but we do need
1234              to remove the portion of the counts/freqs that were moved
1235              to the duplicated path from the counts/freqs flowing through
1236              this block on the original path.  Since all the no-copy edges
1237              are after any joiner, the removed count is the same as
1238              path_out_count.
1239
1240              If we didn't have a joiner, then cur_path_freq was the sum
1241              of the total frequencies along all incoming edges to the
1242              thread path (path_in_freq).  If we had a joiner, it would have
1243              been updated at the end of that handling to the edge frequency
1244              along the duplicated joiner path edge.  */
1245              update_profile (epath, NULL, path_out_count, path_out_count,
1246                              cur_path_freq);
1247         }
1248
1249       /* Increment the index into the duplicated path when we processed
1250          a duplicated block.  */
1251       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1252           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1253       {
1254           count++;
1255       }
1256     }
1257
1258   /* Now walk orig blocks and update their probabilities, since the
1259      counts and freqs should be updated properly by above loop.  */
1260   for (unsigned int i = 1; i < path->length (); i++)
1261     {
1262       edge epath = (*path)[i]->e;
1263       recompute_probabilities (epath->src);
1264     }
1265
1266   /* Done with all profile and frequency updates, clear counts if they
1267      were copied.  */
1268   if (do_freqs_to_counts)
1269     clear_counts_path (rd);
1270 }
1271
1272 /* Hash table traversal callback routine to create duplicate blocks
1273    for the redirection data in *SLOT.  Always returns 1.  */
1274
1275 int
1276 ssa_create_duplicates (struct redirection_data **slot,
1277                        ssa_local_info_t *local_info)
1278 {
1279   struct redirection_data *rd = *slot;
1280   vec<jump_thread_edge *> *thread_path = rd->path;
1281
1282   /* The second duplicated block in a jump threading path is specific
1283      to the path, so it is stored in RD rather than in LOCAL_INFO, and
1284      each call has to look through the path to see whether a second
1285      block needs to be duplicated.
1286
1287      The search starts with the third edge on the path: the first edge
1288      is the incoming edge and the second edge always has its source
1289      duplicated.  It stops at the first edge whose source is copied.  */
1290   unsigned int idx = 2;
1291   while (idx < thread_path->length ()
1292          && (*thread_path)[idx]->type != EDGE_COPY_SRC_BLOCK
1293          && (*thread_path)[idx]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1294     idx++;
1295
1296   /* Still within the path, so IDX names such an edge.  */
1297   if (idx < thread_path->length ())
1298     create_block_for_threading ((*thread_path)[idx]->e->src, rd, 1,
1299                                 &local_info->duplicate_blocks);
1300
1301   if (local_info->template_block != NULL)
1302     {
1303       /* A template exists already; use it to create a new block.  */
1304       create_block_for_threading (local_info->template_block, rd, 0,
1305                                   &local_info->duplicate_blocks);
1306
1307       /* Go ahead and wire up outgoing edges and update PHIs for the
1308          duplicate block.  */
1309       ssa_fix_duplicate_block_edges (rd, local_info);
1310     }
1311   else
1312     {
1313       /* No template block so far; this duplicate becomes it.  */
1314       create_block_for_threading ((*thread_path)[1]->e->src, rd, 0,
1315                                   &local_info->duplicate_blocks);
1316       local_info->template_block = rd->dup_blocks[0];
1317
1318       /* We do not create any outgoing edges for the template.  That is
1319          taken care of in a later traversal, so that we do not create
1320          edges that are going to just be deleted.  */
1321     }
1322
1323   /* Keep walking the hash table.  */
1324   return 1;
1325 }
1326
1327 /* Hash table traversal callback that creates the outgoing edge for
1328    the template block, which got none during block creation.  Returns
1329    0 to halt the traversal once the entry owning the template block
1330    has been updated, and 1 otherwise.  */
1331
1332 inline int
1333 ssa_fixup_template_block (struct redirection_data **slot,
1334                           ssa_local_info_t *local_info)
1335 {
1336   struct redirection_data *rd = *slot;
1337
1338   /* Keep walking unless the first duplicate of this entry is the
1339      template block.  */
1340   if (!rd->dup_blocks[0]
1341       || rd->dup_blocks[0] != local_info->template_block)
1342     return 1;
1343
1344   /* Found it.  If we were threading through a joiner block, we want
1345      to keep its control statement and redirect an outgoing edge.
1346      Else we want to remove the control statement & edges, then create
1347      a new outgoing edge.  In both cases we may need to update PHIs.
1348      ssa_fix_duplicate_block_edges takes care of all of that.  */
1349   ssa_fix_duplicate_block_edges (rd, local_info);
1350   return 0;
1351 }
1352
1353 /* Hash table traversal callback to redirect each incoming edge
1354    associated with this hash table element to its new destination,
1355    freeing the element's edge list.  Returns 1 to continue the walk.  */
1356
1357 int
1358 ssa_redirect_edges (struct redirection_data **slot,
1359                     ssa_local_info_t *local_info)
1360 {
1361   struct redirection_data *rd = *slot;
1362   struct el *next, *el;
1363
1364   /* Indicate that we actually threaded one or more jumps.  Test this
1365      before the walk below frees the list and leaves its head stale.  */
1366   if (rd->incoming_edges)
1367     local_info->jumps_threaded = true;
1368
1369   /* Walk over all the incoming edges of this hash table entry.  */
1370   for (el = rd->incoming_edges; el; el = next)
1371     {
1372       edge e = el->e;
1373       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1374
1375       /* Free this element from the list right away, which avoids the
1376          need for another list walk when we destroy the hash table.  */
1377       next = el->next;
1378       free (el);
1379
1380       thread_stats.num_threaded_edges++;
1381
1382       if (rd->dup_blocks[0])
1383         {
1384           if (dump_file && (dump_flags & TDF_DETAILS))
1385             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1386                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1387
1388           /* If we redirect a loop latch edge cancel its loop.  */
1389           if (e->src == e->src->loop_father->latch)
1390             mark_loop_for_removal (e->src->loop_father);
1391
1392           /* Redirect the incoming edge (possibly to the joiner block) to the
1393              appropriate duplicate block.  */
1394           edge e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1395           gcc_assert (e == e2);
1396           flush_pending_stmts (e2);
1397         }
1398
1399       /* Go ahead and clear E->aux.  It's not needed anymore and failure
1400          to clear it will cause all kinds of unpleasant problems later.  */
1401       delete_jump_thread_path (path);
1402       e->aux = NULL;
1403     }
1404
1405   /* Every element has been freed; do not keep a dangling list head.  */
1406   rd->incoming_edges = NULL;
1407
1408   return 1;
1409 }
1410
1411 /* Return true if BB has no executable statements other than a simple
1412    ctrl flow instruction.  When the number of outgoing edges is one,
1413    this is equivalent to a "forwarder" block.  */
1414
1415 static bool
1416 redirection_block_p (basic_block bb)
1417 {
1418   gimple_stmt_iterator gsi = gsi_start_bb (bb);
1419
1420   /* Step over leading labels, debug statements and nops.  */
1421   for (; !gsi_end_p (gsi); gsi_next (&gsi))
1422     if (gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL
1423         && !is_gimple_debug (gsi_stmt (gsi))
1424         && !gimple_nop_p (gsi_stmt (gsi)))
1425       break;
1426
1427   /* Nothing executable is left: an empty block qualifies.  */
1428   if (gsi_end_p (gsi))
1429     return true;
1430
1431   /* Whatever comes first has to be the terminating control statement.  */
1432   if (!gsi_stmt (gsi))
1433     return false;
1434   return (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1435           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1436           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1437 }
1438
1439 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1440    is reached via one or more specific incoming edges, we know which
1441    outgoing edge from BB will be traversed.
1442
1443    We want to redirect those incoming edges to the target of the
1444    appropriate outgoing edge.  Doing so avoids a conditional branch
1445    and may expose new optimization opportunities.  Note that we have
1446    to update dominator tree and SSA graph after such changes.
1447
1448    The key to keeping the SSA graph update manageable is to duplicate
1449    the side effects occurring in BB so that those side effects still
1450    occur on the paths which bypass BB after redirecting edges.
1451
1452    We accomplish this by creating duplicates of BB and arranging for
1453    the duplicates to unconditionally pass control to one specific
1454    successor of BB.  We then revector the incoming edges into BB to
1455    the appropriate duplicate of BB.
1456
1457    If NOLOOP_ONLY is true, we only perform the threading as long as it
1458    does not affect the structure of the loops in a nontrivial way.
1459
1460    If JOINERS is true, then thread through joiner blocks as well.  */
1461
1462 static bool
1463 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1464 {
1465   /* E is an incoming edge into BB that we may or may not want to
1466      redirect to a duplicate of BB.  */
1467   edge e, e2;
1468   edge_iterator ei;
1469   ssa_local_info_t local_info;
1470
1471   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1472
1473   /* To avoid scanning a linear array for the element we need we instead
1474      use a hash table.  For normal code there should be no noticeable
1475      difference.  However, if we have a block with a large number of
1476      incoming and outgoing edges such linear searches can get expensive.  */
1477   redirection_data
1478     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1479
1480   /* Record each unique threaded destination into a hash table for
1481      efficient lookups.  */
1482   FOR_EACH_EDGE (e, ei, bb->preds)
1483     {
1484       if (e->aux == NULL)
1485         continue;
1486
1487       vec<jump_thread_edge *> *path = THREAD_PATH (e);
1488
1489       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1490           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1491         continue;
1492
1493       e2 = path->last ()->e;
1494       if (!e2 || noloop_only)
1495         {
1496           /* If NOLOOP_ONLY is true, we only allow threading through the
1497              header of a loop to exit edges.  */
1498
1499           /* One case occurs when there was loop header buried in a jump
1500              threading path that crosses loop boundaries.  We do not try
1501              and thread this elsewhere, so just cancel the jump threading
1502              request by clearing the AUX field now.  */
1503           if ((bb->loop_father != e2->src->loop_father
1504                && !loop_exit_edge_p (e2->src->loop_father, e2))
1505               || (e2->src->loop_father != e2->dest->loop_father
1506                   && !loop_exit_edge_p (e2->src->loop_father, e2)))
1507             {
1508               /* Since this case is not handled by our special code
1509                  to thread through a loop header, we must explicitly
1510                  cancel the threading request here.  */
1511               delete_jump_thread_path (path);
1512               e->aux = NULL;
1513               continue;
1514             }
1515
1516           /* Another case occurs when trying to thread through our
1517              own loop header, possibly from inside the loop.  We will
1518              thread these later.  */
1519           unsigned int i;
1520           for (i = 1; i < path->length (); i++)
1521             {
1522               if ((*path)[i]->e->src == bb->loop_father->header
1523                   && (!loop_exit_edge_p (bb->loop_father, e2)
1524                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1525                 break;
1526             }
1527
1528           if (i != path->length ())
1529             continue;
1530         }
1531
1532       /* Insert the outgoing edge into the hash table if it is not
1533          already in the hash table.  */
1534       lookup_redirection_data (e, INSERT);
1535     }
1536
1537   /* We do not update dominance info.  */
1538   free_dominance_info (CDI_DOMINATORS);
1539
1540   /* We know we only thread through the loop header to loop exits.
1541      Let the basic block duplication hook know we are not creating
1542      a multiple entry loop.  */
1543   if (noloop_only
1544       && bb == bb->loop_father->header)
1545     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1546
1547   /* Now create duplicates of BB.
1548
1549      Note that for a block with a high outgoing degree we can waste
1550      a lot of time and memory creating and destroying useless edges.
1551
1552      So we first duplicate BB and remove the control structure at the
1553      tail of the duplicate as well as all outgoing edges from the
1554      duplicate.  We then use that duplicate block as a template for
1555      the rest of the duplicates.  */
1556   local_info.template_block = NULL;
1557   local_info.bb = bb;
1558   local_info.jumps_threaded = false;
1559   redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1560                             (&local_info);
1561
1562   /* The template does not have an outgoing edge.  Create that outgoing
1563      edge and update PHI nodes as the edge's target as necessary.
1564
1565      We do this after creating all the duplicates to avoid creating
1566      unnecessary edges.  */
1567   redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1568                             (&local_info);
1569
1570   /* The hash table traversals above created the duplicate blocks (and the
1571      statements within the duplicate blocks).  This loop creates PHI nodes for
1572      the duplicated blocks and redirects the incoming edges into BB to reach
1573      the duplicates of BB.  */
1574   redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1575                             (&local_info);
1576
1577   /* Done with this block.  Clear REDIRECTION_DATA.  */
1578   delete redirection_data;
1579   redirection_data = NULL;
1580
1581   if (noloop_only
1582       && bb == bb->loop_father->header)
1583     set_loop_copy (bb->loop_father, NULL);
1584
1585   BITMAP_FREE (local_info.duplicate_blocks);
1586   local_info.duplicate_blocks = NULL;
1587
1588   /* Indicate to our caller whether or not any jumps were threaded.  */
1589   return local_info.jumps_threaded;
1590 }
1591
1592 /* Wrapper for thread_block_1 so that we can first handle jump
1593    thread paths which do not involve copying joiner blocks, then
1594    handle jump thread paths which have joiner blocks.
1595
1596    By doing things this way we can be as aggressive as possible and
1597    not worry that copying a joiner block will create a jump threading
1598    opportunity.  */
1599
1600 static bool
1601 thread_block (basic_block bb, bool noloop_only)
1602 {
1603   bool retval;
1604   retval = thread_block_1 (bb, noloop_only, false);
1605   retval |= thread_block_1 (bb, noloop_only, true);
1606   return retval;
1607 }
1608
1609
1610 /* Threads edge E through E->dest to the edge THREAD_TARGET (E).  Returns the
1611    copy of E->dest created during threading, or E->dest if it was not necessary
1612    to copy it (E is its single predecessor).  */
1613
1614 static basic_block
1615 thread_single_edge (edge e)
1616 {
1617   basic_block bb = e->dest;
1618   struct redirection_data rd;
1619   vec<jump_thread_edge *> *path = THREAD_PATH (e);
1620   edge eto = (*path)[1]->e;
1621
1622   for (unsigned int i = 0; i < path->length (); i++)
1623     delete (*path)[i];
1624   delete path;
1625   e->aux = NULL;
1626
1627   thread_stats.num_threaded_edges++;
1628
1629   if (single_pred_p (bb))
1630     {
1631       /* If BB has just a single predecessor, we should only remove the
1632          control statements at its end, and successors except for ETO.  */
1633       remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1634
1635       /* And fixup the flags on the single remaining edge.  */
1636       eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1637       eto->flags |= EDGE_FALLTHRU;
1638
1639       return bb;
1640     }
1641
1642   /* Otherwise, we need to create a copy.  */
1643   if (e->dest == eto->src)
1644     update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1645
1646   vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1647   jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1648   npath->safe_push (x);
1649
1650   x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1651   npath->safe_push (x);
1652   rd.path = npath;
1653
1654   create_block_for_threading (bb, &rd, 0, NULL);
1655   remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1656   create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1657
1658   if (dump_file && (dump_flags & TDF_DETAILS))
1659     fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
1660              e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1661
1662   rd.dup_blocks[0]->count = e->count;
1663   rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1664   single_succ_edge (rd.dup_blocks[0])->count = e->count;
1665   redirect_edge_and_branch (e, rd.dup_blocks[0]);
1666   flush_pending_stmts (e);
1667
1668   return rd.dup_blocks[0];
1669 }
1670
1671 /* Callback for dfs_enumerate_from.  Returns true if BB is different
1672    from STOP and DBDS_CE_STOP.  */
1673
1674 static basic_block dbds_ce_stop;
1675 static bool
1676 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1677 {
1678   return (bb != (const_basic_block) stop
1679           && bb != dbds_ce_stop);
1680 }
1681
/* Possible outcomes of evaluating the dominance relationship between a
   block BB and the latch of a LOOP.  */

enum bb_dom_status
{
  DOMST_NONDOMINATING,  /* BB does not dominate the latch of the LOOP.  */
  DOMST_LOOP_BROKEN,    /* The LOOP is broken (there is no path from the
                           header to its latch).  */
  DOMST_DOMINATING      /* BB dominates the latch of the LOOP.  */
};
1694
1695 static enum bb_dom_status
1696 determine_bb_domination_status (struct loop *loop, basic_block bb)
1697 {
1698   basic_block *bblocks;
1699   unsigned nblocks, i;
1700   bool bb_reachable = false;
1701   edge_iterator ei;
1702   edge e;
1703
1704   /* This function assumes BB is a successor of LOOP->header.
1705      If that is not the case return DOMST_NONDOMINATING which
1706      is always safe.  */
1707     {
1708       bool ok = false;
1709
1710       FOR_EACH_EDGE (e, ei, bb->preds)
1711         {
1712           if (e->src == loop->header)
1713             {
1714               ok = true;
1715               break;
1716             }
1717         }
1718
1719       if (!ok)
1720         return DOMST_NONDOMINATING;
1721     }
1722
1723   if (bb == loop->latch)
1724     return DOMST_DOMINATING;
1725
1726   /* Check that BB dominates LOOP->latch, and that it is back-reachable
1727      from it.  */
1728
1729   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1730   dbds_ce_stop = loop->header;
1731   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1732                                 bblocks, loop->num_nodes, bb);
1733   for (i = 0; i < nblocks; i++)
1734     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1735       {
1736         if (e->src == loop->header)
1737           {
1738             free (bblocks);
1739             return DOMST_NONDOMINATING;
1740           }
1741         if (e->src == bb)
1742           bb_reachable = true;
1743       }
1744
1745   free (bblocks);
1746   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1747 }
1748
1749 /* Return true if BB is part of the new pre-header that is created
1750    when threading the latch to DATA.  */
1751
1752 static bool
1753 def_split_header_continue_p (const_basic_block bb, const void *data)
1754 {
1755   const_basic_block new_header = (const_basic_block) data;
1756   const struct loop *l;
1757
1758   if (bb == new_header
1759       || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1760     return false;
1761   for (l = bb->loop_father; l; l = loop_outer (l))
1762     if (l == new_header->loop_father)
1763       return true;
1764   return false;
1765 }
1766
1767 /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
1768    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1769    to the inside of the loop.  */
1770
1771 static bool
1772 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1773 {
1774   basic_block header = loop->header;
1775   edge e, tgt_edge, latch = loop_latch_edge (loop);
1776   edge_iterator ei;
1777   basic_block tgt_bb, atgt_bb;
1778   enum bb_dom_status domst;
1779
1780   /* We have already threaded through headers to exits, so all the threading
1781      requests now are to the inside of the loop.  We need to avoid creating
1782      irreducible regions (i.e., loops with more than one entry block), and
1783      also loop with several latch edges, or new subloops of the loop (although
1784      there are cases where it might be appropriate, it is difficult to decide,
1785      and doing it wrongly may confuse other optimizers).
1786
1787      We could handle more general cases here.  However, the intention is to
1788      preserve some information about the loop, which is impossible if its
1789      structure changes significantly, in a way that is not well understood.
1790      Thus we only handle few important special cases, in which also updating
1791      of the loop-carried information should be feasible:
1792
1793      1) Propagation of latch edge to a block that dominates the latch block
1794         of a loop.  This aims to handle the following idiom:
1795
1796         first = 1;
1797         while (1)
1798           {
1799             if (first)
1800               initialize;
1801             first = 0;
1802             body;
1803           }
1804
1805         After threading the latch edge, this becomes
1806
1807         first = 1;
1808         if (first)
1809           initialize;
1810         while (1)
1811           {
1812             first = 0;
1813             body;
1814           }
1815
1816         The original header of the loop is moved out of it, and we may thread
1817         the remaining edges through it without further constraints.
1818
1819      2) All entry edges are propagated to a single basic block that dominates
1820         the latch block of the loop.  This aims to handle the following idiom
1821         (normally created for "for" loops):
1822
1823         i = 0;
1824         while (1)
1825           {
1826             if (i >= 100)
1827               break;
1828             body;
1829             i++;
1830           }
1831
1832         This becomes
1833
1834         i = 0;
1835         while (1)
1836           {
1837             body;
1838             i++;
1839             if (i >= 100)
1840               break;
1841           }
1842      */
1843
1844   /* Threading through the header won't improve the code if the header has just
1845      one successor.  */
1846   if (single_succ_p (header))
1847     goto fail;
1848
1849   /* If we threaded the latch using a joiner block, we cancel the
1850      threading opportunity out of an abundance of caution.  However,
1851      still allow threading from outside to inside the loop.  */
1852   if (latch->aux)
1853     {
1854       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1855       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1856         {
1857           delete_jump_thread_path (path);
1858           latch->aux = NULL;
1859         }
1860     }
1861
1862   if (latch->aux)
1863     {
1864       vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1865       tgt_edge = (*path)[1]->e;
1866       tgt_bb = tgt_edge->dest;
1867     }
1868   else if (!may_peel_loop_headers
1869            && !redirection_block_p (loop->header))
1870     goto fail;
1871   else
1872     {
1873       tgt_bb = NULL;
1874       tgt_edge = NULL;
1875       FOR_EACH_EDGE (e, ei, header->preds)
1876         {
1877           if (!e->aux)
1878             {
1879               if (e == latch)
1880                 continue;
1881
1882               /* If latch is not threaded, and there is a header
1883                  edge that is not threaded, we would create loop
1884                  with multiple entries.  */
1885               goto fail;
1886             }
1887
1888           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1889
1890           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1891             goto fail;
1892           tgt_edge = (*path)[1]->e;
1893           atgt_bb = tgt_edge->dest;
1894           if (!tgt_bb)
1895             tgt_bb = atgt_bb;
1896           /* Two targets of threading would make us create loop
1897              with multiple entries.  */
1898           else if (tgt_bb != atgt_bb)
1899             goto fail;
1900         }
1901
1902       if (!tgt_bb)
1903         {
1904           /* There are no threading requests.  */
1905           return false;
1906         }
1907
1908       /* Redirecting to empty loop latch is useless.  */
1909       if (tgt_bb == loop->latch
1910           && empty_block_p (loop->latch))
1911         goto fail;
1912     }
1913
1914   /* The target block must dominate the loop latch, otherwise we would be
1915      creating a subloop.  */
1916   domst = determine_bb_domination_status (loop, tgt_bb);
1917   if (domst == DOMST_NONDOMINATING)
1918     goto fail;
1919   if (domst == DOMST_LOOP_BROKEN)
1920     {
1921       /* If the loop ceased to exist, mark it as such, and thread through its
1922          original header.  */
1923       mark_loop_for_removal (loop);
1924       return thread_block (header, false);
1925     }
1926
1927   if (tgt_bb->loop_father->header == tgt_bb)
1928     {
1929       /* If the target of the threading is a header of a subloop, we need
1930          to create a preheader for it, so that the headers of the two loops
1931          do not merge.  */
1932       if (EDGE_COUNT (tgt_bb->preds) > 2)
1933         {
1934           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1935           gcc_assert (tgt_bb != NULL);
1936         }
1937       else
1938         tgt_bb = split_edge (tgt_edge);
1939     }
1940
1941   if (latch->aux)
1942     {
1943       basic_block *bblocks;
1944       unsigned nblocks, i;
1945
1946       /* First handle the case latch edge is redirected.  We are copying
1947          the loop header but not creating a multiple entry loop.  Make the
1948          cfg manipulation code aware of that fact.  */
1949       set_loop_copy (loop, loop);
1950       loop->latch = thread_single_edge (latch);
1951       set_loop_copy (loop, NULL);
1952       gcc_assert (single_succ (loop->latch) == tgt_bb);
1953       loop->header = tgt_bb;
1954
1955       /* Remove the new pre-header blocks from our loop.  */
1956       bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1957       nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1958                                     bblocks, loop->num_nodes, tgt_bb);
1959       for (i = 0; i < nblocks; i++)
1960         if (bblocks[i]->loop_father == loop)
1961           {
1962             remove_bb_from_loops (bblocks[i]);
1963             add_bb_to_loop (bblocks[i], loop_outer (loop));
1964           }
1965       free (bblocks);
1966
1967       /* If the new header has multiple latches mark it so.  */
1968       FOR_EACH_EDGE (e, ei, loop->header->preds)
1969         if (e->src->loop_father == loop
1970             && e->src != loop->latch)
1971           {
1972             loop->latch = NULL;
1973             loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1974           }
1975
1976       /* Cancel remaining threading requests that would make the
1977          loop a multiple entry loop.  */
1978       FOR_EACH_EDGE (e, ei, header->preds)
1979         {
1980           edge e2;
1981
1982           if (e->aux == NULL)
1983             continue;
1984
1985           vec<jump_thread_edge *> *path = THREAD_PATH (e);
1986           e2 = path->last ()->e;
1987
1988           if (e->src->loop_father != e2->dest->loop_father
1989               && e2->dest != loop->header)
1990             {
1991               delete_jump_thread_path (path);
1992               e->aux = NULL;
1993             }
1994         }
1995
1996       /* Thread the remaining edges through the former header.  */
1997       thread_block (header, false);
1998     }
1999   else
2000     {
2001       basic_block new_preheader;
2002
2003       /* Now consider the case entry edges are redirected to the new entry
2004          block.  Remember one entry edge, so that we can find the new
2005          preheader (its destination after threading).  */
2006       FOR_EACH_EDGE (e, ei, header->preds)
2007         {
2008           if (e->aux)
2009             break;
2010         }
2011
2012       /* The duplicate of the header is the new preheader of the loop.  Ensure
2013          that it is placed correctly in the loop hierarchy.  */
2014       set_loop_copy (loop, loop_outer (loop));
2015
2016       thread_block (header, false);
2017       set_loop_copy (loop, NULL);
2018       new_preheader = e->dest;
2019
2020       /* Create the new latch block.  This is always necessary, as the latch
2021          must have only a single successor, but the original header had at
2022          least two successors.  */
2023       loop->latch = NULL;
2024       mfb_kj_edge = single_succ_edge (new_preheader);
2025       loop->header = mfb_kj_edge->dest;
2026       latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2027       loop->header = latch->dest;
2028       loop->latch = latch->src;
2029     }
2030
2031   return true;
2032
2033 fail:
2034   /* We failed to thread anything.  Cancel the requests.  */
2035   FOR_EACH_EDGE (e, ei, header->preds)
2036     {
2037       vec<jump_thread_edge *> *path = THREAD_PATH (e);
2038
2039       if (path)
2040         {
2041           delete_jump_thread_path (path);
2042           e->aux = NULL;
2043         }
2044     }
2045   return false;
2046 }
2047
2048 /* E1 and E2 are edges into the same basic block.  Return TRUE if the
2049    PHI arguments associated with those edges are equal or there are no
2050    PHI arguments, otherwise return FALSE.  */
2051
2052 static bool
2053 phi_args_equal_on_edges (edge e1, edge e2)
2054 {
2055   gimple_stmt_iterator gsi;
2056   int indx1 = e1->dest_idx;
2057   int indx2 = e2->dest_idx;
2058
2059   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2060     {
2061       gimple phi = gsi_stmt (gsi);
2062
2063       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2064                             gimple_phi_arg_def (phi, indx2), 0))
2065         return false;
2066     }
2067   return true;
2068 }
2069
2070 /* Walk through the registered jump threads and convert them into a
2071    form convenient for this pass.
2072
2073    Any block which has incoming edges threaded to outgoing edges
2074    will have its entry in THREADED_BLOCK set.
2075
2076    Any threaded edge will have its new outgoing edge stored in the
2077    original edge's AUX field.
2078
2079    This form avoids the need to walk all the edges in the CFG to
2080    discover blocks which need processing and avoids unnecessary
2081    hash table lookups to map from threaded edge to new target.  */
2082
2083 static void
2084 mark_threaded_blocks (bitmap threaded_blocks)
2085 {
2086   unsigned int i;
2087   bitmap_iterator bi;
2088   bitmap tmp = BITMAP_ALLOC (NULL);
2089   basic_block bb;
2090   edge e;
2091   edge_iterator ei;
2092
2093   /* It is possible to have jump threads in which one is a subpath
2094      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
2095      block and (B, C), (C, D) where no joiner block exists.
2096
2097      When this occurs ignore the jump thread request with the joiner
2098      block.  It's totally subsumed by the simpler jump thread request.
2099
2100      This results in less block copying, simpler CFGs.  More importantly,
2101      when we duplicate the joiner block, B, in this case we will create
2102      a new threading opportunity that we wouldn't be able to optimize
2103      until the next jump threading iteration.
2104
2105      So first convert the jump thread requests which do not require a
2106      joiner block.  */
2107   for (i = 0; i < paths.length (); i++)
2108     {
2109       vec<jump_thread_edge *> *path = paths[i];
2110
2111       if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2112         {
2113           edge e = (*path)[0]->e;
2114           e->aux = (void *)path;
2115           bitmap_set_bit (tmp, e->dest->index);
2116         }
2117     }
2118
2119   /* Now iterate again, converting cases where we want to thread
2120      through a joiner block, but only if no other edge on the path
2121      already has a jump thread attached to it.  We do this in two passes,
2122      to avoid situations where the order in the paths vec can hide overlapping
2123      threads (the path is recorded on the incoming edge, so we would miss
2124      cases where the second path starts at a downstream edge on the same
2125      path).  First record all joiner paths, deleting any in the unexpected
2126      case where there is already a path for that incoming edge.  */
2127   for (i = 0; i < paths.length (); i++)
2128     {
2129       vec<jump_thread_edge *> *path = paths[i];
2130
2131       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2132         {
2133           /* Attach the path to the starting edge if none is yet recorded.  */
2134           if ((*path)[0]->e->aux == NULL)
2135             (*path)[0]->e->aux = path;
2136           else if (dump_file && (dump_flags & TDF_DETAILS))
2137             dump_jump_thread_path (dump_file, *path, false);
2138         }
2139     }
2140   /* Second, look for paths that have any other jump thread attached to
2141      them, and either finish converting them or cancel them.  */
2142   for (i = 0; i < paths.length (); i++)
2143     {
2144       vec<jump_thread_edge *> *path = paths[i];
2145       edge e = (*path)[0]->e;
2146
2147       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2148         {
2149           unsigned int j;
2150           for (j = 1; j < path->length (); j++)
2151             if ((*path)[j]->e->aux != NULL)
2152               break;
2153
2154           /* If we iterated through the entire path without exiting the loop,
2155              then we are good to go, record it.  */
2156           if (j == path->length ())
2157             bitmap_set_bit (tmp, e->dest->index);
2158           else
2159             {
2160               e->aux = NULL;
2161               if (dump_file && (dump_flags & TDF_DETAILS))
2162                 dump_jump_thread_path (dump_file, *path, false);
2163             }
2164         }
2165     }
2166
2167   /* If optimizing for size, only thread through block if we don't have
2168      to duplicate it or it's an otherwise empty redirection block.  */
2169   if (optimize_function_for_size_p (cfun))
2170     {
2171       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2172         {
2173           bb = BASIC_BLOCK_FOR_FN (cfun, i);
2174           if (EDGE_COUNT (bb->preds) > 1
2175               && !redirection_block_p (bb))
2176             {
2177               FOR_EACH_EDGE (e, ei, bb->preds)
2178                 {
2179                   if (e->aux)
2180                     {
2181                       vec<jump_thread_edge *> *path = THREAD_PATH (e);
2182                       delete_jump_thread_path (path);
2183                       e->aux = NULL;
2184                     }
2185                 }
2186             }
2187           else
2188             bitmap_set_bit (threaded_blocks, i);
2189         }
2190     }
2191   else
2192     bitmap_copy (threaded_blocks, tmp);
2193
2194   /* Look for jump threading paths which cross multiple loop headers.
2195
2196      The code to thread through loop headers will change the CFG in ways
2197      that break assumptions made by the loop optimization code.
2198
2199      We don't want to blindly cancel the requests.  We can instead do better
2200      by trimming off the end of the jump thread path.  */
2201   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2202     {
2203       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2204       FOR_EACH_EDGE (e, ei, bb->preds)
2205         {
2206           if (e->aux)
2207             {
2208               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2209
2210               for (unsigned int i = 0, crossed_headers = 0;
2211                    i < path->length ();
2212                    i++)
2213                 {
2214                   basic_block dest = (*path)[i]->e->dest;
2215                   crossed_headers += (dest == dest->loop_father->header);
2216                   if (crossed_headers > 1)
2217                     {
2218                       /* Trim from entry I onwards.  */
2219                       for (unsigned int j = i; j < path->length (); j++)
2220                         delete (*path)[j];
2221                       path->truncate (i);
2222
2223                       /* Now that we've truncated the path, make sure
2224                          what's left is still valid.   We need at least
2225                          two edges on the path and the last edge can not
2226                          be a joiner.  This should never happen, but let's
2227                          be safe.  */
2228                       if (path->length () < 2
2229                           || (path->last ()->type
2230                               == EDGE_COPY_SRC_JOINER_BLOCK))
2231                         {
2232                           delete_jump_thread_path (path);
2233                           e->aux = NULL;
2234                         }
2235                       break;
2236                     }
2237                 }
2238             }
2239         }
2240     }
2241
2242   /* If we have a joiner block (J) which has two successors S1 and S2 and
2243      we are threading though S1 and the final destination of the thread
2244      is S2, then we must verify that any PHI nodes in S2 have the same
2245      PHI arguments for the edge J->S2 and J->S1->...->S2.
2246
2247      We used to detect this prior to registering the jump thread, but
2248      that prohibits propagation of edge equivalences into non-dominated
2249      PHI nodes as the equivalency test might occur before propagation.
2250
2251      This must also occur after we truncate any jump threading paths
2252      as this scenario may only show up after truncation.
2253
2254      This works for now, but will need improvement as part of the FSA
2255      optimization.
2256
2257      Note since we've moved the thread request data to the edges,
2258      we have to iterate on those rather than the threaded_edges vector.  */
2259   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2260     {
2261       bb = BASIC_BLOCK_FOR_FN (cfun, i);
2262       FOR_EACH_EDGE (e, ei, bb->preds)
2263         {
2264           if (e->aux)
2265             {
2266               vec<jump_thread_edge *> *path = THREAD_PATH (e);
2267               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2268
2269               if (have_joiner)
2270                 {
2271                   basic_block joiner = e->dest;
2272                   edge final_edge = path->last ()->e;
2273                   basic_block final_dest = final_edge->dest;
2274                   edge e2 = find_edge (joiner, final_dest);
2275
2276                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2277                     {
2278                       delete_jump_thread_path (path);
2279                       e->aux = NULL;
2280                     }
2281                 }
2282             }
2283         }
2284     }
2285
2286   BITMAP_FREE (tmp);
2287 }
2288
2289
2290 /* Return TRUE if BB ends with a switch statement or a computed goto.
2291    Otherwise return false.  */
2292 static bool
2293 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2294 {
2295   gimple stmt = last_stmt (bb);
2296   if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2297     return true;
2298   if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2299       && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2300     return true;
2301   return false;
2302 }
2303
/* Walk through all blocks and thread incoming edges to the appropriate
   outgoing edge for the jump threading requests queued in PATHS by
   register_jump_thread.  If PATHS does not exist, nothing is done and
   false is returned.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
   loop headers if it does not simplify the loop.

   Returns true if one or more edges were threaded, false otherwise.
   PATHS is released before returning, and LOOPS_NEED_FIXUP is set when
   the return value is true.

   NOTE(review): the return value is only updated by the first two
   phases below; a successful thread_block call in the final phase
   does not set RETVAL.  Confirm that this is intended.  */

bool
thread_through_all_blocks (bool may_peel_loop_headers)
{
  bool retval = false;
  unsigned int i;
  bitmap_iterator bi;
  bitmap threaded_blocks;
  struct loop *loop;

  /* No jump thread was ever registered, so there is no work to do.  */
  if (!paths.exists ())
    return false;

  threaded_blocks = BITMAP_ALLOC (NULL);
  /* Start this invocation with zeroed statistics.  */
  memset (&thread_stats, 0, sizeof (thread_stats));

  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* First perform the threading requests that do not affect
     loop structure.  */
  EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);

      /* Blocks without any incoming edge are skipped.  */
      if (EDGE_COUNT (bb->preds) > 0)
        retval |= thread_block (bb, true);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      /* Only loops that still have a header, and whose header was
         marked in THREADED_BLOCKS, are of interest.  */
      if (!loop->header
          || !bitmap_bit_p (threaded_blocks, loop->header->index))
        continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  /* Any jump threading paths that are still attached to edges at this
     point must be one of two cases.

     First, we could have a jump threading path which went from outside
     a loop to inside a loop that was ignored because a prior jump thread
     across a backedge was realized (which indirectly causes the loop
     above to ignore the latter thread).  We can detect these because the
     loop structures will be different and we do not currently try to
     optimize this case.

     Second, we could be threading across a backedge to a point within the
     same loop.  This occurs for the FSA/FSM optimization and we would
     like to optimize it.  However, we have to be very careful as this
     may completely scramble the loop structures, with the result being
     irreducible loops causing us to throw away our loop structure.

     As a compromise for the latter case, if the thread path ends in
     a block where the last statement is a multiway branch, then go
     ahead and thread it, else ignore it.  */
  basic_block bb;
  edge e;
  FOR_EACH_BB_FN (bb, cfun)
    {
      /* If we do end up threading here, we can remove elements from
         BB->preds.  Thus we can not use the FOR_EACH_EDGE iterator.

         The iterator is advanced by hand: every branch that cancels a
         path (or finds no path on the edge) calls ei_next, while the
         branch that threads successfully does not.  Presumably the
         threaded edge is no longer in BB->preds at that point, so the
         current position already names the next edge to examine.  */
      for (edge_iterator ei = ei_start (bb->preds);
           (e = ei_safe_edge (ei));)
        if (e->aux)
          {
            vec<jump_thread_edge *> *path = THREAD_PATH (e);

            /* Case 1, threading from outside to inside the loop
               after we'd already threaded through the header.  */
            if ((*path)[0]->e->dest->loop_father
                != path->last ()->e->src->loop_father)
              {
                delete_jump_thread_path (path);
                e->aux = NULL;
                ei_next (&ei);
              }
           else if (bb_ends_with_multiway_branch (path->last ()->e->src))
              {
                /* The code to thread through loop headers may have
                   split a block with jump threads attached to it.

                   We can identify this with a disjoint jump threading
                   path.  If found, just remove it.

                   Note this index I shadows the function-level I.  PATH
                   is not touched again after it has been deleted: we
                   break out immediately and the E->aux test below then
                   fails.  */
                for (unsigned int i = 0; i < path->length () - 1; i++)
                  if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
                    {
                      delete_jump_thread_path (path);
                      e->aux = NULL;
                      ei_next (&ei);
                      break;
                    }

                /* Our path is still valid, thread it.  */
                if (e->aux)
                  {
                    /* Capture the loop before threading.  This LOOP
                       shadows the function-level LOOP.  */
                    struct loop *loop = (*path)[0]->e->dest->loop_father;

                    if (thread_block ((*path)[0]->e->dest, false))
                      {
                        /* This jump thread likely totally scrambled this loop.
                           So arrange for it to be fixed up.  */
                        loop->header = NULL;
                        loop->latch = NULL;
                        e->aux = NULL;
                      }
                    else
                      {
                        /* Nothing was threaded; cancel the request and
                           move on to the next incoming edge.  */
                        delete_jump_thread_path (path);
                        e->aux = NULL;
                        ei_next (&ei);
                      }
                  }
              }
           else
              {
                /* Same-loop path that does not end in a multiway
                   branch: ignore it, as described above.  */
                delete_jump_thread_path (path);
                e->aux = NULL;
                ei_next (&ei);
              }
          }
        else
          ei_next (&ei);
    }

  statistics_counter_event (cfun, "Jumps threaded",
                            thread_stats.num_threaded_edges);

  free_original_copy_tables ();

  BITMAP_FREE (threaded_blocks);
  threaded_blocks = NULL;
  paths.release ();

  /* Threading changed the CFG, so ask for the loop structures to be
     fixed up.  */
  if (retval)
    loops_state_set (LOOPS_NEED_FIXUP);

  return retval;
}
2458
2459 /* Delete the jump threading path PATH.  We have to explcitly delete
2460    each entry in the vector, then the container.  */
2461
2462 void
2463 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2464 {
2465   for (unsigned int i = 0; i < path->length (); i++)
2466     delete (*path)[i];
2467   path->release();
2468 }
2469
2470 /* Register a jump threading opportunity.  We queue up all the jump
2471    threading opportunities discovered by a pass and update the CFG
2472    and SSA form all at once.
2473
2474    E is the edge we can thread, E2 is the new target edge, i.e., we
2475    are effectively recording that E->dest can be changed to E2->dest
2476    after fixing the SSA graph.  */
2477
2478 void
2479 register_jump_thread (vec<jump_thread_edge *> *path)
2480 {
2481   if (!dbg_cnt (registered_jump_thread))
2482     {
2483       delete_jump_thread_path (path);
2484       return;
2485     }
2486
2487   /* First make sure there are no NULL outgoing edges on the jump threading
2488      path.  That can happen for jumping to a constant address.  */
2489   for (unsigned int i = 0; i < path->length (); i++)
2490     if ((*path)[i]->e == NULL)
2491       {
2492         if (dump_file && (dump_flags & TDF_DETAILS))
2493           {
2494             fprintf (dump_file,
2495                      "Found NULL edge in jump threading path.  Cancelling jump thread:\n");
2496             dump_jump_thread_path (dump_file, *path, false);
2497           }
2498
2499         delete_jump_thread_path (path);
2500         return;
2501       }
2502
2503   if (dump_file && (dump_flags & TDF_DETAILS))
2504     dump_jump_thread_path (dump_file, *path, true);
2505
2506   if (!paths.exists ())
2507     paths.create (5);
2508
2509   paths.safe_push (path);
2510 }