2015-10-18 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / tree-ssa-threadupdate.c
blob5632a888e46141b077a9966bb835335617fbfa29
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "backend.h"
25 #include "cfghooks.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "hard-reg-set.h"
29 #include "ssa.h"
30 #include "options.h"
31 #include "fold-const.h"
32 #include "flags.h"
33 #include "cfganal.h"
34 #include "internal-fn.h"
35 #include "gimple-iterator.h"
36 #include "tree-ssa.h"
37 #include "tree-ssa-threadupdate.h"
38 #include "dumpfile.h"
39 #include "cfgloop.h"
40 #include "dbgcnt.h"
41 #include "tree-cfg.h"
42 #include "tree-pass.h"
44 /* Given a block B, update the CFG and SSA graph to reflect redirecting
45 one or more in-edges to B to instead reach the destination of an
46 out-edge from B while preserving any side effects in B.
48 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
49 side effects of executing B.
51 1. Make a copy of B (including its outgoing edges and statements). Call
52 the copy B'. Note B' has no incoming edges or PHIs at this time.
54 2. Remove the control statement at the end of B' and all outgoing edges
55 except B'->C.
57 3. Add a new argument to each PHI in C with the same value as the existing
58 argument associated with edge B->C. Associate the new PHI arguments
59 with the edge B'->C.
61 4. For each PHI in B, find or create a PHI in B' with an identical
62 PHI_RESULT. Add an argument to the PHI in B' which has the same
63 value as the PHI in B associated with the edge A->B. Associate
64 the new argument in the PHI in B' with the edge A->B.
66 5. Change the edge A->B to A->B'.
68 5a. This automatically deletes any PHI arguments associated with the
69 edge A->B in B.
71 5b. This automatically associates each new argument added in step 4
72 with the edge A->B'.
74 6. Repeat for other incoming edges into B.
76 7. Put the duplicated resources in B and all the B' blocks into SSA form.
78 Note that block duplication can be minimized by first collecting the
79 set of unique destination blocks that the incoming edges should
80 be threaded to.
82 We reduce the number of edges and statements we create by not copying all
83 the outgoing edges and the control statement in step #1. We instead create
84 a template block without the outgoing edges and duplicate the template.
86 Another case this code handles is threading through a "joiner" block. In
87 this case, we do not know the destination of the joiner block, but one
88 of the outgoing edges from the joiner block leads to a threadable path. This
89 case largely works as outlined above, except the duplicate of the joiner
90 block still contains a full set of outgoing edges and its control statement.
91 We just redirect one of its outgoing edges to our jump threading path. */
94 /* Steps #5 and #6 of the above algorithm are best implemented by walking
95 all the incoming edges which thread to the same destination edge at
96 the same time. That avoids lots of table lookups to get information
97 for the destination edge.
99 To realize that implementation we create a list of incoming edges
100 which thread to the same outgoing edge. Thus to implement steps
101 #5 and #6 we traverse our hash table of outgoing edge information.
102 For each entry we walk the list of incoming edges which thread to
103 the current outgoing edge. */
105 struct el
107 edge e;
108 struct el *next;
111 /* Main data structure recording information regarding B's duplicate
112 blocks. */
114 /* We need to efficiently record the unique thread destinations of this
115 block and specific information associated with those destinations. We
116 may have many incoming edges threaded to the same outgoing edge. This
117 can be naturally implemented with a hash table. */
119 struct redirection_data : free_ptr_hash<redirection_data>
121 /* We support wiring up two block duplicates in a jump threading path.
123 One is a normal block copy where we remove the control statement
124 and wire up its single remaining outgoing edge to the thread path.
126 The other is a joiner block where we leave the control statement
127 in place, but wire one of the outgoing edges to a thread path.
129 In theory we could have multiple block duplicates in a jump
130 threading path, but I haven't tried that.
132 The duplicate blocks appear in this array in the same order in
133 which they appear in the jump thread path. */
134 basic_block dup_blocks[2];
136 /* The jump threading path. */
137 vec<jump_thread_edge *> *path;
139 /* A list of incoming edges which we want to thread to the
140 same path. */
141 struct el *incoming_edges;
143 /* hash_table support. */
144 static inline hashval_t hash (const redirection_data *);
145 static inline int equal (const redirection_data *, const redirection_data *);
148 /* Dump a jump threading path, including annotations about each
149 edge in the path. */
151 static void
152 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
153 bool registering)
155 fprintf (dump_file,
156 " %s%s jump thread: (%d, %d) incoming edge; ",
157 (registering ? "Registering" : "Cancelling"),
158 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
159 path[0]->e->src->index, path[0]->e->dest->index);
161 for (unsigned int i = 1; i < path.length (); i++)
163 /* We can get paths with a NULL edge when the final destination
164 of a jump thread turns out to be a constant address. We dump
165 those paths when debugging, so we have to be prepared for that
166 possibility here. */
167 if (path[i]->e == NULL)
168 continue;
170 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
171 fprintf (dump_file, " (%d, %d) joiner; ",
172 path[i]->e->src->index, path[i]->e->dest->index);
173 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
174 fprintf (dump_file, " (%d, %d) normal;",
175 path[i]->e->src->index, path[i]->e->dest->index);
176 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
177 fprintf (dump_file, " (%d, %d) nocopy;",
178 path[i]->e->src->index, path[i]->e->dest->index);
179 if (path[0]->type == EDGE_FSM_THREAD)
180 fprintf (dump_file, " (%d, %d) ",
181 path[i]->e->src->index, path[i]->e->dest->index);
183 fputc ('\n', dump_file);
186 /* Simple hashing function. For any given incoming edge E, we're going
187 to be most concerned with the final destination of its jump thread
188 path. So hash on the block index of the final edge in the path. */
190 inline hashval_t
191 redirection_data::hash (const redirection_data *p)
193 vec<jump_thread_edge *> *path = p->path;
194 return path->last ()->e->dest->index;
197 /* Given two hash table entries, return true if they have the same
198 jump threading path. */
199 inline int
200 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
202 vec<jump_thread_edge *> *path1 = p1->path;
203 vec<jump_thread_edge *> *path2 = p2->path;
205 if (path1->length () != path2->length ())
206 return false;
208 for (unsigned int i = 1; i < path1->length (); i++)
210 if ((*path1)[i]->type != (*path2)[i]->type
211 || (*path1)[i]->e != (*path2)[i]->e)
212 return false;
215 return true;
218 /* Rather than search all the edges in jump thread paths each time
219 DOM is able to simply if control statement, we build a hash table
220 with the deleted edges. We only care about the address of the edge,
221 not its contents. */
222 struct removed_edges : nofree_ptr_hash<edge_def>
224 static hashval_t hash (edge e) { return htab_hash_pointer (e); }
225 static bool equal (edge e1, edge e2) { return e1 == e2; }
228 static hash_table<removed_edges> *removed_edges;
230 /* Data structure of information to pass to hash table traversal routines. */
231 struct ssa_local_info_t
233 /* The current block we are working on. */
234 basic_block bb;
236 /* We only create a template block for the first duplicated block in a
237 jump threading path as we may need many duplicates of that block.
239 The second duplicate block in a path is specific to that path. Creating
240 and sharing a template for that block is considerably more difficult. */
241 basic_block template_block;
243 /* TRUE if we thread one or more jumps, FALSE otherwise. */
244 bool jumps_threaded;
246 /* Blocks duplicated for the thread. */
247 bitmap duplicate_blocks;
250 /* Passes which use the jump threading code register jump threading
251 opportunities as they are discovered. We keep the registered
252 jump threading opportunities in this vector as edge pairs
253 (original_edge, target_edge). */
254 static vec<vec<jump_thread_edge *> *> paths;
256 /* When we start updating the CFG for threading, data necessary for jump
257 threading is attached to the AUX field for the incoming edge. Use these
258 macros to access the underlying structure attached to the AUX field. */
259 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
261 /* Jump threading statistics. */
263 struct thread_stats_d
265 unsigned long num_threaded_edges;
268 struct thread_stats_d thread_stats;
271 /* Remove the last statement in block BB if it is a control statement
272 Also remove all outgoing edges except the edge which reaches DEST_BB.
273 If DEST_BB is NULL, then remove all outgoing edges. */
275 void
276 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
278 gimple_stmt_iterator gsi;
279 edge e;
280 edge_iterator ei;
282 gsi = gsi_last_bb (bb);
284 /* If the duplicate ends with a control statement, then remove it.
286 Note that if we are duplicating the template block rather than the
287 original basic block, then the duplicate might not have any real
288 statements in it. */
289 if (!gsi_end_p (gsi)
290 && gsi_stmt (gsi)
291 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
292 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
293 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
294 gsi_remove (&gsi, true);
296 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
298 if (e->dest != dest_bb)
299 remove_edge (e);
300 else
301 ei_next (&ei);
304 /* If the remaining edge is a loop exit, there must have
305 a removed edge that was not a loop exit.
307 In that case BB and possibly other blocks were previously
308 in the loop, but are now outside the loop. Thus, we need
309 to update the loop structures. */
310 if (single_succ_p (bb)
311 && loop_outer (bb->loop_father)
312 && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
313 loops_state_set (LOOPS_NEED_FIXUP);
316 /* Create a duplicate of BB. Record the duplicate block in an array
317 indexed by COUNT stored in RD. */
319 static void
320 create_block_for_threading (basic_block bb,
321 struct redirection_data *rd,
322 unsigned int count,
323 bitmap *duplicate_blocks)
325 edge_iterator ei;
326 edge e;
328 /* We can use the generic block duplication code and simply remove
329 the stuff we do not need. */
330 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
332 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
333 e->aux = NULL;
335 /* Zero out the profile, since the block is unreachable for now. */
336 rd->dup_blocks[count]->frequency = 0;
337 rd->dup_blocks[count]->count = 0;
338 if (duplicate_blocks)
339 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
342 /* Main data structure to hold information for duplicates of BB. */
344 static hash_table<redirection_data> *redirection_data;
346 /* Given an outgoing edge E lookup and return its entry in our hash table.
348 If INSERT is true, then we insert the entry into the hash table if
349 it is not already present. INCOMING_EDGE is added to the list of incoming
350 edges associated with E in the hash table. */
352 static struct redirection_data *
353 lookup_redirection_data (edge e, enum insert_option insert)
355 struct redirection_data **slot;
356 struct redirection_data *elt;
357 vec<jump_thread_edge *> *path = THREAD_PATH (e);
359 /* Build a hash table element so we can see if E is already
360 in the table. */
361 elt = XNEW (struct redirection_data);
362 elt->path = path;
363 elt->dup_blocks[0] = NULL;
364 elt->dup_blocks[1] = NULL;
365 elt->incoming_edges = NULL;
367 slot = redirection_data->find_slot (elt, insert);
369 /* This will only happen if INSERT is false and the entry is not
370 in the hash table. */
371 if (slot == NULL)
373 free (elt);
374 return NULL;
377 /* This will only happen if E was not in the hash table and
378 INSERT is true. */
379 if (*slot == NULL)
381 *slot = elt;
382 elt->incoming_edges = XNEW (struct el);
383 elt->incoming_edges->e = e;
384 elt->incoming_edges->next = NULL;
385 return elt;
387 /* E was in the hash table. */
388 else
390 /* Free ELT as we do not need it anymore, we will extract the
391 relevant entry from the hash table itself. */
392 free (elt);
394 /* Get the entry stored in the hash table. */
395 elt = *slot;
397 /* If insertion was requested, then we need to add INCOMING_EDGE
398 to the list of incoming edges associated with E. */
399 if (insert)
401 struct el *el = XNEW (struct el);
402 el->next = elt->incoming_edges;
403 el->e = e;
404 elt->incoming_edges = el;
407 return elt;
411 /* Similar to copy_phi_args, except that the PHI arg exists, it just
412 does not have a value associated with it. */
414 static void
415 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
417 int src_idx = src_e->dest_idx;
418 int tgt_idx = tgt_e->dest_idx;
420 /* Iterate over each PHI in e->dest. */
421 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
422 gsi2 = gsi_start_phis (tgt_e->dest);
423 !gsi_end_p (gsi);
424 gsi_next (&gsi), gsi_next (&gsi2))
426 gphi *src_phi = gsi.phi ();
427 gphi *dest_phi = gsi2.phi ();
428 tree val = gimple_phi_arg_def (src_phi, src_idx);
429 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
431 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
432 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
436 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
437 to see if it has constant value in a flow sensitive manner. Set
438 LOCUS to location of the constant phi arg and return the value.
439 Return DEF directly if either PATH or idx is ZERO. */
441 static tree
442 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
443 basic_block bb, int idx, source_location *locus)
445 tree arg;
446 gphi *def_phi;
447 basic_block def_bb;
449 if (path == NULL || idx == 0)
450 return def;
452 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
453 if (!def_phi)
454 return def;
456 def_bb = gimple_bb (def_phi);
457 /* Don't propagate loop invariants into deeper loops. */
458 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
459 return def;
461 /* Backtrack jump threading path from IDX to see if def has constant
462 value. */
463 for (int j = idx - 1; j >= 0; j--)
465 edge e = (*path)[j]->e;
466 if (e->dest == def_bb)
468 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
469 if (is_gimple_min_invariant (arg))
471 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
472 return arg;
474 break;
478 return def;
481 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
482 Try to backtrack jump threading PATH from node IDX to see if the arg
483 has constant value, copy constant value instead of argument itself
484 if yes. */
486 static void
487 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
488 vec<jump_thread_edge *> *path, int idx)
490 gphi_iterator gsi;
491 int src_indx = src_e->dest_idx;
493 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
495 gphi *phi = gsi.phi ();
496 tree def = gimple_phi_arg_def (phi, src_indx);
497 source_location locus = gimple_phi_arg_location (phi, src_indx);
499 if (TREE_CODE (def) == SSA_NAME
500 && !virtual_operand_p (gimple_phi_result (phi)))
501 def = get_value_locus_in_path (def, path, bb, idx, &locus);
503 add_phi_arg (phi, def, tgt_e, locus);
507 /* We have recently made a copy of ORIG_BB, including its outgoing
508 edges. The copy is NEW_BB. Every PHI node in every direct successor of
509 ORIG_BB has a new argument associated with edge from NEW_BB to the
510 successor. Initialize the PHI argument so that it is equal to the PHI
511 argument associated with the edge from ORIG_BB to the successor.
512 PATH and IDX are used to check if the new PHI argument has constant
513 value in a flow sensitive manner. */
515 static void
516 update_destination_phis (basic_block orig_bb, basic_block new_bb,
517 vec<jump_thread_edge *> *path, int idx)
519 edge_iterator ei;
520 edge e;
522 FOR_EACH_EDGE (e, ei, orig_bb->succs)
524 edge e2 = find_edge (new_bb, e->dest);
525 copy_phi_args (e->dest, e, e2, path, idx);
529 /* Given a duplicate block and its single destination (both stored
530 in RD). Create an edge between the duplicate and its single
531 destination.
533 Add an additional argument to any PHI nodes at the single
534 destination. IDX is the start node in jump threading path
535 we start to check to see if the new PHI argument has constant
536 value along the jump threading path. */
538 static void
539 create_edge_and_update_destination_phis (struct redirection_data *rd,
540 basic_block bb, int idx)
542 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
544 rescan_loop_exit (e, true, false);
545 e->probability = REG_BR_PROB_BASE;
546 e->count = bb->count;
548 /* We used to copy the thread path here. That was added in 2007
549 and dutifully updated through the representation changes in 2013.
551 In 2013 we added code to thread from an interior node through
552 the backedge to another interior node. That runs after the code
553 to thread through loop headers from outside the loop.
555 The latter may delete edges in the CFG, including those
556 which appeared in the jump threading path we copied here. Thus
557 we'd end up using a dangling pointer.
559 After reviewing the 2007/2011 code, I can't see how anything
560 depended on copying the AUX field and clearly copying the jump
561 threading path is problematical due to embedded edge pointers.
562 It has been removed. */
563 e->aux = NULL;
565 /* If there are any PHI nodes at the destination of the outgoing edge
566 from the duplicate block, then we will need to add a new argument
567 to them. The argument should have the same value as the argument
568 associated with the outgoing edge stored in RD. */
569 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
572 /* Look through PATH beginning at START and return TRUE if there are
573 any additional blocks that need to be duplicated. Otherwise,
574 return FALSE. */
575 static bool
576 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
577 unsigned int start)
579 for (unsigned int i = start + 1; i < path->length (); i++)
581 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
582 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
583 return true;
585 return false;
589 /* Compute the amount of profile count/frequency coming into the jump threading
590 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
591 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
592 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
593 identify blocks duplicated for jump threading, which have duplicated
594 edges that need to be ignored in the analysis. Return true if path contains
595 a joiner, false otherwise.
597 In the non-joiner case, this is straightforward - all the counts/frequency
598 flowing into the jump threading path should flow through the duplicated
599 block and out of the duplicated path.
601 In the joiner case, it is very tricky. Some of the counts flowing into
602 the original path go offpath at the joiner. The problem is that while
603 we know how much total count goes off-path in the original control flow,
604 we don't know how many of the counts corresponding to just the jump
605 threading path go offpath at the joiner.
607 For example, assume we have the following control flow and identified
608 jump threading paths:
610 A B C
611 \ | /
612 Ea \ |Eb / Ec
613 \ | /
614 v v v
615 J <-- Joiner
617 Eoff/ \Eon
620 Soff Son <--- Normal
622 Ed/ \ Ee
627 Jump threading paths: A -> J -> Son -> D (path 1)
628 C -> J -> Son -> E (path 2)
630 Note that the control flow could be more complicated:
631 - Each jump threading path may have more than one incoming edge. I.e. A and
632 Ea could represent multiple incoming blocks/edges that are included in
633 path 1.
634 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
635 before or after the "normal" copy block). These are not duplicated onto
636 the jump threading path, as they are single-successor.
637 - Any of the blocks along the path may have other incoming edges that
638 are not part of any jump threading path, but add profile counts along
639 the path.
641 In the aboe example, after all jump threading is complete, we will
642 end up with the following control flow:
644 A B C
645 | | |
646 Ea| |Eb |Ec
647 | | |
648 v v v
649 Ja J Jc
650 / \ / \Eon' / \
651 Eona/ \ ---/---\-------- \Eonc
652 / \ / / \ \
653 v v v v v
654 Sona Soff Son Sonc
655 \ /\ /
656 \___________ / \ _____/
657 \ / \/
658 vv v
661 The main issue to notice here is that when we are processing path 1
662 (A->J->Son->D) we need to figure out the outgoing edge weights to
663 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
664 sum of the incoming weights to D remain Ed. The problem with simply
665 assuming that Ja (and Jc when processing path 2) has the same outgoing
666 probabilities to its successors as the original block J, is that after
667 all paths are processed and other edges/counts removed (e.g. none
668 of Ec will reach D after processing path 2), we may end up with not
669 enough count flowing along duplicated edge Sona->D.
671 Therefore, in the case of a joiner, we keep track of all counts
672 coming in along the current path, as well as from predecessors not
673 on any jump threading path (Eb in the above example). While we
674 first assume that the duplicated Eona for Ja->Sona has the same
675 probability as the original, we later compensate for other jump
676 threading paths that may eliminate edges. We do that by keep track
677 of all counts coming into the original path that are not in a jump
678 thread (Eb in the above example, but as noted earlier, there could
679 be other predecessors incoming to the path at various points, such
680 as at Son). Call this cumulative non-path count coming into the path
681 before D as Enonpath. We then ensure that the count from Sona->D is as at
682 least as big as (Ed - Enonpath), but no bigger than the minimum
683 weight along the jump threading path. The probabilities of both the
684 original and duplicated joiner block J and Ja will be adjusted
685 accordingly after the updates. */
687 static bool
688 compute_path_counts (struct redirection_data *rd,
689 ssa_local_info_t *local_info,
690 gcov_type *path_in_count_ptr,
691 gcov_type *path_out_count_ptr,
692 int *path_in_freq_ptr)
694 edge e = rd->incoming_edges->e;
695 vec<jump_thread_edge *> *path = THREAD_PATH (e);
696 edge elast = path->last ()->e;
697 gcov_type nonpath_count = 0;
698 bool has_joiner = false;
699 gcov_type path_in_count = 0;
700 int path_in_freq = 0;
702 /* Start by accumulating incoming edge counts to the path's first bb
703 into a couple buckets:
704 path_in_count: total count of incoming edges that flow into the
705 current path.
706 nonpath_count: total count of incoming edges that are not
707 flowing along *any* path. These are the counts
708 that will still flow along the original path after
709 all path duplication is done by potentially multiple
710 calls to this routine.
711 (any other incoming edge counts are for a different jump threading
712 path that will be handled by a later call to this routine.)
713 To make this easier, start by recording all incoming edges that flow into
714 the current path in a bitmap. We could add up the path's incoming edge
715 counts here, but we still need to walk all the first bb's incoming edges
716 below to add up the counts of the other edges not included in this jump
717 threading path. */
718 struct el *next, *el;
719 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
720 for (el = rd->incoming_edges; el; el = next)
722 next = el->next;
723 bitmap_set_bit (in_edge_srcs, el->e->src->index);
725 edge ein;
726 edge_iterator ei;
727 FOR_EACH_EDGE (ein, ei, e->dest->preds)
729 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
730 /* Simply check the incoming edge src against the set captured above. */
731 if (ein_path
732 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
734 /* It is necessary but not sufficient that the last path edges
735 are identical. There may be different paths that share the
736 same last path edge in the case where the last edge has a nocopy
737 source block. */
738 gcc_assert (ein_path->last ()->e == elast);
739 path_in_count += ein->count;
740 path_in_freq += EDGE_FREQUENCY (ein);
742 else if (!ein_path)
744 /* Keep track of the incoming edges that are not on any jump-threading
745 path. These counts will still flow out of original path after all
746 jump threading is complete. */
747 nonpath_count += ein->count;
751 /* This is needed due to insane incoming frequencies. */
752 if (path_in_freq > BB_FREQ_MAX)
753 path_in_freq = BB_FREQ_MAX;
755 BITMAP_FREE (in_edge_srcs);
757 /* Now compute the fraction of the total count coming into the first
758 path bb that is from the current threading path. */
759 gcov_type total_count = e->dest->count;
760 /* Handle incoming profile insanities. */
761 if (total_count < path_in_count)
762 path_in_count = total_count;
763 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
765 /* Walk the entire path to do some more computation in order to estimate
766 how much of the path_in_count will flow out of the duplicated threading
767 path. In the non-joiner case this is straightforward (it should be
768 the same as path_in_count, although we will handle incoming profile
769 insanities by setting it equal to the minimum count along the path).
771 In the joiner case, we need to estimate how much of the path_in_count
772 will stay on the threading path after the joiner's conditional branch.
773 We don't really know for sure how much of the counts
774 associated with this path go to each successor of the joiner, but we'll
775 estimate based on the fraction of the total count coming into the path
776 bb was from the threading paths (computed above in onpath_scale).
777 Afterwards, we will need to do some fixup to account for other threading
778 paths and possible profile insanities.
780 In order to estimate the joiner case's counts we also need to update
781 nonpath_count with any additional counts coming into the path. Other
782 blocks along the path may have additional predecessors from outside
783 the path. */
784 gcov_type path_out_count = path_in_count;
785 gcov_type min_path_count = path_in_count;
786 for (unsigned int i = 1; i < path->length (); i++)
788 edge epath = (*path)[i]->e;
789 gcov_type cur_count = epath->count;
790 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
792 has_joiner = true;
793 cur_count = apply_probability (cur_count, onpath_scale);
795 /* In the joiner case we need to update nonpath_count for any edges
796 coming into the path that will contribute to the count flowing
797 into the path successor. */
798 if (has_joiner && epath != elast)
800 /* Look for other incoming edges after joiner. */
801 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
803 if (ein != epath
804 /* Ignore in edges from blocks we have duplicated for a
805 threading path, which have duplicated edge counts until
806 they are redirected by an invocation of this routine. */
807 && !bitmap_bit_p (local_info->duplicate_blocks,
808 ein->src->index))
809 nonpath_count += ein->count;
812 if (cur_count < path_out_count)
813 path_out_count = cur_count;
814 if (epath->count < min_path_count)
815 min_path_count = epath->count;
818 /* We computed path_out_count above assuming that this path targeted
819 the joiner's on-path successor with the same likelihood as it
820 reached the joiner. However, other thread paths through the joiner
821 may take a different path through the normal copy source block
822 (i.e. they have a different elast), meaning that they do not
823 contribute any counts to this path's elast. As a result, it may
824 turn out that this path must have more count flowing to the on-path
825 successor of the joiner. Essentially, all of this path's elast
826 count must be contributed by this path and any nonpath counts
827 (since any path through the joiner with a different elast will not
828 include a copy of this elast in its duplicated path).
829 So ensure that this path's path_out_count is at least the
830 difference between elast->count and nonpath_count. Otherwise the edge
831 counts after threading will not be sane. */
832 if (has_joiner && path_out_count < elast->count - nonpath_count)
834 path_out_count = elast->count - nonpath_count;
835 /* But neither can we go above the minimum count along the path
836 we are duplicating. This can be an issue due to profile
837 insanities coming in to this pass. */
838 if (path_out_count > min_path_count)
839 path_out_count = min_path_count;
842 *path_in_count_ptr = path_in_count;
843 *path_out_count_ptr = path_out_count;
844 *path_in_freq_ptr = path_in_freq;
845 return has_joiner;
849 /* Update the counts and frequencies for both an original path
850 edge EPATH and its duplicate EDUP. The duplicate source block
851 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
852 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
853 static void
854 update_profile (edge epath, edge edup, gcov_type path_in_count,
855 gcov_type path_out_count, int path_in_freq)
858 /* First update the duplicated block's count / frequency. */
859 if (edup)
861 basic_block dup_block = edup->src;
862 gcc_assert (dup_block->count == 0);
863 gcc_assert (dup_block->frequency == 0);
864 dup_block->count = path_in_count;
865 dup_block->frequency = path_in_freq;
868 /* Now update the original block's count and frequency in the
869 opposite manner - remove the counts/freq that will flow
870 into the duplicated block. Handle underflow due to precision/
871 rounding issues. */
872 epath->src->count -= path_in_count;
873 if (epath->src->count < 0)
874 epath->src->count = 0;
875 epath->src->frequency -= path_in_freq;
876 if (epath->src->frequency < 0)
877 epath->src->frequency = 0;
879 /* Next update this path edge's original and duplicated counts. We know
880 that the duplicated path will have path_out_count flowing
881 out of it (in the joiner case this is the count along the duplicated path
882 out of the duplicated joiner). This count can then be removed from the
883 original path edge. */
884 if (edup)
885 edup->count = path_out_count;
886 epath->count -= path_out_count;
887 gcc_assert (epath->count >= 0);
891 /* The duplicate and original joiner blocks may end up with different
892 probabilities (different from both the original and from each other).
893 Recompute the probabilities here once we have updated the edge
894 counts and frequencies. */
896 static void
897 recompute_probabilities (basic_block bb)
899 edge esucc;
900 edge_iterator ei;
901 FOR_EACH_EDGE (esucc, ei, bb->succs)
903 if (!bb->count)
904 continue;
906 /* Prevent overflow computation due to insane profiles. */
907 if (esucc->count < bb->count)
908 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
909 bb->count);
910 else
911 /* Can happen with missing/guessed probabilities, since we
912 may determine that more is flowing along duplicated
913 path than joiner succ probabilities allowed.
914 Counts and freqs will be insane after jump threading,
915 at least make sure probability is sane or we will
916 get a flow verification error.
917 Not much we can do to make counts/freqs sane without
918 redoing the profile estimation. */
919 esucc->probability = REG_BR_PROB_BASE;
924 /* Update the counts of the original and duplicated edges from a joiner
925 that go off path, given that we have already determined that the
926 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
927 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
928 edge from joiner is EPATH. */
930 static void
931 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
932 gcov_type path_in_count,
933 gcov_type path_out_count)
935 /* Compute the count that currently flows off path from the joiner.
936 In other words, the total count of joiner's out edges other than
937 epath. Compute this by walking the successors instead of
938 subtracting epath's count from the joiner bb count, since there
939 are sometimes slight insanities where the total out edge count is
940 larger than the bb count (possibly due to rounding/truncation
941 errors). */
942 gcov_type total_orig_off_path_count = 0;
943 edge enonpath;
944 edge_iterator ei;
945 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
947 if (enonpath == epath)
948 continue;
949 total_orig_off_path_count += enonpath->count;
952 /* For the path that we are duplicating, the amount that will flow
953 off path from the duplicated joiner is the delta between the
954 path's cumulative in count and the portion of that count we
955 estimated above as flowing from the joiner along the duplicated
956 path. */
957 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
959 /* Now do the actual updates of the off-path edges. */
960 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
962 /* Look for edges going off of the threading path. */
963 if (enonpath == epath)
964 continue;
966 /* Find the corresponding edge out of the duplicated joiner. */
967 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
968 gcc_assert (enonpathdup);
970 /* We can't use the original probability of the joiner's out
971 edges, since the probabilities of the original branch
972 and the duplicated branches may vary after all threading is
973 complete. But apportion the duplicated joiner's off-path
974 total edge count computed earlier (total_dup_off_path_count)
975 among the duplicated off-path edges based on their original
976 ratio to the full off-path count (total_orig_off_path_count).
978 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
979 total_orig_off_path_count);
980 /* Give the duplicated offpath edge a portion of the duplicated
981 total. */
982 enonpathdup->count = apply_scale (scale,
983 total_dup_off_path_count);
984 /* Now update the original offpath edge count, handling underflow
985 due to rounding errors. */
986 enonpath->count -= enonpathdup->count;
987 if (enonpath->count < 0)
988 enonpath->count = 0;
993 /* Check if the paths through RD all have estimated frequencies but zero
994 profile counts. This is more accurate than checking the entry block
995 for a zero profile count, since profile insanities sometimes creep in. */
997 static bool
998 estimated_freqs_path (struct redirection_data *rd)
1000 edge e = rd->incoming_edges->e;
1001 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1002 edge ein;
1003 edge_iterator ei;
1004 bool non_zero_freq = false;
1005 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1007 if (ein->count)
1008 return false;
1009 non_zero_freq |= ein->src->frequency != 0;
1012 for (unsigned int i = 1; i < path->length (); i++)
1014 edge epath = (*path)[i]->e;
1015 if (epath->src->count)
1016 return false;
1017 non_zero_freq |= epath->src->frequency != 0;
1018 edge esucc;
1019 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1021 if (esucc->count)
1022 return false;
1023 non_zero_freq |= esucc->src->frequency != 0;
1026 return non_zero_freq;
1030 /* Invoked for routines that have guessed frequencies and no profile
1031 counts to record the block and edge frequencies for paths through RD
1032 in the profile count fields of those blocks and edges. This is because
1033 ssa_fix_duplicate_block_edges incrementally updates the block and
1034 edge counts as edges are redirected, and it is difficult to do that
1035 for edge frequencies which are computed on the fly from the source
1036 block frequency and probability. When a block frequency is updated
1037 its outgoing edge frequencies are affected and become difficult to
1038 adjust. */
1040 static void
1041 freqs_to_counts_path (struct redirection_data *rd)
1043 edge e = rd->incoming_edges->e;
1044 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1045 edge ein;
1046 edge_iterator ei;
1047 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1049 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1050 errors applying the probability when the frequencies are very
1051 small. */
1052 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1053 ein->probability);
1056 for (unsigned int i = 1; i < path->length (); i++)
1058 edge epath = (*path)[i]->e;
1059 edge esucc;
1060 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1061 errors applying the edge probability when the frequencies are very
1062 small. */
1063 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1064 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1065 esucc->count = apply_probability (esucc->src->count,
1066 esucc->probability);
1071 /* For routines that have guessed frequencies and no profile counts, where we
1072 used freqs_to_counts_path to record block and edge frequencies for paths
1073 through RD, we clear the counts after completing all updates for RD.
1074 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1075 but the block frequencies and edge probabilities were updated as well,
1076 so we can simply clear the count fields. */
1078 static void
1079 clear_counts_path (struct redirection_data *rd)
1081 edge e = rd->incoming_edges->e;
1082 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1083 edge ein, esucc;
1084 edge_iterator ei;
1085 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1086 ein->count = 0;
1088 /* First clear counts along original path. */
1089 for (unsigned int i = 1; i < path->length (); i++)
1091 edge epath = (*path)[i]->e;
1092 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1093 esucc->count = 0;
1094 epath->src->count = 0;
1096 /* Also need to clear the counts along duplicated path. */
1097 for (unsigned int i = 0; i < 2; i++)
1099 basic_block dup = rd->dup_blocks[i];
1100 if (!dup)
1101 continue;
1102 FOR_EACH_EDGE (esucc, ei, dup->succs)
1103 esucc->count = 0;
1104 dup->count = 0;
1108 /* Wire up the outgoing edges from the duplicate blocks and
1109 update any PHIs as needed. Also update the profile counts
1110 on the original and duplicate blocks and edges. */
1111 void
1112 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1113 ssa_local_info_t *local_info)
1115 bool multi_incomings = (rd->incoming_edges->next != NULL);
1116 edge e = rd->incoming_edges->e;
1117 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1118 edge elast = path->last ()->e;
1119 gcov_type path_in_count = 0;
1120 gcov_type path_out_count = 0;
1121 int path_in_freq = 0;
1123 /* This routine updates profile counts, frequencies, and probabilities
1124 incrementally. Since it is difficult to do the incremental updates
1125 using frequencies/probabilities alone, for routines without profile
1126 data we first take a snapshot of the existing block and edge frequencies
1127 by copying them into the empty profile count fields. These counts are
1128 then used to do the incremental updates, and cleared at the end of this
1129 routine. If the function is marked as having a profile, we still check
1130 to see if the paths through RD are using estimated frequencies because
1131 the routine had zero profile counts. */
1132 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1133 || estimated_freqs_path (rd));
1134 if (do_freqs_to_counts)
1135 freqs_to_counts_path (rd);
1137 /* First determine how much profile count to move from original
1138 path to the duplicate path. This is tricky in the presence of
1139 a joiner (see comments for compute_path_counts), where some portion
1140 of the path's counts will flow off-path from the joiner. In the
1141 non-joiner case the path_in_count and path_out_count should be the
1142 same. */
1143 bool has_joiner = compute_path_counts (rd, local_info,
1144 &path_in_count, &path_out_count,
1145 &path_in_freq);
1147 int cur_path_freq = path_in_freq;
1148 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1150 edge epath = (*path)[i]->e;
1152 /* If we were threading through an joiner block, then we want
1153 to keep its control statement and redirect an outgoing edge.
1154 Else we want to remove the control statement & edges, then create
1155 a new outgoing edge. In both cases we may need to update PHIs. */
1156 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1158 edge victim;
1159 edge e2;
1161 gcc_assert (has_joiner);
1163 /* This updates the PHIs at the destination of the duplicate
1164 block. Pass 0 instead of i if we are threading a path which
1165 has multiple incoming edges. */
1166 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1167 path, multi_incomings ? 0 : i);
1169 /* Find the edge from the duplicate block to the block we're
1170 threading through. That's the edge we want to redirect. */
1171 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1173 /* If there are no remaining blocks on the path to duplicate,
1174 then redirect VICTIM to the final destination of the jump
1175 threading path. */
1176 if (!any_remaining_duplicated_blocks (path, i))
1178 e2 = redirect_edge_and_branch (victim, elast->dest);
1179 /* If we redirected the edge, then we need to copy PHI arguments
1180 at the target. If the edge already existed (e2 != victim
1181 case), then the PHIs in the target already have the correct
1182 arguments. */
1183 if (e2 == victim)
1184 copy_phi_args (e2->dest, elast, e2,
1185 path, multi_incomings ? 0 : i);
1187 else
1189 /* Redirect VICTIM to the next duplicated block in the path. */
1190 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1192 /* We need to update the PHIs in the next duplicated block. We
1193 want the new PHI args to have the same value as they had
1194 in the source of the next duplicate block.
1196 Thus, we need to know which edge we traversed into the
1197 source of the duplicate. Furthermore, we may have
1198 traversed many edges to reach the source of the duplicate.
1200 Walk through the path starting at element I until we
1201 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1202 the edge from the prior element. */
1203 for (unsigned int j = i + 1; j < path->length (); j++)
1205 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1207 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1208 break;
1213 /* Update the counts and frequency of both the original block
1214 and path edge, and the duplicates. The path duplicate's
1215 incoming count and frequency are the totals for all edges
1216 incoming to this jump threading path computed earlier.
1217 And we know that the duplicated path will have path_out_count
1218 flowing out of it (i.e. along the duplicated path out of the
1219 duplicated joiner). */
1220 update_profile (epath, e2, path_in_count, path_out_count,
1221 path_in_freq);
1223 /* Next we need to update the counts of the original and duplicated
1224 edges from the joiner that go off path. */
1225 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1226 path_out_count);
1228 /* Finally, we need to set the probabilities on the duplicated
1229 edges out of the duplicated joiner (e2->src). The probabilities
1230 along the original path will all be updated below after we finish
1231 processing the whole path. */
1232 recompute_probabilities (e2->src);
1234 /* Record the frequency flowing to the downstream duplicated
1235 path blocks. */
1236 cur_path_freq = EDGE_FREQUENCY (e2);
1238 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1240 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1241 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1242 multi_incomings ? 0 : i);
1243 if (count == 1)
1244 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1246 /* Update the counts and frequency of both the original block
1247 and path edge, and the duplicates. Since we are now after
1248 any joiner that may have existed on the path, the count
1249 flowing along the duplicated threaded path is path_out_count.
1250 If we didn't have a joiner, then cur_path_freq was the sum
1251 of the total frequencies along all incoming edges to the
1252 thread path (path_in_freq). If we had a joiner, it would have
1253 been updated at the end of that handling to the edge frequency
1254 along the duplicated joiner path edge. */
1255 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1256 path_out_count, path_out_count,
1257 cur_path_freq);
1259 else
1261 /* No copy case. In this case we don't have an equivalent block
1262 on the duplicated thread path to update, but we do need
1263 to remove the portion of the counts/freqs that were moved
1264 to the duplicated path from the counts/freqs flowing through
1265 this block on the original path. Since all the no-copy edges
1266 are after any joiner, the removed count is the same as
1267 path_out_count.
1269 If we didn't have a joiner, then cur_path_freq was the sum
1270 of the total frequencies along all incoming edges to the
1271 thread path (path_in_freq). If we had a joiner, it would have
1272 been updated at the end of that handling to the edge frequency
1273 along the duplicated joiner path edge. */
1274 update_profile (epath, NULL, path_out_count, path_out_count,
1275 cur_path_freq);
1278 /* Increment the index into the duplicated path when we processed
1279 a duplicated block. */
1280 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1281 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1283 count++;
1287 /* Now walk orig blocks and update their probabilities, since the
1288 counts and freqs should be updated properly by above loop. */
1289 for (unsigned int i = 1; i < path->length (); i++)
1291 edge epath = (*path)[i]->e;
1292 recompute_probabilities (epath->src);
1295 /* Done with all profile and frequency updates, clear counts if they
1296 were copied. */
1297 if (do_freqs_to_counts)
1298 clear_counts_path (rd);
1301 /* Hash table traversal callback routine to create duplicate blocks. */
1304 ssa_create_duplicates (struct redirection_data **slot,
1305 ssa_local_info_t *local_info)
1307 struct redirection_data *rd = *slot;
1309 /* The second duplicated block in a jump threading path is specific
1310 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1312 Each time we're called, we have to look through the path and see
1313 if a second block needs to be duplicated.
1315 Note the search starts with the third edge on the path. The first
1316 edge is the incoming edge, the second edge always has its source
1317 duplicated. Thus we start our search with the third edge. */
1318 vec<jump_thread_edge *> *path = rd->path;
1319 for (unsigned int i = 2; i < path->length (); i++)
1321 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1322 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1324 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1325 &local_info->duplicate_blocks);
1326 break;
1330 /* Create a template block if we have not done so already. Otherwise
1331 use the template to create a new block. */
1332 if (local_info->template_block == NULL)
1334 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1335 &local_info->duplicate_blocks);
1336 local_info->template_block = rd->dup_blocks[0];
1338 /* We do not create any outgoing edges for the template. We will
1339 take care of that in a later traversal. That way we do not
1340 create edges that are going to just be deleted. */
1342 else
1344 create_block_for_threading (local_info->template_block, rd, 0,
1345 &local_info->duplicate_blocks);
1347 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1348 block. */
1349 ssa_fix_duplicate_block_edges (rd, local_info);
1352 /* Keep walking the hash table. */
1353 return 1;
1356 /* We did not create any outgoing edges for the template block during
1357 block creation. This hash table traversal callback creates the
1358 outgoing edge for the template block. */
1360 inline int
1361 ssa_fixup_template_block (struct redirection_data **slot,
1362 ssa_local_info_t *local_info)
1364 struct redirection_data *rd = *slot;
1366 /* If this is the template block halt the traversal after updating
1367 it appropriately.
1369 If we were threading through an joiner block, then we want
1370 to keep its control statement and redirect an outgoing edge.
1371 Else we want to remove the control statement & edges, then create
1372 a new outgoing edge. In both cases we may need to update PHIs. */
1373 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1375 ssa_fix_duplicate_block_edges (rd, local_info);
1376 return 0;
1379 return 1;
1382 /* Hash table traversal callback to redirect each incoming edge
1383 associated with this hash table element to its new destination. */
1386 ssa_redirect_edges (struct redirection_data **slot,
1387 ssa_local_info_t *local_info)
1389 struct redirection_data *rd = *slot;
1390 struct el *next, *el;
1392 /* Walk over all the incoming edges associated with this hash table
1393 entry. */
1394 for (el = rd->incoming_edges; el; el = next)
1396 edge e = el->e;
1397 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1399 /* Go ahead and free this element from the list. Doing this now
1400 avoids the need for another list walk when we destroy the hash
1401 table. */
1402 next = el->next;
1403 free (el);
1405 thread_stats.num_threaded_edges++;
1407 if (rd->dup_blocks[0])
1409 edge e2;
1411 if (dump_file && (dump_flags & TDF_DETAILS))
1412 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1413 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1415 /* If we redirect a loop latch edge cancel its loop. */
1416 if (e->src == e->src->loop_father->latch)
1417 mark_loop_for_removal (e->src->loop_father);
1419 /* Redirect the incoming edge (possibly to the joiner block) to the
1420 appropriate duplicate block. */
1421 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1422 gcc_assert (e == e2);
1423 flush_pending_stmts (e2);
1426 /* Go ahead and clear E->aux. It's not needed anymore and failure
1427 to clear it will cause all kinds of unpleasant problems later. */
1428 delete_jump_thread_path (path);
1429 e->aux = NULL;
1433 /* Indicate that we actually threaded one or more jumps. */
1434 if (rd->incoming_edges)
1435 local_info->jumps_threaded = true;
1437 return 1;
1440 /* Return true if this block has no executable statements other than
1441 a simple ctrl flow instruction. When the number of outgoing edges
1442 is one, this is equivalent to a "forwarder" block. */
1444 static bool
1445 redirection_block_p (basic_block bb)
1447 gimple_stmt_iterator gsi;
1449 /* Advance to the first executable statement. */
1450 gsi = gsi_start_bb (bb);
1451 while (!gsi_end_p (gsi)
1452 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1453 || is_gimple_debug (gsi_stmt (gsi))
1454 || gimple_nop_p (gsi_stmt (gsi))
1455 || gimple_clobber_p (gsi_stmt (gsi))))
1456 gsi_next (&gsi);
1458 /* Check if this is an empty block. */
1459 if (gsi_end_p (gsi))
1460 return true;
1462 /* Test that we've reached the terminating control statement. */
1463 return gsi_stmt (gsi)
1464 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1465 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1466 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1469 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1470 is reached via one or more specific incoming edges, we know which
1471 outgoing edge from BB will be traversed.
1473 We want to redirect those incoming edges to the target of the
1474 appropriate outgoing edge. Doing so avoids a conditional branch
1475 and may expose new optimization opportunities. Note that we have
1476 to update dominator tree and SSA graph after such changes.
1478 The key to keeping the SSA graph update manageable is to duplicate
1479 the side effects occurring in BB so that those side effects still
1480 occur on the paths which bypass BB after redirecting edges.
1482 We accomplish this by creating duplicates of BB and arranging for
1483 the duplicates to unconditionally pass control to one specific
1484 successor of BB. We then revector the incoming edges into BB to
1485 the appropriate duplicate of BB.
1487 If NOLOOP_ONLY is true, we only perform the threading as long as it
1488 does not affect the structure of the loops in a nontrivial way.
1490 If JOINERS is true, then thread through joiner blocks as well. */
1492 static bool
1493 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1495 /* E is an incoming edge into BB that we may or may not want to
1496 redirect to a duplicate of BB. */
1497 edge e, e2;
1498 edge_iterator ei;
1499 ssa_local_info_t local_info;
1501 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1503 /* To avoid scanning a linear array for the element we need we instead
1504 use a hash table. For normal code there should be no noticeable
1505 difference. However, if we have a block with a large number of
1506 incoming and outgoing edges such linear searches can get expensive. */
1507 redirection_data
1508 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1510 /* Record each unique threaded destination into a hash table for
1511 efficient lookups. */
1512 FOR_EACH_EDGE (e, ei, bb->preds)
1514 if (e->aux == NULL)
1515 continue;
1517 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1519 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1520 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1521 continue;
1523 e2 = path->last ()->e;
1524 if (!e2 || noloop_only)
1526 /* If NOLOOP_ONLY is true, we only allow threading through the
1527 header of a loop to exit edges. */
1529 /* One case occurs when there was loop header buried in a jump
1530 threading path that crosses loop boundaries. We do not try
1531 and thread this elsewhere, so just cancel the jump threading
1532 request by clearing the AUX field now. */
1533 if ((bb->loop_father != e2->src->loop_father
1534 && !loop_exit_edge_p (e2->src->loop_father, e2))
1535 || (e2->src->loop_father != e2->dest->loop_father
1536 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1538 /* Since this case is not handled by our special code
1539 to thread through a loop header, we must explicitly
1540 cancel the threading request here. */
1541 delete_jump_thread_path (path);
1542 e->aux = NULL;
1543 continue;
1546 /* Another case occurs when trying to thread through our
1547 own loop header, possibly from inside the loop. We will
1548 thread these later. */
1549 unsigned int i;
1550 for (i = 1; i < path->length (); i++)
1552 if ((*path)[i]->e->src == bb->loop_father->header
1553 && (!loop_exit_edge_p (bb->loop_father, e2)
1554 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1555 break;
1558 if (i != path->length ())
1559 continue;
1562 /* Insert the outgoing edge into the hash table if it is not
1563 already in the hash table. */
1564 lookup_redirection_data (e, INSERT);
1567 /* We do not update dominance info. */
1568 free_dominance_info (CDI_DOMINATORS);
1570 /* We know we only thread through the loop header to loop exits.
1571 Let the basic block duplication hook know we are not creating
1572 a multiple entry loop. */
1573 if (noloop_only
1574 && bb == bb->loop_father->header)
1575 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1577 /* Now create duplicates of BB.
1579 Note that for a block with a high outgoing degree we can waste
1580 a lot of time and memory creating and destroying useless edges.
1582 So we first duplicate BB and remove the control structure at the
1583 tail of the duplicate as well as all outgoing edges from the
1584 duplicate. We then use that duplicate block as a template for
1585 the rest of the duplicates. */
1586 local_info.template_block = NULL;
1587 local_info.bb = bb;
1588 local_info.jumps_threaded = false;
1589 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1590 (&local_info);
1592 /* The template does not have an outgoing edge. Create that outgoing
1593 edge and update PHI nodes as the edge's target as necessary.
1595 We do this after creating all the duplicates to avoid creating
1596 unnecessary edges. */
1597 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1598 (&local_info);
1600 /* The hash table traversals above created the duplicate blocks (and the
1601 statements within the duplicate blocks). This loop creates PHI nodes for
1602 the duplicated blocks and redirects the incoming edges into BB to reach
1603 the duplicates of BB. */
1604 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1605 (&local_info);
1607 /* Done with this block. Clear REDIRECTION_DATA. */
1608 delete redirection_data;
1609 redirection_data = NULL;
1611 if (noloop_only
1612 && bb == bb->loop_father->header)
1613 set_loop_copy (bb->loop_father, NULL);
1615 BITMAP_FREE (local_info.duplicate_blocks);
1616 local_info.duplicate_blocks = NULL;
1618 /* Indicate to our caller whether or not any jumps were threaded. */
1619 return local_info.jumps_threaded;
1622 /* Wrapper for thread_block_1 so that we can first handle jump
1623 thread paths which do not involve copying joiner blocks, then
1624 handle jump thread paths which have joiner blocks.
1626 By doing things this way we can be as aggressive as possible and
1627 not worry that copying a joiner block will create a jump threading
1628 opportunity. */
1630 static bool
1631 thread_block (basic_block bb, bool noloop_only)
1633 bool retval;
1634 retval = thread_block_1 (bb, noloop_only, false);
1635 retval |= thread_block_1 (bb, noloop_only, true);
1636 return retval;
1640 /* Threads edge E through E->dest to the edge THREAD_TARGET (E). Returns the
1641 copy of E->dest created during threading, or E->dest if it was not necessary
1642 to copy it (E is its single predecessor). */
1644 static basic_block
1645 thread_single_edge (edge e)
1647 basic_block bb = e->dest;
1648 struct redirection_data rd;
1649 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1650 edge eto = (*path)[1]->e;
1652 delete_jump_thread_path (path);
1653 e->aux = NULL;
1655 thread_stats.num_threaded_edges++;
1657 if (single_pred_p (bb))
1659 /* If BB has just a single predecessor, we should only remove the
1660 control statements at its end, and successors except for ETO. */
1661 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1663 /* And fixup the flags on the single remaining edge. */
1664 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1665 eto->flags |= EDGE_FALLTHRU;
1667 return bb;
1670 /* Otherwise, we need to create a copy. */
1671 if (e->dest == eto->src)
1672 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1674 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1675 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1676 npath->safe_push (x);
1678 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1679 npath->safe_push (x);
1680 rd.path = npath;
1682 create_block_for_threading (bb, &rd, 0, NULL);
1683 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1684 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1686 if (dump_file && (dump_flags & TDF_DETAILS))
1687 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1688 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1690 rd.dup_blocks[0]->count = e->count;
1691 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1692 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1693 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1694 flush_pending_stmts (e);
1696 delete_jump_thread_path (npath);
1697 return rd.dup_blocks[0];
1700 /* Callback for dfs_enumerate_from. Returns true if BB is different
1701 from STOP and DBDS_CE_STOP. */
1703 static basic_block dbds_ce_stop;
1704 static bool
1705 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1707 return (bb != (const_basic_block) stop
1708 && bb != dbds_ce_stop);
1711 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1712 returns the state. */
1714 enum bb_dom_status
1716 /* BB does not dominate latch of the LOOP. */
1717 DOMST_NONDOMINATING,
1718 /* The LOOP is broken (there is no path from the header to its latch. */
1719 DOMST_LOOP_BROKEN,
1720 /* BB dominates the latch of the LOOP. */
1721 DOMST_DOMINATING
1724 static enum bb_dom_status
1725 determine_bb_domination_status (struct loop *loop, basic_block bb)
1727 basic_block *bblocks;
1728 unsigned nblocks, i;
1729 bool bb_reachable = false;
1730 edge_iterator ei;
1731 edge e;
1733 /* This function assumes BB is a successor of LOOP->header.
1734 If that is not the case return DOMST_NONDOMINATING which
1735 is always safe. */
1737 bool ok = false;
1739 FOR_EACH_EDGE (e, ei, bb->preds)
1741 if (e->src == loop->header)
1743 ok = true;
1744 break;
1748 if (!ok)
1749 return DOMST_NONDOMINATING;
1752 if (bb == loop->latch)
1753 return DOMST_DOMINATING;
1755 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1756 from it. */
1758 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1759 dbds_ce_stop = loop->header;
1760 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1761 bblocks, loop->num_nodes, bb);
1762 for (i = 0; i < nblocks; i++)
1763 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1765 if (e->src == loop->header)
1767 free (bblocks);
1768 return DOMST_NONDOMINATING;
1770 if (e->src == bb)
1771 bb_reachable = true;
1774 free (bblocks);
1775 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1778 /* Return true if BB is part of the new pre-header that is created
1779 when threading the latch to DATA. */
1781 static bool
1782 def_split_header_continue_p (const_basic_block bb, const void *data)
1784 const_basic_block new_header = (const_basic_block) data;
1785 const struct loop *l;
1787 if (bb == new_header
1788 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1789 return false;
1790 for (l = bb->loop_father; l; l = loop_outer (l))
1791 if (l == new_header->loop_father)
1792 return true;
1793 return false;
1796 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1797 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1798 to the inside of the loop. */
1800 static bool
1801 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1803 basic_block header = loop->header;
1804 edge e, tgt_edge, latch = loop_latch_edge (loop);
1805 edge_iterator ei;
1806 basic_block tgt_bb, atgt_bb;
1807 enum bb_dom_status domst;
1809 /* We have already threaded through headers to exits, so all the threading
1810 requests now are to the inside of the loop. We need to avoid creating
1811 irreducible regions (i.e., loops with more than one entry block), and
1812 also loop with several latch edges, or new subloops of the loop (although
1813 there are cases where it might be appropriate, it is difficult to decide,
1814 and doing it wrongly may confuse other optimizers).
1816 We could handle more general cases here. However, the intention is to
1817 preserve some information about the loop, which is impossible if its
1818 structure changes significantly, in a way that is not well understood.
1819 Thus we only handle few important special cases, in which also updating
1820 of the loop-carried information should be feasible:
1822 1) Propagation of latch edge to a block that dominates the latch block
1823 of a loop. This aims to handle the following idiom:
1825 first = 1;
1826 while (1)
1828 if (first)
1829 initialize;
1830 first = 0;
1831 body;
1834 After threading the latch edge, this becomes
1836 first = 1;
1837 if (first)
1838 initialize;
1839 while (1)
1841 first = 0;
1842 body;
1845 The original header of the loop is moved out of it, and we may thread
1846 the remaining edges through it without further constraints.
1848 2) All entry edges are propagated to a single basic block that dominates
1849 the latch block of the loop. This aims to handle the following idiom
1850 (normally created for "for" loops):
1852 i = 0;
1853 while (1)
1855 if (i >= 100)
1856 break;
1857 body;
1858 i++;
1861 This becomes
1863 i = 0;
1864 while (1)
1866 body;
1867 i++;
1868 if (i >= 100)
1869 break;
1873 /* Threading through the header won't improve the code if the header has just
1874 one successor. */
1875 if (single_succ_p (header))
1876 goto fail;
1878 /* If we threaded the latch using a joiner block, we cancel the
1879 threading opportunity out of an abundance of caution. However,
1880 still allow threading from outside to inside the loop. */
1881 if (latch->aux)
1883 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1884 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1886 delete_jump_thread_path (path);
1887 latch->aux = NULL;
1891 if (latch->aux)
1893 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1894 tgt_edge = (*path)[1]->e;
1895 tgt_bb = tgt_edge->dest;
1897 else if (!may_peel_loop_headers
1898 && !redirection_block_p (loop->header))
1899 goto fail;
1900 else
1902 tgt_bb = NULL;
1903 tgt_edge = NULL;
1904 FOR_EACH_EDGE (e, ei, header->preds)
1906 if (!e->aux)
1908 if (e == latch)
1909 continue;
1911 /* If latch is not threaded, and there is a header
1912 edge that is not threaded, we would create loop
1913 with multiple entries. */
1914 goto fail;
1917 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1919 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1920 goto fail;
1921 tgt_edge = (*path)[1]->e;
1922 atgt_bb = tgt_edge->dest;
1923 if (!tgt_bb)
1924 tgt_bb = atgt_bb;
1925 /* Two targets of threading would make us create loop
1926 with multiple entries. */
1927 else if (tgt_bb != atgt_bb)
1928 goto fail;
1931 if (!tgt_bb)
1933 /* There are no threading requests. */
1934 return false;
1937 /* Redirecting to empty loop latch is useless. */
1938 if (tgt_bb == loop->latch
1939 && empty_block_p (loop->latch))
1940 goto fail;
1943 /* The target block must dominate the loop latch, otherwise we would be
1944 creating a subloop. */
1945 domst = determine_bb_domination_status (loop, tgt_bb);
1946 if (domst == DOMST_NONDOMINATING)
1947 goto fail;
1948 if (domst == DOMST_LOOP_BROKEN)
1950 /* If the loop ceased to exist, mark it as such, and thread through its
1951 original header. */
1952 mark_loop_for_removal (loop);
1953 return thread_block (header, false);
1956 if (tgt_bb->loop_father->header == tgt_bb)
1958 /* If the target of the threading is a header of a subloop, we need
1959 to create a preheader for it, so that the headers of the two loops
1960 do not merge. */
1961 if (EDGE_COUNT (tgt_bb->preds) > 2)
1963 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1964 gcc_assert (tgt_bb != NULL);
1966 else
1967 tgt_bb = split_edge (tgt_edge);
1970 if (latch->aux)
1972 basic_block *bblocks;
1973 unsigned nblocks, i;
1975 /* First handle the case latch edge is redirected. We are copying
1976 the loop header but not creating a multiple entry loop. Make the
1977 cfg manipulation code aware of that fact. */
1978 set_loop_copy (loop, loop);
1979 loop->latch = thread_single_edge (latch);
1980 set_loop_copy (loop, NULL);
1981 gcc_assert (single_succ (loop->latch) == tgt_bb);
1982 loop->header = tgt_bb;
1984 /* Remove the new pre-header blocks from our loop. */
1985 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1986 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1987 bblocks, loop->num_nodes, tgt_bb);
1988 for (i = 0; i < nblocks; i++)
1989 if (bblocks[i]->loop_father == loop)
1991 remove_bb_from_loops (bblocks[i]);
1992 add_bb_to_loop (bblocks[i], loop_outer (loop));
1994 free (bblocks);
1996 /* If the new header has multiple latches mark it so. */
1997 FOR_EACH_EDGE (e, ei, loop->header->preds)
1998 if (e->src->loop_father == loop
1999 && e->src != loop->latch)
2001 loop->latch = NULL;
2002 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
2005 /* Cancel remaining threading requests that would make the
2006 loop a multiple entry loop. */
2007 FOR_EACH_EDGE (e, ei, header->preds)
2009 edge e2;
2011 if (e->aux == NULL)
2012 continue;
2014 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2015 e2 = path->last ()->e;
2017 if (e->src->loop_father != e2->dest->loop_father
2018 && e2->dest != loop->header)
2020 delete_jump_thread_path (path);
2021 e->aux = NULL;
2025 /* Thread the remaining edges through the former header. */
2026 thread_block (header, false);
2028 else
2030 basic_block new_preheader;
2032 /* Now consider the case entry edges are redirected to the new entry
2033 block. Remember one entry edge, so that we can find the new
2034 preheader (its destination after threading). */
2035 FOR_EACH_EDGE (e, ei, header->preds)
2037 if (e->aux)
2038 break;
2041 /* The duplicate of the header is the new preheader of the loop. Ensure
2042 that it is placed correctly in the loop hierarchy. */
2043 set_loop_copy (loop, loop_outer (loop));
2045 thread_block (header, false);
2046 set_loop_copy (loop, NULL);
2047 new_preheader = e->dest;
2049 /* Create the new latch block. This is always necessary, as the latch
2050 must have only a single successor, but the original header had at
2051 least two successors. */
2052 loop->latch = NULL;
2053 mfb_kj_edge = single_succ_edge (new_preheader);
2054 loop->header = mfb_kj_edge->dest;
2055 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2056 loop->header = latch->dest;
2057 loop->latch = latch->src;
2060 return true;
2062 fail:
2063 /* We failed to thread anything. Cancel the requests. */
2064 FOR_EACH_EDGE (e, ei, header->preds)
2066 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2068 if (path)
2070 delete_jump_thread_path (path);
2071 e->aux = NULL;
2074 return false;
2077 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2078 PHI arguments associated with those edges are equal or there are no
2079 PHI arguments, otherwise return FALSE. */
2081 static bool
2082 phi_args_equal_on_edges (edge e1, edge e2)
2084 gphi_iterator gsi;
2085 int indx1 = e1->dest_idx;
2086 int indx2 = e2->dest_idx;
2088 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2090 gphi *phi = gsi.phi ();
2092 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2093 gimple_phi_arg_def (phi, indx2), 0))
2094 return false;
2096 return true;
2099 /* Walk through the registered jump threads and convert them into a
2100 form convenient for this pass.
2102 Any block which has incoming edges threaded to outgoing edges
2103 will have its entry in THREADED_BLOCK set.
2105 Any threaded edge will have its new outgoing edge stored in the
2106 original edge's AUX field.
2108 This form avoids the need to walk all the edges in the CFG to
2109 discover blocks which need processing and avoids unnecessary
2110 hash table lookups to map from threaded edge to new target. */
2112 static void
2113 mark_threaded_blocks (bitmap threaded_blocks)
2115 unsigned int i;
2116 bitmap_iterator bi;
2117 bitmap tmp = BITMAP_ALLOC (NULL);
2118 basic_block bb;
2119 edge e;
2120 edge_iterator ei;
2122 /* It is possible to have jump threads in which one is a subpath
2123 of the other. ie, (A, B), (B, C), (C, D) where B is a joiner
2124 block and (B, C), (C, D) where no joiner block exists.
2126 When this occurs ignore the jump thread request with the joiner
2127 block. It's totally subsumed by the simpler jump thread request.
2129 This results in less block copying, simpler CFGs. More importantly,
2130 when we duplicate the joiner block, B, in this case we will create
2131 a new threading opportunity that we wouldn't be able to optimize
2132 until the next jump threading iteration.
2134 So first convert the jump thread requests which do not require a
2135 joiner block. */
2136 for (i = 0; i < paths.length (); i++)
2138 vec<jump_thread_edge *> *path = paths[i];
2140 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2142 edge e = (*path)[0]->e;
2143 e->aux = (void *)path;
2144 bitmap_set_bit (tmp, e->dest->index);
2148 /* Now iterate again, converting cases where we want to thread
2149 through a joiner block, but only if no other edge on the path
2150 already has a jump thread attached to it. We do this in two passes,
2151 to avoid situations where the order in the paths vec can hide overlapping
2152 threads (the path is recorded on the incoming edge, so we would miss
2153 cases where the second path starts at a downstream edge on the same
2154 path). First record all joiner paths, deleting any in the unexpected
2155 case where there is already a path for that incoming edge. */
2156 for (i = 0; i < paths.length ();)
2158 vec<jump_thread_edge *> *path = paths[i];
2160 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2162 /* Attach the path to the starting edge if none is yet recorded. */
2163 if ((*path)[0]->e->aux == NULL)
2165 (*path)[0]->e->aux = path;
2166 i++;
2168 else
2170 paths.unordered_remove (i);
2171 if (dump_file && (dump_flags & TDF_DETAILS))
2172 dump_jump_thread_path (dump_file, *path, false);
2173 delete_jump_thread_path (path);
2176 else
2178 i++;
2182 /* Second, look for paths that have any other jump thread attached to
2183 them, and either finish converting them or cancel them. */
2184 for (i = 0; i < paths.length ();)
2186 vec<jump_thread_edge *> *path = paths[i];
2187 edge e = (*path)[0]->e;
2189 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2191 unsigned int j;
2192 for (j = 1; j < path->length (); j++)
2193 if ((*path)[j]->e->aux != NULL)
2194 break;
2196 /* If we iterated through the entire path without exiting the loop,
2197 then we are good to go, record it. */
2198 if (j == path->length ())
2200 bitmap_set_bit (tmp, e->dest->index);
2201 i++;
2203 else
2205 e->aux = NULL;
2206 paths.unordered_remove (i);
2207 if (dump_file && (dump_flags & TDF_DETAILS))
2208 dump_jump_thread_path (dump_file, *path, false);
2209 delete_jump_thread_path (path);
2212 else
2214 i++;
2218 /* If optimizing for size, only thread through block if we don't have
2219 to duplicate it or it's an otherwise empty redirection block. */
2220 if (optimize_function_for_size_p (cfun))
2222 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2224 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2225 if (EDGE_COUNT (bb->preds) > 1
2226 && !redirection_block_p (bb))
2228 FOR_EACH_EDGE (e, ei, bb->preds)
2230 if (e->aux)
2232 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2233 delete_jump_thread_path (path);
2234 e->aux = NULL;
2238 else
2239 bitmap_set_bit (threaded_blocks, i);
2242 else
2243 bitmap_copy (threaded_blocks, tmp);
2245 /* Look for jump threading paths which cross multiple loop headers.
2247 The code to thread through loop headers will change the CFG in ways
2248 that break assumptions made by the loop optimization code.
2250 We don't want to blindly cancel the requests. We can instead do better
2251 by trimming off the end of the jump thread path. */
2252 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2254 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2255 FOR_EACH_EDGE (e, ei, bb->preds)
2257 if (e->aux)
2259 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2261 for (unsigned int i = 0, crossed_headers = 0;
2262 i < path->length ();
2263 i++)
2265 basic_block dest = (*path)[i]->e->dest;
2266 crossed_headers += (dest == dest->loop_father->header);
2267 if (crossed_headers > 1)
2269 /* Trim from entry I onwards. */
2270 for (unsigned int j = i; j < path->length (); j++)
2271 delete (*path)[j];
2272 path->truncate (i);
2274 /* Now that we've truncated the path, make sure
2275 what's left is still valid. We need at least
2276 two edges on the path and the last edge can not
2277 be a joiner. This should never happen, but let's
2278 be safe. */
2279 if (path->length () < 2
2280 || (path->last ()->type
2281 == EDGE_COPY_SRC_JOINER_BLOCK))
2283 delete_jump_thread_path (path);
2284 e->aux = NULL;
2286 break;
2293 /* If we have a joiner block (J) which has two successors S1 and S2 and
2294 we are threading though S1 and the final destination of the thread
2295 is S2, then we must verify that any PHI nodes in S2 have the same
2296 PHI arguments for the edge J->S2 and J->S1->...->S2.
2298 We used to detect this prior to registering the jump thread, but
2299 that prohibits propagation of edge equivalences into non-dominated
2300 PHI nodes as the equivalency test might occur before propagation.
2302 This must also occur after we truncate any jump threading paths
2303 as this scenario may only show up after truncation.
2305 This works for now, but will need improvement as part of the FSA
2306 optimization.
2308 Note since we've moved the thread request data to the edges,
2309 we have to iterate on those rather than the threaded_edges vector. */
2310 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2312 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2313 FOR_EACH_EDGE (e, ei, bb->preds)
2315 if (e->aux)
2317 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2318 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2320 if (have_joiner)
2322 basic_block joiner = e->dest;
2323 edge final_edge = path->last ()->e;
2324 basic_block final_dest = final_edge->dest;
2325 edge e2 = find_edge (joiner, final_dest);
2327 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2329 delete_jump_thread_path (path);
2330 e->aux = NULL;
2337 BITMAP_FREE (tmp);
2341 /* Return TRUE if BB ends with a switch statement or a computed goto.
2342 Otherwise return false. */
2343 static bool
2344 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2346 gimple *stmt = last_stmt (bb);
2347 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2348 return true;
2349 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2350 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2351 return true;
2352 return false;
2355 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2356 case of SEME Single Entry Multiple Exits region in which all nodes in the
2357 REGION have exactly one incoming edge. The only exception is the first block
2358 that may not have been connected to the rest of the cfg yet. */
2360 DEBUG_FUNCTION void
2361 verify_jump_thread (basic_block *region, unsigned n_region)
2363 for (unsigned i = 0; i < n_region; i++)
2364 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2367 /* Return true when BB is one of the first N items in BBS. */
2369 static inline bool
2370 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2372 for (int i = 0; i < n; i++)
2373 if (bb == bbs[i])
2374 return true;
2376 return false;
2379 /* Duplicates a jump-thread path of N_REGION basic blocks.
2380 The ENTRY edge is redirected to the duplicate of the region.
2382 Remove the last conditional statement in the last basic block in the REGION,
2383 and create a single fallthru edge pointing to the same destination as the
2384 EXIT edge.
2386 The new basic blocks are stored to REGION_COPY in the same order as they had
2387 in REGION, provided that REGION_COPY is not NULL.
2389 Returns false if it is unable to copy the region, true otherwise. */
2391 static bool
2392 duplicate_thread_path (edge entry, edge exit,
2393 basic_block *region, unsigned n_region,
2394 basic_block *region_copy)
2396 unsigned i;
2397 bool free_region_copy = false;
2398 struct loop *loop = entry->dest->loop_father;
2399 edge exit_copy;
2400 edge redirected;
2401 int total_freq = 0, entry_freq = 0;
2402 gcov_type total_count = 0, entry_count = 0;
2404 if (!can_copy_bbs_p (region, n_region))
2405 return false;
2407 /* Some sanity checking. Note that we do not check for all possible
2408 missuses of the functions. I.e. if you ask to copy something weird,
2409 it will work, but the state of structures probably will not be
2410 correct. */
2411 for (i = 0; i < n_region; i++)
2413 /* We do not handle subloops, i.e. all the blocks must belong to the
2414 same loop. */
2415 if (region[i]->loop_father != loop)
2416 return false;
2419 initialize_original_copy_tables ();
2421 set_loop_copy (loop, loop);
2423 if (!region_copy)
2425 region_copy = XNEWVEC (basic_block, n_region);
2426 free_region_copy = true;
2429 if (entry->dest->count)
2431 total_count = entry->dest->count;
2432 entry_count = entry->count;
2433 /* Fix up corner cases, to avoid division by zero or creation of negative
2434 frequencies. */
2435 if (entry_count > total_count)
2436 entry_count = total_count;
2438 else
2440 total_freq = entry->dest->frequency;
2441 entry_freq = EDGE_FREQUENCY (entry);
2442 /* Fix up corner cases, to avoid division by zero or creation of negative
2443 frequencies. */
2444 if (total_freq == 0)
2445 total_freq = 1;
2446 else if (entry_freq > total_freq)
2447 entry_freq = total_freq;
2450 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2451 split_edge_bb_loc (entry), false);
2453 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2454 following code ensures that all the edges exiting the jump-thread path are
2455 redirected back to the original code: these edges are exceptions
2456 invalidating the property that is propagated by executing all the blocks of
2457 the jump-thread path in order. */
2459 for (i = 0; i < n_region; i++)
2461 edge e;
2462 edge_iterator ei;
2463 basic_block bb = region_copy[i];
2465 if (single_succ_p (bb))
2467 /* Make sure the successor is the next node in the path. */
2468 gcc_assert (i + 1 == n_region
2469 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2470 continue;
2473 /* Special case the last block on the path: make sure that it does not
2474 jump back on the copied path. */
2475 if (i + 1 == n_region)
2477 FOR_EACH_EDGE (e, ei, bb->succs)
2478 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2480 basic_block orig = get_bb_original (e->dest);
2481 if (orig)
2482 redirect_edge_and_branch_force (e, orig);
2484 continue;
2487 /* Redirect all other edges jumping to non-adjacent blocks back to the
2488 original code. */
2489 FOR_EACH_EDGE (e, ei, bb->succs)
2490 if (region_copy[i + 1] != e->dest)
2492 basic_block orig = get_bb_original (e->dest);
2493 if (orig)
2494 redirect_edge_and_branch_force (e, orig);
2498 if (total_count)
2500 scale_bbs_frequencies_gcov_type (region, n_region,
2501 total_count - entry_count,
2502 total_count);
2503 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2504 total_count);
2506 else
2508 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2509 total_freq);
2510 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2513 #ifdef ENABLE_CHECKING
2514 verify_jump_thread (region_copy, n_region);
2515 #endif
2517 /* Remove the last branch in the jump thread path. */
2518 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2520 /* And fixup the flags on the single remaining edge. */
2521 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2522 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2523 fix_e->flags |= EDGE_FALLTHRU;
2525 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2527 if (e) {
2528 rescan_loop_exit (e, true, false);
2529 e->probability = REG_BR_PROB_BASE;
2530 e->count = region_copy[n_region - 1]->count;
2533 /* Redirect the entry and add the phi node arguments. */
2534 if (entry->dest == loop->header)
2535 mark_loop_for_removal (loop);
2536 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2537 gcc_assert (redirected != NULL);
2538 flush_pending_stmts (entry);
2540 /* Add the other PHI node arguments. */
2541 add_phi_args_after_copy (region_copy, n_region, NULL);
2543 if (free_region_copy)
2544 free (region_copy);
2546 free_original_copy_tables ();
2547 return true;
2550 /* Return true when PATH is a valid jump-thread path. */
2552 static bool
2553 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2555 unsigned len = path->length ();
2557 /* Check that the path is connected. */
2558 for (unsigned int j = 0; j < len - 1; j++)
2559 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2560 return false;
2562 return true;
2565 /* Remove any queued jump threads that include edge E.
2567 We don't actually remove them here, just record the edges into ax
2568 hash table. That way we can do the search once per iteration of
2569 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2571 void
2572 remove_jump_threads_including (edge_def *e)
2574 if (!paths.exists ())
2575 return;
2577 if (!removed_edges)
2578 removed_edges = new hash_table<struct removed_edges> (17);
2580 edge *slot = removed_edges->find_slot (e, INSERT);
2581 *slot = e;
2584 /* Walk through all blocks and thread incoming edges to the appropriate
2585 outgoing edge for each edge pair recorded in THREADED_EDGES.
2587 It is the caller's responsibility to fix the dominance information
2588 and rewrite duplicated SSA_NAMEs back into SSA form.
2590 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2591 loop headers if it does not simplify the loop.
2593 Returns true if one or more edges were threaded, false otherwise. */
2595 bool
2596 thread_through_all_blocks (bool may_peel_loop_headers)
2598 bool retval = false;
2599 unsigned int i;
2600 bitmap_iterator bi;
2601 bitmap threaded_blocks;
2602 struct loop *loop;
2604 if (!paths.exists ())
2606 retval = false;
2607 goto out;
2610 threaded_blocks = BITMAP_ALLOC (NULL);
2611 memset (&thread_stats, 0, sizeof (thread_stats));
2613 /* Remove any paths that referenced removed edges. */
2614 if (removed_edges)
2615 for (i = 0; i < paths.length (); )
2617 unsigned int j;
2618 vec<jump_thread_edge *> *path = paths[i];
2620 for (j = 0; j < path->length (); j++)
2622 edge e = (*path)[j]->e;
2623 if (removed_edges->find_slot (e, NO_INSERT))
2624 break;
2627 if (j != path->length ())
2629 delete_jump_thread_path (path);
2630 paths.unordered_remove (i);
2631 continue;
2633 i++;
2636 /* Jump-thread all FSM threads before other jump-threads. */
2637 for (i = 0; i < paths.length ();)
2639 vec<jump_thread_edge *> *path = paths[i];
2640 edge entry = (*path)[0]->e;
2642 /* Only code-generate FSM jump-threads in this loop. */
2643 if ((*path)[0]->type != EDGE_FSM_THREAD)
2645 i++;
2646 continue;
2649 /* Do not jump-thread twice from the same block. */
2650 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2651 /* Verify that the jump thread path is still valid: a
2652 previous jump-thread may have changed the CFG, and
2653 invalidated the current path. */
2654 || !valid_jump_thread_path (path))
2656 /* Remove invalid FSM jump-thread paths. */
2657 delete_jump_thread_path (path);
2658 paths.unordered_remove (i);
2659 continue;
2662 unsigned len = path->length ();
2663 edge exit = (*path)[len - 1]->e;
2664 basic_block *region = XNEWVEC (basic_block, len - 1);
2666 for (unsigned int j = 0; j < len - 1; j++)
2667 region[j] = (*path)[j]->e->dest;
2669 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2671 /* We do not update dominance info. */
2672 free_dominance_info (CDI_DOMINATORS);
2673 bitmap_set_bit (threaded_blocks, entry->src->index);
2674 retval = true;
2675 thread_stats.num_threaded_edges++;
2678 delete_jump_thread_path (path);
2679 paths.unordered_remove (i);
2682 /* Remove from PATHS all the jump-threads starting with an edge already
2683 jump-threaded. */
2684 for (i = 0; i < paths.length ();)
2686 vec<jump_thread_edge *> *path = paths[i];
2687 edge entry = (*path)[0]->e;
2689 /* Do not jump-thread twice from the same block. */
2690 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2692 delete_jump_thread_path (path);
2693 paths.unordered_remove (i);
2695 else
2696 i++;
2699 bitmap_clear (threaded_blocks);
2701 mark_threaded_blocks (threaded_blocks);
2703 initialize_original_copy_tables ();
2705 /* First perform the threading requests that do not affect
2706 loop structure. */
2707 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2709 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2711 if (EDGE_COUNT (bb->preds) > 0)
2712 retval |= thread_block (bb, true);
2715 /* Then perform the threading through loop headers. We start with the
2716 innermost loop, so that the changes in cfg we perform won't affect
2717 further threading. */
2718 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2720 if (!loop->header
2721 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2722 continue;
2724 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2727 /* Any jump threading paths that are still attached to edges at this
2728 point must be one of two cases.
2730 First, we could have a jump threading path which went from outside
2731 a loop to inside a loop that was ignored because a prior jump thread
2732 across a backedge was realized (which indirectly causes the loop
2733 above to ignore the latter thread). We can detect these because the
2734 loop structures will be different and we do not currently try to
2735 optimize this case.
2737 Second, we could be threading across a backedge to a point within the
2738 same loop. This occurrs for the FSA/FSM optimization and we would
2739 like to optimize it. However, we have to be very careful as this
2740 may completely scramble the loop structures, with the result being
2741 irreducible loops causing us to throw away our loop structure.
2743 As a compromise for the latter case, if the thread path ends in
2744 a block where the last statement is a multiway branch, then go
2745 ahead and thread it, else ignore it. */
2746 basic_block bb;
2747 edge e;
2748 FOR_EACH_BB_FN (bb, cfun)
2750 /* If we do end up threading here, we can remove elements from
2751 BB->preds. Thus we can not use the FOR_EACH_EDGE iterator. */
2752 for (edge_iterator ei = ei_start (bb->preds);
2753 (e = ei_safe_edge (ei));)
2754 if (e->aux)
2756 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2758 /* Case 1, threading from outside to inside the loop
2759 after we'd already threaded through the header. */
2760 if ((*path)[0]->e->dest->loop_father
2761 != path->last ()->e->src->loop_father)
2763 delete_jump_thread_path (path);
2764 e->aux = NULL;
2765 ei_next (&ei);
2767 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2769 /* The code to thread through loop headers may have
2770 split a block with jump threads attached to it.
2772 We can identify this with a disjoint jump threading
2773 path. If found, just remove it. */
2774 for (unsigned int i = 0; i < path->length () - 1; i++)
2775 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2777 delete_jump_thread_path (path);
2778 e->aux = NULL;
2779 ei_next (&ei);
2780 break;
2783 /* Our path is still valid, thread it. */
2784 if (e->aux)
2786 if (thread_block ((*path)[0]->e->dest, false))
2787 e->aux = NULL;
2788 else
2790 delete_jump_thread_path (path);
2791 e->aux = NULL;
2792 ei_next (&ei);
2796 else
2798 delete_jump_thread_path (path);
2799 e->aux = NULL;
2800 ei_next (&ei);
2803 else
2804 ei_next (&ei);
2807 statistics_counter_event (cfun, "Jumps threaded",
2808 thread_stats.num_threaded_edges);
2810 free_original_copy_tables ();
2812 BITMAP_FREE (threaded_blocks);
2813 threaded_blocks = NULL;
2814 paths.release ();
2816 if (retval)
2817 loops_state_set (LOOPS_NEED_FIXUP);
2819 out:
2820 delete removed_edges;
2821 removed_edges = NULL;
2822 return retval;
2825 /* Delete the jump threading path PATH. We have to explcitly delete
2826 each entry in the vector, then the container. */
2828 void
2829 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2831 for (unsigned int i = 0; i < path->length (); i++)
2832 delete (*path)[i];
2833 path->release();
2834 delete path;
2837 /* Register a jump threading opportunity. We queue up all the jump
2838 threading opportunities discovered by a pass and update the CFG
2839 and SSA form all at once.
2841 E is the edge we can thread, E2 is the new target edge, i.e., we
2842 are effectively recording that E->dest can be changed to E2->dest
2843 after fixing the SSA graph. */
2845 void
2846 register_jump_thread (vec<jump_thread_edge *> *path)
2848 if (!dbg_cnt (registered_jump_thread))
2850 delete_jump_thread_path (path);
2851 return;
2854 /* First make sure there are no NULL outgoing edges on the jump threading
2855 path. That can happen for jumping to a constant address. */
2856 for (unsigned int i = 0; i < path->length (); i++)
2857 if ((*path)[i]->e == NULL)
2859 if (dump_file && (dump_flags & TDF_DETAILS))
2861 fprintf (dump_file,
2862 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2863 dump_jump_thread_path (dump_file, *path, false);
2866 delete_jump_thread_path (path);
2867 return;
2870 if (dump_file && (dump_flags & TDF_DETAILS))
2871 dump_jump_thread_path (dump_file, *path, true);
2873 if (!paths.exists ())
2874 paths.create (5);
2876 paths.safe_push (path);