1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
29 #include "fold-const.h"
31 #include "gimple-iterator.h"
33 #include "tree-ssa-threadupdate.h"
37 #include "tree-vectorizer.h"
39 /* Given a block B, update the CFG and SSA graph to reflect redirecting
40 one or more in-edges to B to instead reach the destination of an
41 out-edge from B while preserving any side effects in B.
43 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
44 side effects of executing B.
46 1. Make a copy of B (including its outgoing edges and statements). Call
47 the copy B'. Note B' has no incoming edges or PHIs at this time.
49 2. Remove the control statement at the end of B' and all outgoing edges
52 3. Add a new argument to each PHI in C with the same value as the existing
53 argument associated with edge B->C. Associate the new PHI arguments
56 4. For each PHI in B, find or create a PHI in B' with an identical
57 PHI_RESULT. Add an argument to the PHI in B' which has the same
58 value as the PHI in B associated with the edge A->B. Associate
59 the new argument in the PHI in B' with the edge A->B.
61 5. Change the edge A->B to A->B'.
63 5a. This automatically deletes any PHI arguments associated with the
66 5b. This automatically associates each new argument added in step 4
69 6. Repeat for other incoming edges into B.
71 7. Put the duplicated resources in B and all the B' blocks into SSA form.
73 Note that block duplication can be minimized by first collecting the
74 set of unique destination blocks that the incoming edges should
77 We reduce the number of edges and statements we create by not copying all
78 the outgoing edges and the control statement in step #1. We instead create
79 a template block without the outgoing edges and duplicate the template.
81 Another case this code handles is threading through a "joiner" block. In
82 this case, we do not know the destination of the joiner block, but one
83 of the outgoing edges from the joiner block leads to a threadable path. This
84 case largely works as outlined above, except the duplicate of the joiner
85 block still contains a full set of outgoing edges and its control statement.
86 We just redirect one of its outgoing edges to our jump threading path. */
89 /* Steps #5 and #6 of the above algorithm are best implemented by walking
90 all the incoming edges which thread to the same destination edge at
91 the same time. That avoids lots of table lookups to get information
92 for the destination edge.
94 To realize that implementation we create a list of incoming edges
95 which thread to the same outgoing edge. Thus to implement steps
96 #5 and #6 we traverse our hash table of outgoing edge information.
97 For each entry we walk the list of incoming edges which thread to
98 the current outgoing edge. */
106 /* Main data structure recording information regarding B's duplicate
109 /* We need to efficiently record the unique thread destinations of this
110 block and specific information associated with those destinations. We
111 may have many incoming edges threaded to the same outgoing edge. This
112 can be naturally implemented with a hash table. */
114 struct redirection_data
: free_ptr_hash
<redirection_data
>
116 /* We support wiring up two block duplicates in a jump threading path.
118 One is a normal block copy where we remove the control statement
119 and wire up its single remaining outgoing edge to the thread path.
121 The other is a joiner block where we leave the control statement
122 in place, but wire one of the outgoing edges to a thread path.
124 In theory we could have multiple block duplicates in a jump
125 threading path, but I haven't tried that.
127 The duplicate blocks appear in this array in the same order in
128 which they appear in the jump thread path. */
129 basic_block dup_blocks
[2];
131 /* The jump threading path. */
132 vec
<jump_thread_edge
*> *path
;
134 /* A list of incoming edges which we want to thread to the
136 struct el
*incoming_edges
;
138 /* hash_table support. */
139 static inline hashval_t
hash (const redirection_data
*);
140 static inline int equal (const redirection_data
*, const redirection_data
*);
143 /* Dump a jump threading path, including annotations about each
147 dump_jump_thread_path (FILE *dump_file
, vec
<jump_thread_edge
*> path
,
151 " %s%s jump thread: (%d, %d) incoming edge; ",
152 (registering
? "Registering" : "Cancelling"),
153 (path
[0]->type
== EDGE_FSM_THREAD
? " FSM": ""),
154 path
[0]->e
->src
->index
, path
[0]->e
->dest
->index
);
156 for (unsigned int i
= 1; i
< path
.length (); i
++)
158 /* We can get paths with a NULL edge when the final destination
159 of a jump thread turns out to be a constant address. We dump
160 those paths when debugging, so we have to be prepared for that
162 if (path
[i
]->e
== NULL
)
165 if (path
[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
166 fprintf (dump_file
, " (%d, %d) joiner; ",
167 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
168 if (path
[i
]->type
== EDGE_COPY_SRC_BLOCK
)
169 fprintf (dump_file
, " (%d, %d) normal;",
170 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
171 if (path
[i
]->type
== EDGE_NO_COPY_SRC_BLOCK
)
172 fprintf (dump_file
, " (%d, %d) nocopy;",
173 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
174 if (path
[0]->type
== EDGE_FSM_THREAD
)
175 fprintf (dump_file
, " (%d, %d) ",
176 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
178 fputc ('\n', dump_file
);
181 /* Simple hashing function. For any given incoming edge E, we're going
182 to be most concerned with the final destination of its jump thread
183 path. So hash on the block index of the final edge in the path. */
186 redirection_data::hash (const redirection_data
*p
)
188 vec
<jump_thread_edge
*> *path
= p
->path
;
189 return path
->last ()->e
->dest
->index
;
192 /* Given two hash table entries, return true if they have the same
193 jump threading path. */
195 redirection_data::equal (const redirection_data
*p1
, const redirection_data
*p2
)
197 vec
<jump_thread_edge
*> *path1
= p1
->path
;
198 vec
<jump_thread_edge
*> *path2
= p2
->path
;
200 if (path1
->length () != path2
->length ())
203 for (unsigned int i
= 1; i
< path1
->length (); i
++)
205 if ((*path1
)[i
]->type
!= (*path2
)[i
]->type
206 || (*path1
)[i
]->e
!= (*path2
)[i
]->e
)
213 /* Rather than search all the edges in jump thread paths each time
214 DOM is able to simply if control statement, we build a hash table
215 with the deleted edges. We only care about the address of the edge,
217 struct removed_edges
: nofree_ptr_hash
<edge_def
>
219 static hashval_t
hash (edge e
) { return htab_hash_pointer (e
); }
220 static bool equal (edge e1
, edge e2
) { return e1
== e2
; }
223 static hash_table
<removed_edges
> *removed_edges
;
225 /* Data structure of information to pass to hash table traversal routines. */
226 struct ssa_local_info_t
228 /* The current block we are working on. */
231 /* We only create a template block for the first duplicated block in a
232 jump threading path as we may need many duplicates of that block.
234 The second duplicate block in a path is specific to that path. Creating
235 and sharing a template for that block is considerably more difficult. */
236 basic_block template_block
;
238 /* Blocks duplicated for the thread. */
239 bitmap duplicate_blocks
;
241 /* TRUE if we thread one or more jumps, FALSE otherwise. */
244 /* When we have multiple paths through a joiner which reach different
245 final destinations, then we may need to correct for potential
246 profile insanities. */
247 bool need_profile_correction
;
250 /* Passes which use the jump threading code register jump threading
251 opportunities as they are discovered. We keep the registered
252 jump threading opportunities in this vector as edge pairs
253 (original_edge, target_edge). */
254 static vec
<vec
<jump_thread_edge
*> *> paths
;
256 /* When we start updating the CFG for threading, data necessary for jump
257 threading is attached to the AUX field for the incoming edge. Use these
258 macros to access the underlying structure attached to the AUX field. */
259 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
261 /* Jump threading statistics. */
263 struct thread_stats_d
265 unsigned long num_threaded_edges
;
268 struct thread_stats_d thread_stats
;
271 /* Remove the last statement in block BB if it is a control statement
272 Also remove all outgoing edges except the edge which reaches DEST_BB.
273 If DEST_BB is NULL, then remove all outgoing edges. */
276 remove_ctrl_stmt_and_useless_edges (basic_block bb
, basic_block dest_bb
)
278 gimple_stmt_iterator gsi
;
282 gsi
= gsi_last_bb (bb
);
284 /* If the duplicate ends with a control statement, then remove it.
286 Note that if we are duplicating the template block rather than the
287 original basic block, then the duplicate might not have any real
291 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
292 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
293 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
))
294 gsi_remove (&gsi
, true);
296 for (ei
= ei_start (bb
->succs
); (e
= ei_safe_edge (ei
)); )
298 if (e
->dest
!= dest_bb
)
300 free_dom_edge_info (e
);
305 e
->probability
= profile_probability::always ();
310 /* If the remaining edge is a loop exit, there must have
311 a removed edge that was not a loop exit.
313 In that case BB and possibly other blocks were previously
314 in the loop, but are now outside the loop. Thus, we need
315 to update the loop structures. */
316 if (single_succ_p (bb
)
317 && loop_outer (bb
->loop_father
)
318 && loop_exit_edge_p (bb
->loop_father
, single_succ_edge (bb
)))
319 loops_state_set (LOOPS_NEED_FIXUP
);
322 /* Create a duplicate of BB. Record the duplicate block in an array
323 indexed by COUNT stored in RD. */
326 create_block_for_threading (basic_block bb
,
327 struct redirection_data
*rd
,
329 bitmap
*duplicate_blocks
)
334 /* We can use the generic block duplication code and simply remove
335 the stuff we do not need. */
336 rd
->dup_blocks
[count
] = duplicate_block (bb
, NULL
, NULL
);
338 FOR_EACH_EDGE (e
, ei
, rd
->dup_blocks
[count
]->succs
)
341 /* Zero out the profile, since the block is unreachable for now. */
342 rd
->dup_blocks
[count
]->frequency
= 0;
343 rd
->dup_blocks
[count
]->count
= profile_count::uninitialized ();
344 if (duplicate_blocks
)
345 bitmap_set_bit (*duplicate_blocks
, rd
->dup_blocks
[count
]->index
);
348 /* Main data structure to hold information for duplicates of BB. */
350 static hash_table
<redirection_data
> *redirection_data
;
352 /* Given an outgoing edge E lookup and return its entry in our hash table.
354 If INSERT is true, then we insert the entry into the hash table if
355 it is not already present. INCOMING_EDGE is added to the list of incoming
356 edges associated with E in the hash table. */
358 static struct redirection_data
*
359 lookup_redirection_data (edge e
, enum insert_option insert
)
361 struct redirection_data
**slot
;
362 struct redirection_data
*elt
;
363 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
365 /* Build a hash table element so we can see if E is already
367 elt
= XNEW (struct redirection_data
);
369 elt
->dup_blocks
[0] = NULL
;
370 elt
->dup_blocks
[1] = NULL
;
371 elt
->incoming_edges
= NULL
;
373 slot
= redirection_data
->find_slot (elt
, insert
);
375 /* This will only happen if INSERT is false and the entry is not
376 in the hash table. */
383 /* This will only happen if E was not in the hash table and
388 elt
->incoming_edges
= XNEW (struct el
);
389 elt
->incoming_edges
->e
= e
;
390 elt
->incoming_edges
->next
= NULL
;
393 /* E was in the hash table. */
396 /* Free ELT as we do not need it anymore, we will extract the
397 relevant entry from the hash table itself. */
400 /* Get the entry stored in the hash table. */
403 /* If insertion was requested, then we need to add INCOMING_EDGE
404 to the list of incoming edges associated with E. */
407 struct el
*el
= XNEW (struct el
);
408 el
->next
= elt
->incoming_edges
;
410 elt
->incoming_edges
= el
;
417 /* Similar to copy_phi_args, except that the PHI arg exists, it just
418 does not have a value associated with it. */
421 copy_phi_arg_into_existing_phi (edge src_e
, edge tgt_e
)
423 int src_idx
= src_e
->dest_idx
;
424 int tgt_idx
= tgt_e
->dest_idx
;
426 /* Iterate over each PHI in e->dest. */
427 for (gphi_iterator gsi
= gsi_start_phis (src_e
->dest
),
428 gsi2
= gsi_start_phis (tgt_e
->dest
);
430 gsi_next (&gsi
), gsi_next (&gsi2
))
432 gphi
*src_phi
= gsi
.phi ();
433 gphi
*dest_phi
= gsi2
.phi ();
434 tree val
= gimple_phi_arg_def (src_phi
, src_idx
);
435 source_location locus
= gimple_phi_arg_location (src_phi
, src_idx
);
437 SET_PHI_ARG_DEF (dest_phi
, tgt_idx
, val
);
438 gimple_phi_arg_set_location (dest_phi
, tgt_idx
, locus
);
442 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
443 to see if it has constant value in a flow sensitive manner. Set
444 LOCUS to location of the constant phi arg and return the value.
445 Return DEF directly if either PATH or idx is ZERO. */
448 get_value_locus_in_path (tree def
, vec
<jump_thread_edge
*> *path
,
449 basic_block bb
, int idx
, source_location
*locus
)
455 if (path
== NULL
|| idx
== 0)
458 def_phi
= dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (def
));
462 def_bb
= gimple_bb (def_phi
);
463 /* Don't propagate loop invariants into deeper loops. */
464 if (!def_bb
|| bb_loop_depth (def_bb
) < bb_loop_depth (bb
))
467 /* Backtrack jump threading path from IDX to see if def has constant
469 for (int j
= idx
- 1; j
>= 0; j
--)
471 edge e
= (*path
)[j
]->e
;
472 if (e
->dest
== def_bb
)
474 arg
= gimple_phi_arg_def (def_phi
, e
->dest_idx
);
475 if (is_gimple_min_invariant (arg
))
477 *locus
= gimple_phi_arg_location (def_phi
, e
->dest_idx
);
487 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
488 Try to backtrack jump threading PATH from node IDX to see if the arg
489 has constant value, copy constant value instead of argument itself
493 copy_phi_args (basic_block bb
, edge src_e
, edge tgt_e
,
494 vec
<jump_thread_edge
*> *path
, int idx
)
497 int src_indx
= src_e
->dest_idx
;
499 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
501 gphi
*phi
= gsi
.phi ();
502 tree def
= gimple_phi_arg_def (phi
, src_indx
);
503 source_location locus
= gimple_phi_arg_location (phi
, src_indx
);
505 if (TREE_CODE (def
) == SSA_NAME
506 && !virtual_operand_p (gimple_phi_result (phi
)))
507 def
= get_value_locus_in_path (def
, path
, bb
, idx
, &locus
);
509 add_phi_arg (phi
, def
, tgt_e
, locus
);
513 /* We have recently made a copy of ORIG_BB, including its outgoing
514 edges. The copy is NEW_BB. Every PHI node in every direct successor of
515 ORIG_BB has a new argument associated with edge from NEW_BB to the
516 successor. Initialize the PHI argument so that it is equal to the PHI
517 argument associated with the edge from ORIG_BB to the successor.
518 PATH and IDX are used to check if the new PHI argument has constant
519 value in a flow sensitive manner. */
522 update_destination_phis (basic_block orig_bb
, basic_block new_bb
,
523 vec
<jump_thread_edge
*> *path
, int idx
)
528 FOR_EACH_EDGE (e
, ei
, orig_bb
->succs
)
530 edge e2
= find_edge (new_bb
, e
->dest
);
531 copy_phi_args (e
->dest
, e
, e2
, path
, idx
);
535 /* Given a duplicate block and its single destination (both stored
536 in RD). Create an edge between the duplicate and its single
539 Add an additional argument to any PHI nodes at the single
540 destination. IDX is the start node in jump threading path
541 we start to check to see if the new PHI argument has constant
542 value along the jump threading path. */
545 create_edge_and_update_destination_phis (struct redirection_data
*rd
,
546 basic_block bb
, int idx
)
548 edge e
= make_single_succ_edge (bb
, rd
->path
->last ()->e
->dest
, EDGE_FALLTHRU
);
550 rescan_loop_exit (e
, true, false);
552 /* We used to copy the thread path here. That was added in 2007
553 and dutifully updated through the representation changes in 2013.
555 In 2013 we added code to thread from an interior node through
556 the backedge to another interior node. That runs after the code
557 to thread through loop headers from outside the loop.
559 The latter may delete edges in the CFG, including those
560 which appeared in the jump threading path we copied here. Thus
561 we'd end up using a dangling pointer.
563 After reviewing the 2007/2011 code, I can't see how anything
564 depended on copying the AUX field and clearly copying the jump
565 threading path is problematical due to embedded edge pointers.
566 It has been removed. */
569 /* If there are any PHI nodes at the destination of the outgoing edge
570 from the duplicate block, then we will need to add a new argument
571 to them. The argument should have the same value as the argument
572 associated with the outgoing edge stored in RD. */
573 copy_phi_args (e
->dest
, rd
->path
->last ()->e
, e
, rd
->path
, idx
);
576 /* Look through PATH beginning at START and return TRUE if there are
577 any additional blocks that need to be duplicated. Otherwise,
580 any_remaining_duplicated_blocks (vec
<jump_thread_edge
*> *path
,
583 for (unsigned int i
= start
+ 1; i
< path
->length (); i
++)
585 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
586 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
593 /* Compute the amount of profile count/frequency coming into the jump threading
594 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
595 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
596 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
597 identify blocks duplicated for jump threading, which have duplicated
598 edges that need to be ignored in the analysis. Return true if path contains
599 a joiner, false otherwise.
601 In the non-joiner case, this is straightforward - all the counts/frequency
602 flowing into the jump threading path should flow through the duplicated
603 block and out of the duplicated path.
605 In the joiner case, it is very tricky. Some of the counts flowing into
606 the original path go offpath at the joiner. The problem is that while
607 we know how much total count goes off-path in the original control flow,
608 we don't know how many of the counts corresponding to just the jump
609 threading path go offpath at the joiner.
611 For example, assume we have the following control flow and identified
612 jump threading paths:
631 Jump threading paths: A -> J -> Son -> D (path 1)
632 C -> J -> Son -> E (path 2)
634 Note that the control flow could be more complicated:
635 - Each jump threading path may have more than one incoming edge. I.e. A and
636 Ea could represent multiple incoming blocks/edges that are included in
638 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
639 before or after the "normal" copy block). These are not duplicated onto
640 the jump threading path, as they are single-successor.
641 - Any of the blocks along the path may have other incoming edges that
642 are not part of any jump threading path, but add profile counts along
645 In the above example, after all jump threading is complete, we will
646 end up with the following control flow:
655 Eona/ \ ---/---\-------- \Eonc
660 \___________ / \ _____/
665 The main issue to notice here is that when we are processing path 1
666 (A->J->Son->D) we need to figure out the outgoing edge weights to
667 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
668 sum of the incoming weights to D remain Ed. The problem with simply
669 assuming that Ja (and Jc when processing path 2) has the same outgoing
670 probabilities to its successors as the original block J, is that after
671 all paths are processed and other edges/counts removed (e.g. none
672 of Ec will reach D after processing path 2), we may end up with not
673 enough count flowing along duplicated edge Sona->D.
675 Therefore, in the case of a joiner, we keep track of all counts
676 coming in along the current path, as well as from predecessors not
677 on any jump threading path (Eb in the above example). While we
678 first assume that the duplicated Eona for Ja->Sona has the same
679 probability as the original, we later compensate for other jump
680 threading paths that may eliminate edges. We do that by keep track
681 of all counts coming into the original path that are not in a jump
682 thread (Eb in the above example, but as noted earlier, there could
683 be other predecessors incoming to the path at various points, such
684 as at Son). Call this cumulative non-path count coming into the path
685 before D as Enonpath. We then ensure that the count from Sona->D is as at
686 least as big as (Ed - Enonpath), but no bigger than the minimum
687 weight along the jump threading path. The probabilities of both the
688 original and duplicated joiner block J and Ja will be adjusted
689 accordingly after the updates. */
692 compute_path_counts (struct redirection_data
*rd
,
693 ssa_local_info_t
*local_info
,
694 profile_count
*path_in_count_ptr
,
695 profile_count
*path_out_count_ptr
,
696 int *path_in_freq_ptr
)
698 edge e
= rd
->incoming_edges
->e
;
699 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
700 edge elast
= path
->last ()->e
;
701 profile_count nonpath_count
= profile_count::zero ();
702 bool has_joiner
= false;
703 profile_count path_in_count
= profile_count::zero ();
704 int path_in_freq
= 0;
706 /* Start by accumulating incoming edge counts to the path's first bb
707 into a couple buckets:
708 path_in_count: total count of incoming edges that flow into the
710 nonpath_count: total count of incoming edges that are not
711 flowing along *any* path. These are the counts
712 that will still flow along the original path after
713 all path duplication is done by potentially multiple
714 calls to this routine.
715 (any other incoming edge counts are for a different jump threading
716 path that will be handled by a later call to this routine.)
717 To make this easier, start by recording all incoming edges that flow into
718 the current path in a bitmap. We could add up the path's incoming edge
719 counts here, but we still need to walk all the first bb's incoming edges
720 below to add up the counts of the other edges not included in this jump
722 struct el
*next
, *el
;
723 auto_bitmap in_edge_srcs
;
724 for (el
= rd
->incoming_edges
; el
; el
= next
)
727 bitmap_set_bit (in_edge_srcs
, el
->e
->src
->index
);
731 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
733 vec
<jump_thread_edge
*> *ein_path
= THREAD_PATH (ein
);
734 /* Simply check the incoming edge src against the set captured above. */
736 && bitmap_bit_p (in_edge_srcs
, (*ein_path
)[0]->e
->src
->index
))
738 /* It is necessary but not sufficient that the last path edges
739 are identical. There may be different paths that share the
740 same last path edge in the case where the last edge has a nocopy
742 gcc_assert (ein_path
->last ()->e
== elast
);
743 path_in_count
+= ein
->count ();
744 path_in_freq
+= EDGE_FREQUENCY (ein
);
748 /* Keep track of the incoming edges that are not on any jump-threading
749 path. These counts will still flow out of original path after all
750 jump threading is complete. */
751 nonpath_count
+= ein
->count ();
755 /* This is needed due to insane incoming frequencies. */
756 if (path_in_freq
> BB_FREQ_MAX
)
757 path_in_freq
= BB_FREQ_MAX
;
759 /* Now compute the fraction of the total count coming into the first
760 path bb that is from the current threading path. */
761 profile_count total_count
= e
->dest
->count
;
762 /* Handle incoming profile insanities. */
763 if (total_count
< path_in_count
)
764 path_in_count
= total_count
;
765 profile_probability onpath_scale
= path_in_count
.probability_in (total_count
);
767 /* Walk the entire path to do some more computation in order to estimate
768 how much of the path_in_count will flow out of the duplicated threading
769 path. In the non-joiner case this is straightforward (it should be
770 the same as path_in_count, although we will handle incoming profile
771 insanities by setting it equal to the minimum count along the path).
773 In the joiner case, we need to estimate how much of the path_in_count
774 will stay on the threading path after the joiner's conditional branch.
775 We don't really know for sure how much of the counts
776 associated with this path go to each successor of the joiner, but we'll
777 estimate based on the fraction of the total count coming into the path
778 bb was from the threading paths (computed above in onpath_scale).
779 Afterwards, we will need to do some fixup to account for other threading
780 paths and possible profile insanities.
782 In order to estimate the joiner case's counts we also need to update
783 nonpath_count with any additional counts coming into the path. Other
784 blocks along the path may have additional predecessors from outside
786 profile_count path_out_count
= path_in_count
;
787 profile_count min_path_count
= path_in_count
;
788 for (unsigned int i
= 1; i
< path
->length (); i
++)
790 edge epath
= (*path
)[i
]->e
;
791 profile_count cur_count
= epath
->count ();
792 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
795 cur_count
= cur_count
.apply_probability (onpath_scale
);
797 /* In the joiner case we need to update nonpath_count for any edges
798 coming into the path that will contribute to the count flowing
799 into the path successor. */
800 if (has_joiner
&& epath
!= elast
)
802 /* Look for other incoming edges after joiner. */
803 FOR_EACH_EDGE (ein
, ei
, epath
->dest
->preds
)
806 /* Ignore in edges from blocks we have duplicated for a
807 threading path, which have duplicated edge counts until
808 they are redirected by an invocation of this routine. */
809 && !bitmap_bit_p (local_info
->duplicate_blocks
,
811 nonpath_count
+= ein
->count ();
814 if (cur_count
< path_out_count
)
815 path_out_count
= cur_count
;
816 if (epath
->count () < min_path_count
)
817 min_path_count
= epath
->count ();
820 /* We computed path_out_count above assuming that this path targeted
821 the joiner's on-path successor with the same likelihood as it
822 reached the joiner. However, other thread paths through the joiner
823 may take a different path through the normal copy source block
824 (i.e. they have a different elast), meaning that they do not
825 contribute any counts to this path's elast. As a result, it may
826 turn out that this path must have more count flowing to the on-path
827 successor of the joiner. Essentially, all of this path's elast
828 count must be contributed by this path and any nonpath counts
829 (since any path through the joiner with a different elast will not
830 include a copy of this elast in its duplicated path).
831 So ensure that this path's path_out_count is at least the
832 difference between elast->count () and nonpath_count. Otherwise the edge
833 counts after threading will not be sane. */
834 if (local_info
->need_profile_correction
835 && has_joiner
&& path_out_count
< elast
->count () - nonpath_count
)
837 path_out_count
= elast
->count () - nonpath_count
;
838 /* But neither can we go above the minimum count along the path
839 we are duplicating. This can be an issue due to profile
840 insanities coming in to this pass. */
841 if (path_out_count
> min_path_count
)
842 path_out_count
= min_path_count
;
845 *path_in_count_ptr
= path_in_count
;
846 *path_out_count_ptr
= path_out_count
;
847 *path_in_freq_ptr
= path_in_freq
;
852 /* Update the counts and frequencies for both an original path
853 edge EPATH and its duplicate EDUP. The duplicate source block
854 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
855 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
857 update_profile (edge epath
, edge edup
, profile_count path_in_count
,
858 profile_count path_out_count
, int path_in_freq
)
860 if (!(path_in_count
> 0))
863 /* First update the duplicated block's count / frequency. */
866 basic_block dup_block
= edup
->src
;
868 /* Edup's count is reduced by path_out_count. We need to redistribute
869 probabilities to the remaining edges. */
873 profile_probability edup_prob
874 = path_out_count
.probability_in (path_in_count
);
876 /* Either scale up or down the remaining edges.
877 probabilities are always in range <0,1> and thus we can't do
878 both by same loop. */
879 if (edup
->probability
> edup_prob
)
881 profile_probability rev_scale
882 = (profile_probability::always () - edup
->probability
)
883 / (profile_probability::always () - edup_prob
);
884 FOR_EACH_EDGE (esucc
, ei
, dup_block
->succs
)
886 esucc
->probability
/= rev_scale
;
888 else if (edup
->probability
< edup_prob
)
890 profile_probability scale
891 = (profile_probability::always () - edup_prob
)
892 / (profile_probability::always () - edup
->probability
);
893 FOR_EACH_EDGE (esucc
, ei
, dup_block
->succs
)
895 esucc
->probability
*= scale
;
897 edup
->probability
= edup_prob
;
899 /* FIXME once freqs_to_counts is dropped re-enable this check. */
900 gcc_assert (!dup_block
->count
.initialized_p () || 1);
901 gcc_assert (dup_block
->frequency
== 0);
902 dup_block
->count
= path_in_count
;
903 dup_block
->frequency
= path_in_freq
;
906 profile_count final_count
= epath
->count () - path_out_count
;
908 /* Now update the original block's count and frequency in the
909 opposite manner - remove the counts/freq that will flow
910 into the duplicated block. Handle underflow due to precision/
912 epath
->src
->count
-= path_in_count
;
913 epath
->src
->frequency
-= path_in_freq
;
914 if (epath
->src
->frequency
< 0)
915 epath
->src
->frequency
= 0;
917 /* Next update this path edge's original and duplicated counts. We know
918 that the duplicated path will have path_out_count flowing
919 out of it (in the joiner case this is the count along the duplicated path
920 out of the duplicated joiner). This count can then be removed from the
921 original path edge. */
922 if (epath
->src
->count
> 0)
926 profile_probability epath_prob
= final_count
.probability_in (epath
->src
->count
);
928 if (epath
->probability
> epath_prob
)
930 profile_probability rev_scale
931 = (profile_probability::always () - epath
->probability
)
932 / (profile_probability::always () - epath_prob
);
933 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
935 esucc
->probability
/= rev_scale
;
937 else if (epath
->probability
< epath_prob
)
939 profile_probability scale
940 = (profile_probability::always () - epath_prob
)
941 / (profile_probability::always () - epath
->probability
);
942 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
944 esucc
->probability
*= scale
;
946 epath
->probability
= epath_prob
;
951 /* Check if the paths through RD all have estimated frequencies but zero
952 profile counts. This is more accurate than checking the entry block
953 for a zero profile count, since profile insanities sometimes creep in. */
956 estimated_freqs_path (struct redirection_data
*rd
)
958 edge e
= rd
->incoming_edges
->e
;
959 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
962 bool non_zero_freq
= false;
963 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
965 if (ein
->count () > 0)
967 non_zero_freq
|= ein
->src
->frequency
!= 0;
970 for (unsigned int i
= 1; i
< path
->length (); i
++)
972 edge epath
= (*path
)[i
]->e
;
973 if (epath
->src
->count
> 0)
975 non_zero_freq
|= epath
->src
->frequency
!= 0;
977 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
979 if (esucc
->count () > 0)
981 non_zero_freq
|= esucc
->src
->frequency
!= 0;
984 return non_zero_freq
;
988 /* Invoked for routines that have guessed frequencies and no profile
989 counts to record the block and edge frequencies for paths through RD
990 in the profile count fields of those blocks and edges. This is because
991 ssa_fix_duplicate_block_edges incrementally updates the block and
992 edge counts as edges are redirected, and it is difficult to do that
993 for edge frequencies which are computed on the fly from the source
994 block frequency and probability. When a block frequency is updated
995 its outgoing edge frequencies are affected and become difficult to
999 freqs_to_counts_path (struct redirection_data
*rd
)
1001 edge e
= rd
->incoming_edges
->e
;
1002 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1006 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
1007 ein
->src
->count
= profile_count::from_gcov_type
1008 (ein
->src
->frequency
* REG_BR_PROB_BASE
);
1009 for (unsigned int i
= 1; i
< path
->length (); i
++)
1011 edge epath
= (*path
)[i
]->e
;
1012 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1013 errors applying the edge probability when the frequencies are very
1016 profile_count::from_gcov_type
1017 (epath
->src
->frequency
* REG_BR_PROB_BASE
);
1022 /* For routines that have guessed frequencies and no profile counts, where we
1023 used freqs_to_counts_path to record block and edge frequencies for paths
1024 through RD, we clear the counts after completing all updates for RD.
1025 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1026 but the block frequencies and edge probabilities were updated as well,
1027 so we can simply clear the count fields. */
1030 clear_counts_path (struct redirection_data
*rd
)
1032 edge e
= rd
->incoming_edges
->e
;
1033 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1034 profile_count val
= profile_count::uninitialized ();
1035 if (profile_status_for_fn (cfun
) == PROFILE_READ
)
1036 val
= profile_count::zero ();
1041 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
1042 ein
->src
->count
= val
;
1044 /* First clear counts along original path. */
1045 for (unsigned int i
= 1; i
< path
->length (); i
++)
1047 edge epath
= (*path
)[i
]->e
;
1048 epath
->src
->count
= val
;
1050 /* Also need to clear the counts along duplicated path. */
1051 for (unsigned int i
= 0; i
< 2; i
++)
1053 basic_block dup
= rd
->dup_blocks
[i
];
1060 /* Wire up the outgoing edges from the duplicate blocks and
1061 update any PHIs as needed. Also update the profile counts
1062 on the original and duplicate blocks and edges. */
1064 ssa_fix_duplicate_block_edges (struct redirection_data
*rd
,
1065 ssa_local_info_t
*local_info
)
1067 bool multi_incomings
= (rd
->incoming_edges
->next
!= NULL
);
1068 edge e
= rd
->incoming_edges
->e
;
1069 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1070 edge elast
= path
->last ()->e
;
1071 profile_count path_in_count
= profile_count::zero ();
1072 profile_count path_out_count
= profile_count::zero ();
1073 int path_in_freq
= 0;
1075 /* This routine updates profile counts, frequencies, and probabilities
1076 incrementally. Since it is difficult to do the incremental updates
1077 using frequencies/probabilities alone, for routines without profile
1078 data we first take a snapshot of the existing block and edge frequencies
1079 by copying them into the empty profile count fields. These counts are
1080 then used to do the incremental updates, and cleared at the end of this
1081 routine. If the function is marked as having a profile, we still check
1082 to see if the paths through RD are using estimated frequencies because
1083 the routine had zero profile counts. */
1084 bool do_freqs_to_counts
= (profile_status_for_fn (cfun
) != PROFILE_READ
1085 || estimated_freqs_path (rd
));
1086 if (do_freqs_to_counts
)
1087 freqs_to_counts_path (rd
);
1089 /* First determine how much profile count to move from original
1090 path to the duplicate path. This is tricky in the presence of
1091 a joiner (see comments for compute_path_counts), where some portion
1092 of the path's counts will flow off-path from the joiner. In the
1093 non-joiner case the path_in_count and path_out_count should be the
1095 bool has_joiner
= compute_path_counts (rd
, local_info
,
1096 &path_in_count
, &path_out_count
,
1099 int cur_path_freq
= path_in_freq
;
1100 for (unsigned int count
= 0, i
= 1; i
< path
->length (); i
++)
1102 edge epath
= (*path
)[i
]->e
;
1104 /* If we were threading through an joiner block, then we want
1105 to keep its control statement and redirect an outgoing edge.
1106 Else we want to remove the control statement & edges, then create
1107 a new outgoing edge. In both cases we may need to update PHIs. */
1108 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1113 gcc_assert (has_joiner
);
1115 /* This updates the PHIs at the destination of the duplicate
1116 block. Pass 0 instead of i if we are threading a path which
1117 has multiple incoming edges. */
1118 update_destination_phis (local_info
->bb
, rd
->dup_blocks
[count
],
1119 path
, multi_incomings
? 0 : i
);
1121 /* Find the edge from the duplicate block to the block we're
1122 threading through. That's the edge we want to redirect. */
1123 victim
= find_edge (rd
->dup_blocks
[count
], (*path
)[i
]->e
->dest
);
1125 /* If there are no remaining blocks on the path to duplicate,
1126 then redirect VICTIM to the final destination of the jump
1128 if (!any_remaining_duplicated_blocks (path
, i
))
1130 e2
= redirect_edge_and_branch (victim
, elast
->dest
);
1131 /* If we redirected the edge, then we need to copy PHI arguments
1132 at the target. If the edge already existed (e2 != victim
1133 case), then the PHIs in the target already have the correct
1136 copy_phi_args (e2
->dest
, elast
, e2
,
1137 path
, multi_incomings
? 0 : i
);
1141 /* Redirect VICTIM to the next duplicated block in the path. */
1142 e2
= redirect_edge_and_branch (victim
, rd
->dup_blocks
[count
+ 1]);
1144 /* We need to update the PHIs in the next duplicated block. We
1145 want the new PHI args to have the same value as they had
1146 in the source of the next duplicate block.
1148 Thus, we need to know which edge we traversed into the
1149 source of the duplicate. Furthermore, we may have
1150 traversed many edges to reach the source of the duplicate.
1152 Walk through the path starting at element I until we
1153 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1154 the edge from the prior element. */
1155 for (unsigned int j
= i
+ 1; j
< path
->length (); j
++)
1157 if ((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
)
1159 copy_phi_arg_into_existing_phi ((*path
)[j
- 1]->e
, e2
);
1165 /* Update the counts and frequency of both the original block
1166 and path edge, and the duplicates. The path duplicate's
1167 incoming count and frequency are the totals for all edges
1168 incoming to this jump threading path computed earlier.
1169 And we know that the duplicated path will have path_out_count
1170 flowing out of it (i.e. along the duplicated path out of the
1171 duplicated joiner). */
1172 update_profile (epath
, e2
, path_in_count
, path_out_count
,
1175 /* Record the frequency flowing to the downstream duplicated
1177 cur_path_freq
= EDGE_FREQUENCY (e2
);
1179 else if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1181 remove_ctrl_stmt_and_useless_edges (rd
->dup_blocks
[count
], NULL
);
1182 create_edge_and_update_destination_phis (rd
, rd
->dup_blocks
[count
],
1183 multi_incomings
? 0 : i
);
1185 single_succ_edge (rd
->dup_blocks
[1])->aux
= NULL
;
1187 /* Update the counts and frequency of both the original block
1188 and path edge, and the duplicates. Since we are now after
1189 any joiner that may have existed on the path, the count
1190 flowing along the duplicated threaded path is path_out_count.
1191 If we didn't have a joiner, then cur_path_freq was the sum
1192 of the total frequencies along all incoming edges to the
1193 thread path (path_in_freq). If we had a joiner, it would have
1194 been updated at the end of that handling to the edge frequency
1195 along the duplicated joiner path edge. */
1196 update_profile (epath
, EDGE_SUCC (rd
->dup_blocks
[count
], 0),
1197 path_out_count
, path_out_count
, cur_path_freq
);
1201 /* No copy case. In this case we don't have an equivalent block
1202 on the duplicated thread path to update, but we do need
1203 to remove the portion of the counts/freqs that were moved
1204 to the duplicated path from the counts/freqs flowing through
1205 this block on the original path. Since all the no-copy edges
1206 are after any joiner, the removed count is the same as
1209 If we didn't have a joiner, then cur_path_freq was the sum
1210 of the total frequencies along all incoming edges to the
1211 thread path (path_in_freq). If we had a joiner, it would have
1212 been updated at the end of that handling to the edge frequency
1213 along the duplicated joiner path edge. */
1214 update_profile (epath
, NULL
, path_out_count
, path_out_count
,
1218 /* Increment the index into the duplicated path when we processed
1219 a duplicated block. */
1220 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
1221 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1227 /* Done with all profile and frequency updates, clear counts if they
1229 if (do_freqs_to_counts
)
1230 clear_counts_path (rd
);
1233 /* Hash table traversal callback routine to create duplicate blocks. */
1236 ssa_create_duplicates (struct redirection_data
**slot
,
1237 ssa_local_info_t
*local_info
)
1239 struct redirection_data
*rd
= *slot
;
1241 /* The second duplicated block in a jump threading path is specific
1242 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1244 Each time we're called, we have to look through the path and see
1245 if a second block needs to be duplicated.
1247 Note the search starts with the third edge on the path. The first
1248 edge is the incoming edge, the second edge always has its source
1249 duplicated. Thus we start our search with the third edge. */
1250 vec
<jump_thread_edge
*> *path
= rd
->path
;
1251 for (unsigned int i
= 2; i
< path
->length (); i
++)
1253 if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
1254 || (*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1256 create_block_for_threading ((*path
)[i
]->e
->src
, rd
, 1,
1257 &local_info
->duplicate_blocks
);
1262 /* Create a template block if we have not done so already. Otherwise
1263 use the template to create a new block. */
1264 if (local_info
->template_block
== NULL
)
1266 create_block_for_threading ((*path
)[1]->e
->src
, rd
, 0,
1267 &local_info
->duplicate_blocks
);
1268 local_info
->template_block
= rd
->dup_blocks
[0];
1270 /* We do not create any outgoing edges for the template. We will
1271 take care of that in a later traversal. That way we do not
1272 create edges that are going to just be deleted. */
1276 create_block_for_threading (local_info
->template_block
, rd
, 0,
1277 &local_info
->duplicate_blocks
);
1279 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1281 ssa_fix_duplicate_block_edges (rd
, local_info
);
1284 /* Keep walking the hash table. */
1288 /* We did not create any outgoing edges for the template block during
1289 block creation. This hash table traversal callback creates the
1290 outgoing edge for the template block. */
1293 ssa_fixup_template_block (struct redirection_data
**slot
,
1294 ssa_local_info_t
*local_info
)
1296 struct redirection_data
*rd
= *slot
;
1298 /* If this is the template block halt the traversal after updating
1301 If we were threading through an joiner block, then we want
1302 to keep its control statement and redirect an outgoing edge.
1303 Else we want to remove the control statement & edges, then create
1304 a new outgoing edge. In both cases we may need to update PHIs. */
1305 if (rd
->dup_blocks
[0] && rd
->dup_blocks
[0] == local_info
->template_block
)
1307 ssa_fix_duplicate_block_edges (rd
, local_info
);
1314 /* Hash table traversal callback to redirect each incoming edge
1315 associated with this hash table element to its new destination. */
1318 ssa_redirect_edges (struct redirection_data
**slot
,
1319 ssa_local_info_t
*local_info
)
1321 struct redirection_data
*rd
= *slot
;
1322 struct el
*next
, *el
;
1324 /* Walk over all the incoming edges associated with this hash table
1326 for (el
= rd
->incoming_edges
; el
; el
= next
)
1329 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1331 /* Go ahead and free this element from the list. Doing this now
1332 avoids the need for another list walk when we destroy the hash
1337 thread_stats
.num_threaded_edges
++;
1339 if (rd
->dup_blocks
[0])
1343 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1344 fprintf (dump_file
, " Threaded jump %d --> %d to %d\n",
1345 e
->src
->index
, e
->dest
->index
, rd
->dup_blocks
[0]->index
);
1347 /* Redirect the incoming edge (possibly to the joiner block) to the
1348 appropriate duplicate block. */
1349 e2
= redirect_edge_and_branch (e
, rd
->dup_blocks
[0]);
1350 gcc_assert (e
== e2
);
1351 flush_pending_stmts (e2
);
1354 /* Go ahead and clear E->aux. It's not needed anymore and failure
1355 to clear it will cause all kinds of unpleasant problems later. */
1356 delete_jump_thread_path (path
);
1361 /* Indicate that we actually threaded one or more jumps. */
1362 if (rd
->incoming_edges
)
1363 local_info
->jumps_threaded
= true;
1368 /* Return true if this block has no executable statements other than
1369 a simple ctrl flow instruction. When the number of outgoing edges
1370 is one, this is equivalent to a "forwarder" block. */
1373 redirection_block_p (basic_block bb
)
1375 gimple_stmt_iterator gsi
;
1377 /* Advance to the first executable statement. */
1378 gsi
= gsi_start_bb (bb
);
1379 while (!gsi_end_p (gsi
)
1380 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_LABEL
1381 || is_gimple_debug (gsi_stmt (gsi
))
1382 || gimple_nop_p (gsi_stmt (gsi
))
1383 || gimple_clobber_p (gsi_stmt (gsi
))))
1386 /* Check if this is an empty block. */
1387 if (gsi_end_p (gsi
))
1390 /* Test that we've reached the terminating control statement. */
1391 return gsi_stmt (gsi
)
1392 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
1393 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
1394 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
);
1397 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1398 is reached via one or more specific incoming edges, we know which
1399 outgoing edge from BB will be traversed.
1401 We want to redirect those incoming edges to the target of the
1402 appropriate outgoing edge. Doing so avoids a conditional branch
1403 and may expose new optimization opportunities. Note that we have
1404 to update dominator tree and SSA graph after such changes.
1406 The key to keeping the SSA graph update manageable is to duplicate
1407 the side effects occurring in BB so that those side effects still
1408 occur on the paths which bypass BB after redirecting edges.
1410 We accomplish this by creating duplicates of BB and arranging for
1411 the duplicates to unconditionally pass control to one specific
1412 successor of BB. We then revector the incoming edges into BB to
1413 the appropriate duplicate of BB.
1415 If NOLOOP_ONLY is true, we only perform the threading as long as it
1416 does not affect the structure of the loops in a nontrivial way.
1418 If JOINERS is true, then thread through joiner blocks as well. */
1421 thread_block_1 (basic_block bb
, bool noloop_only
, bool joiners
)
1423 /* E is an incoming edge into BB that we may or may not want to
1424 redirect to a duplicate of BB. */
1427 ssa_local_info_t local_info
;
1429 local_info
.duplicate_blocks
= BITMAP_ALLOC (NULL
);
1430 local_info
.need_profile_correction
= false;
1432 /* To avoid scanning a linear array for the element we need we instead
1433 use a hash table. For normal code there should be no noticeable
1434 difference. However, if we have a block with a large number of
1435 incoming and outgoing edges such linear searches can get expensive. */
1437 = new hash_table
<struct redirection_data
> (EDGE_COUNT (bb
->succs
));
1439 /* Record each unique threaded destination into a hash table for
1440 efficient lookups. */
1442 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1447 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1449 if (((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& !joiners
)
1450 || ((*path
)[1]->type
== EDGE_COPY_SRC_BLOCK
&& joiners
))
1453 e2
= path
->last ()->e
;
1454 if (!e2
|| noloop_only
)
1456 /* If NOLOOP_ONLY is true, we only allow threading through the
1457 header of a loop to exit edges. */
1459 /* One case occurs when there was loop header buried in a jump
1460 threading path that crosses loop boundaries. We do not try
1461 and thread this elsewhere, so just cancel the jump threading
1462 request by clearing the AUX field now. */
1463 if (bb
->loop_father
!= e2
->src
->loop_father
1464 && !loop_exit_edge_p (e2
->src
->loop_father
, e2
))
1466 /* Since this case is not handled by our special code
1467 to thread through a loop header, we must explicitly
1468 cancel the threading request here. */
1469 delete_jump_thread_path (path
);
1474 /* Another case occurs when trying to thread through our
1475 own loop header, possibly from inside the loop. We will
1476 thread these later. */
1478 for (i
= 1; i
< path
->length (); i
++)
1480 if ((*path
)[i
]->e
->src
== bb
->loop_father
->header
1481 && (!loop_exit_edge_p (bb
->loop_father
, e2
)
1482 || (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
))
1486 if (i
!= path
->length ())
1490 /* Insert the outgoing edge into the hash table if it is not
1491 already in the hash table. */
1492 lookup_redirection_data (e
, INSERT
);
1494 /* When we have thread paths through a common joiner with different
1495 final destinations, then we may need corrections to deal with
1496 profile insanities. See the big comment before compute_path_counts. */
1497 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1501 else if (e2
!= last
)
1502 local_info
.need_profile_correction
= true;
1506 /* We do not update dominance info. */
1507 free_dominance_info (CDI_DOMINATORS
);
1509 /* We know we only thread through the loop header to loop exits.
1510 Let the basic block duplication hook know we are not creating
1511 a multiple entry loop. */
1513 && bb
== bb
->loop_father
->header
)
1514 set_loop_copy (bb
->loop_father
, loop_outer (bb
->loop_father
));
1516 /* Now create duplicates of BB.
1518 Note that for a block with a high outgoing degree we can waste
1519 a lot of time and memory creating and destroying useless edges.
1521 So we first duplicate BB and remove the control structure at the
1522 tail of the duplicate as well as all outgoing edges from the
1523 duplicate. We then use that duplicate block as a template for
1524 the rest of the duplicates. */
1525 local_info
.template_block
= NULL
;
1527 local_info
.jumps_threaded
= false;
1528 redirection_data
->traverse
<ssa_local_info_t
*, ssa_create_duplicates
>
1531 /* The template does not have an outgoing edge. Create that outgoing
1532 edge and update PHI nodes as the edge's target as necessary.
1534 We do this after creating all the duplicates to avoid creating
1535 unnecessary edges. */
1536 redirection_data
->traverse
<ssa_local_info_t
*, ssa_fixup_template_block
>
1539 /* The hash table traversals above created the duplicate blocks (and the
1540 statements within the duplicate blocks). This loop creates PHI nodes for
1541 the duplicated blocks and redirects the incoming edges into BB to reach
1542 the duplicates of BB. */
1543 redirection_data
->traverse
<ssa_local_info_t
*, ssa_redirect_edges
>
1546 /* Done with this block. Clear REDIRECTION_DATA. */
1547 delete redirection_data
;
1548 redirection_data
= NULL
;
1551 && bb
== bb
->loop_father
->header
)
1552 set_loop_copy (bb
->loop_father
, NULL
);
1554 BITMAP_FREE (local_info
.duplicate_blocks
);
1555 local_info
.duplicate_blocks
= NULL
;
1557 /* Indicate to our caller whether or not any jumps were threaded. */
1558 return local_info
.jumps_threaded
;
1561 /* Wrapper for thread_block_1 so that we can first handle jump
1562 thread paths which do not involve copying joiner blocks, then
1563 handle jump thread paths which have joiner blocks.
1565 By doing things this way we can be as aggressive as possible and
1566 not worry that copying a joiner block will create a jump threading
1570 thread_block (basic_block bb
, bool noloop_only
)
1573 retval
= thread_block_1 (bb
, noloop_only
, false);
1574 retval
|= thread_block_1 (bb
, noloop_only
, true);
1578 /* Callback for dfs_enumerate_from. Returns true if BB is different
1579 from STOP and DBDS_CE_STOP. */
1581 static basic_block dbds_ce_stop
;
1583 dbds_continue_enumeration_p (const_basic_block bb
, const void *stop
)
1585 return (bb
!= (const_basic_block
) stop
1586 && bb
!= dbds_ce_stop
);
1589 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1590 returns the state. */
1593 determine_bb_domination_status (struct loop
*loop
, basic_block bb
)
1595 basic_block
*bblocks
;
1596 unsigned nblocks
, i
;
1597 bool bb_reachable
= false;
1601 /* This function assumes BB is a successor of LOOP->header.
1602 If that is not the case return DOMST_NONDOMINATING which
1607 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1609 if (e
->src
== loop
->header
)
1617 return DOMST_NONDOMINATING
;
1620 if (bb
== loop
->latch
)
1621 return DOMST_DOMINATING
;
1623 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1626 bblocks
= XCNEWVEC (basic_block
, loop
->num_nodes
);
1627 dbds_ce_stop
= loop
->header
;
1628 nblocks
= dfs_enumerate_from (loop
->latch
, 1, dbds_continue_enumeration_p
,
1629 bblocks
, loop
->num_nodes
, bb
);
1630 for (i
= 0; i
< nblocks
; i
++)
1631 FOR_EACH_EDGE (e
, ei
, bblocks
[i
]->preds
)
1633 if (e
->src
== loop
->header
)
1636 return DOMST_NONDOMINATING
;
1639 bb_reachable
= true;
1643 return (bb_reachable
? DOMST_DOMINATING
: DOMST_LOOP_BROKEN
);
1646 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1647 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1648 to the inside of the loop. */
1651 thread_through_loop_header (struct loop
*loop
, bool may_peel_loop_headers
)
1653 basic_block header
= loop
->header
;
1654 edge e
, tgt_edge
, latch
= loop_latch_edge (loop
);
1656 basic_block tgt_bb
, atgt_bb
;
1657 enum bb_dom_status domst
;
1659 /* We have already threaded through headers to exits, so all the threading
1660 requests now are to the inside of the loop. We need to avoid creating
1661 irreducible regions (i.e., loops with more than one entry block), and
1662 also loop with several latch edges, or new subloops of the loop (although
1663 there are cases where it might be appropriate, it is difficult to decide,
1664 and doing it wrongly may confuse other optimizers).
1666 We could handle more general cases here. However, the intention is to
1667 preserve some information about the loop, which is impossible if its
1668 structure changes significantly, in a way that is not well understood.
1669 Thus we only handle few important special cases, in which also updating
1670 of the loop-carried information should be feasible:
1672 1) Propagation of latch edge to a block that dominates the latch block
1673 of a loop. This aims to handle the following idiom:
1684 After threading the latch edge, this becomes
1695 The original header of the loop is moved out of it, and we may thread
1696 the remaining edges through it without further constraints.
1698 2) All entry edges are propagated to a single basic block that dominates
1699 the latch block of the loop. This aims to handle the following idiom
1700 (normally created for "for" loops):
1723 /* Threading through the header won't improve the code if the header has just
1725 if (single_succ_p (header
))
1728 if (!may_peel_loop_headers
&& !redirection_block_p (loop
->header
))
1734 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1741 /* If latch is not threaded, and there is a header
1742 edge that is not threaded, we would create loop
1743 with multiple entries. */
1747 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1749 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1751 tgt_edge
= (*path
)[1]->e
;
1752 atgt_bb
= tgt_edge
->dest
;
1755 /* Two targets of threading would make us create loop
1756 with multiple entries. */
1757 else if (tgt_bb
!= atgt_bb
)
1763 /* There are no threading requests. */
1767 /* Redirecting to empty loop latch is useless. */
1768 if (tgt_bb
== loop
->latch
1769 && empty_block_p (loop
->latch
))
1773 /* The target block must dominate the loop latch, otherwise we would be
1774 creating a subloop. */
1775 domst
= determine_bb_domination_status (loop
, tgt_bb
);
1776 if (domst
== DOMST_NONDOMINATING
)
1778 if (domst
== DOMST_LOOP_BROKEN
)
1780 /* If the loop ceased to exist, mark it as such, and thread through its
1782 mark_loop_for_removal (loop
);
1783 return thread_block (header
, false);
1786 if (tgt_bb
->loop_father
->header
== tgt_bb
)
1788 /* If the target of the threading is a header of a subloop, we need
1789 to create a preheader for it, so that the headers of the two loops
1791 if (EDGE_COUNT (tgt_bb
->preds
) > 2)
1793 tgt_bb
= create_preheader (tgt_bb
->loop_father
, 0);
1794 gcc_assert (tgt_bb
!= NULL
);
1797 tgt_bb
= split_edge (tgt_edge
);
1800 basic_block new_preheader
;
1802 /* Now consider the case entry edges are redirected to the new entry
1803 block. Remember one entry edge, so that we can find the new
1804 preheader (its destination after threading). */
1805 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1811 /* The duplicate of the header is the new preheader of the loop. Ensure
1812 that it is placed correctly in the loop hierarchy. */
1813 set_loop_copy (loop
, loop_outer (loop
));
1815 thread_block (header
, false);
1816 set_loop_copy (loop
, NULL
);
1817 new_preheader
= e
->dest
;
1819 /* Create the new latch block. This is always necessary, as the latch
1820 must have only a single successor, but the original header had at
1821 least two successors. */
1823 mfb_kj_edge
= single_succ_edge (new_preheader
);
1824 loop
->header
= mfb_kj_edge
->dest
;
1825 latch
= make_forwarder_block (tgt_bb
, mfb_keep_just
, NULL
);
1826 loop
->header
= latch
->dest
;
1827 loop
->latch
= latch
->src
;
1831 /* We failed to thread anything. Cancel the requests. */
1832 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1834 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1838 delete_jump_thread_path (path
);
1845 /* E1 and E2 are edges into the same basic block. Return TRUE if the
1846 PHI arguments associated with those edges are equal or there are no
1847 PHI arguments, otherwise return FALSE. */
1850 phi_args_equal_on_edges (edge e1
, edge e2
)
1853 int indx1
= e1
->dest_idx
;
1854 int indx2
= e2
->dest_idx
;
1856 for (gsi
= gsi_start_phis (e1
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1858 gphi
*phi
= gsi
.phi ();
1860 if (!operand_equal_p (gimple_phi_arg_def (phi
, indx1
),
1861 gimple_phi_arg_def (phi
, indx2
), 0))
1867 /* Walk through the registered jump threads and convert them into a
1868 form convenient for this pass.
1870 Any block which has incoming edges threaded to outgoing edges
1871 will have its entry in THREADED_BLOCK set.
1873 Any threaded edge will have its new outgoing edge stored in the
1874 original edge's AUX field.
1876 This form avoids the need to walk all the edges in the CFG to
1877 discover blocks which need processing and avoids unnecessary
1878 hash table lookups to map from threaded edge to new target. */
1881 mark_threaded_blocks (bitmap threaded_blocks
)
1890 /* It is possible to have jump threads in which one is a subpath
1891 of the other. ie, (A, B), (B, C), (C, D) where B is a joiner
1892 block and (B, C), (C, D) where no joiner block exists.
1894 When this occurs ignore the jump thread request with the joiner
1895 block. It's totally subsumed by the simpler jump thread request.
1897 This results in less block copying, simpler CFGs. More importantly,
1898 when we duplicate the joiner block, B, in this case we will create
1899 a new threading opportunity that we wouldn't be able to optimize
1900 until the next jump threading iteration.
1902 So first convert the jump thread requests which do not require a
1904 for (i
= 0; i
< paths
.length (); i
++)
1906 vec
<jump_thread_edge
*> *path
= paths
[i
];
1908 if ((*path
)[1]->type
!= EDGE_COPY_SRC_JOINER_BLOCK
)
1910 edge e
= (*path
)[0]->e
;
1911 e
->aux
= (void *)path
;
1912 bitmap_set_bit (tmp
, e
->dest
->index
);
1916 /* Now iterate again, converting cases where we want to thread
1917 through a joiner block, but only if no other edge on the path
1918 already has a jump thread attached to it. We do this in two passes,
1919 to avoid situations where the order in the paths vec can hide overlapping
1920 threads (the path is recorded on the incoming edge, so we would miss
1921 cases where the second path starts at a downstream edge on the same
1922 path). First record all joiner paths, deleting any in the unexpected
1923 case where there is already a path for that incoming edge. */
1924 for (i
= 0; i
< paths
.length ();)
1926 vec
<jump_thread_edge
*> *path
= paths
[i
];
1928 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1930 /* Attach the path to the starting edge if none is yet recorded. */
1931 if ((*path
)[0]->e
->aux
== NULL
)
1933 (*path
)[0]->e
->aux
= path
;
1938 paths
.unordered_remove (i
);
1939 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1940 dump_jump_thread_path (dump_file
, *path
, false);
1941 delete_jump_thread_path (path
);
1950 /* Second, look for paths that have any other jump thread attached to
1951 them, and either finish converting them or cancel them. */
1952 for (i
= 0; i
< paths
.length ();)
1954 vec
<jump_thread_edge
*> *path
= paths
[i
];
1955 edge e
= (*path
)[0]->e
;
1957 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& e
->aux
== path
)
1960 for (j
= 1; j
< path
->length (); j
++)
1961 if ((*path
)[j
]->e
->aux
!= NULL
)
1964 /* If we iterated through the entire path without exiting the loop,
1965 then we are good to go, record it. */
1966 if (j
== path
->length ())
1968 bitmap_set_bit (tmp
, e
->dest
->index
);
1974 paths
.unordered_remove (i
);
1975 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1976 dump_jump_thread_path (dump_file
, *path
, false);
1977 delete_jump_thread_path (path
);
1986 /* If optimizing for size, only thread through block if we don't have
1987 to duplicate it or it's an otherwise empty redirection block. */
1988 if (optimize_function_for_size_p (cfun
))
1990 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
1992 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
1993 if (EDGE_COUNT (bb
->preds
) > 1
1994 && !redirection_block_p (bb
))
1996 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2000 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2001 delete_jump_thread_path (path
);
2007 bitmap_set_bit (threaded_blocks
, i
);
2011 bitmap_copy (threaded_blocks
, tmp
);
2013 /* If we have a joiner block (J) which has two successors S1 and S2 and
2014 we are threading though S1 and the final destination of the thread
2015 is S2, then we must verify that any PHI nodes in S2 have the same
2016 PHI arguments for the edge J->S2 and J->S1->...->S2.
2018 We used to detect this prior to registering the jump thread, but
2019 that prohibits propagation of edge equivalences into non-dominated
2020 PHI nodes as the equivalency test might occur before propagation.
2022 This must also occur after we truncate any jump threading paths
2023 as this scenario may only show up after truncation.
2025 This works for now, but will need improvement as part of the FSA
2028 Note since we've moved the thread request data to the edges,
2029 we have to iterate on those rather than the threaded_edges vector. */
2030 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2032 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2033 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2037 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2038 bool have_joiner
= ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
);
2042 basic_block joiner
= e
->dest
;
2043 edge final_edge
= path
->last ()->e
;
2044 basic_block final_dest
= final_edge
->dest
;
2045 edge e2
= find_edge (joiner
, final_dest
);
2047 if (e2
&& !phi_args_equal_on_edges (e2
, final_edge
))
2049 delete_jump_thread_path (path
);
2057 /* Look for jump threading paths which cross multiple loop headers.
2059 The code to thread through loop headers will change the CFG in ways
2060 that invalidate the cached loop iteration information. So we must
2061 detect that case and wipe the cached information. */
2062 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2064 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2065 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2069 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2071 for (unsigned int i
= 0, crossed_headers
= 0;
2072 i
< path
->length ();
2075 basic_block dest
= (*path
)[i
]->e
->dest
;
2076 basic_block src
= (*path
)[i
]->e
->src
;
2077 /* If we enter a loop. */
2078 if (flow_loop_nested_p (src
->loop_father
, dest
->loop_father
))
2080 /* If we step from a block outside an irreducible region
2081 to a block inside an irreducible region, then we have
2082 crossed into a loop. */
2083 else if (! (src
->flags
& BB_IRREDUCIBLE_LOOP
)
2084 && (dest
->flags
& BB_IRREDUCIBLE_LOOP
))
2086 if (crossed_headers
> 1)
2088 vect_free_loop_info_assumptions
2089 ((*path
)[path
->length () - 1]->e
->dest
->loop_father
);
2099 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2100 case of SEME Single Entry Multiple Exits region in which all nodes in the
2101 REGION have exactly one incoming edge. The only exception is the first block
2102 that may not have been connected to the rest of the cfg yet. */
2105 verify_jump_thread (basic_block
*region
, unsigned n_region
)
2107 for (unsigned i
= 0; i
< n_region
; i
++)
2108 gcc_assert (EDGE_COUNT (region
[i
]->preds
) <= 1);
2111 /* Return true when BB is one of the first N items in BBS. */
2114 bb_in_bbs (basic_block bb
, basic_block
*bbs
, int n
)
2116 for (int i
= 0; i
< n
; i
++)
2123 /* Duplicates a jump-thread path of N_REGION basic blocks.
2124 The ENTRY edge is redirected to the duplicate of the region.
2126 Remove the last conditional statement in the last basic block in the REGION,
2127 and create a single fallthru edge pointing to the same destination as the
2130 Returns false if it is unable to copy the region, true otherwise. */
2133 duplicate_thread_path (edge entry
, edge exit
, basic_block
*region
,
2137 struct loop
*loop
= entry
->dest
->loop_father
;
2141 profile_count curr_count
;
2143 if (!can_copy_bbs_p (region
, n_region
))
2146 /* Some sanity checking. Note that we do not check for all possible
2147 missuses of the functions. I.e. if you ask to copy something weird,
2148 it will work, but the state of structures probably will not be
2150 for (i
= 0; i
< n_region
; i
++)
2152 /* We do not handle subloops, i.e. all the blocks must belong to the
2154 if (region
[i
]->loop_father
!= loop
)
2158 initialize_original_copy_tables ();
2160 set_loop_copy (loop
, loop
);
2162 basic_block
*region_copy
= XNEWVEC (basic_block
, n_region
);
2163 copy_bbs (region
, n_region
, region_copy
, &exit
, 1, &exit_copy
, loop
,
2164 split_edge_bb_loc (entry
), false);
2166 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2167 following code ensures that all the edges exiting the jump-thread path are
2168 redirected back to the original code: these edges are exceptions
2169 invalidating the property that is propagated by executing all the blocks of
2170 the jump-thread path in order. */
2172 curr_count
= entry
->count ();
2173 curr_freq
= EDGE_FREQUENCY (entry
);
2175 for (i
= 0; i
< n_region
; i
++)
2179 basic_block bb
= region_copy
[i
];
2181 /* Watch inconsistent profile. */
2182 if (curr_count
> region
[i
]->count
)
2183 curr_count
= region
[i
]->count
;
2184 if (curr_freq
> region
[i
]->frequency
)
2185 curr_freq
= region
[i
]->frequency
;
2186 /* Scale current BB. */
2187 if (region
[i
]->count
> 0 && curr_count
.initialized_p ())
2189 /* In the middle of the path we only scale the frequencies.
2190 In last BB we need to update probabilities of outgoing edges
2191 because we know which one is taken at the threaded path. */
2192 if (i
+ 1 != n_region
)
2193 scale_bbs_frequencies_profile_count (region
+ i
, 1,
2194 region
[i
]->count
- curr_count
,
2197 update_bb_profile_for_threading (region
[i
],
2198 curr_freq
, curr_count
,
2200 scale_bbs_frequencies_profile_count (region_copy
+ i
, 1, curr_count
,
2201 region_copy
[i
]->count
);
2203 else if (region
[i
]->frequency
)
2205 if (i
+ 1 != n_region
)
2206 scale_bbs_frequencies_int (region
+ i
, 1,
2207 region
[i
]->frequency
- curr_freq
,
2208 region
[i
]->frequency
);
2210 update_bb_profile_for_threading (region
[i
],
2211 curr_freq
, curr_count
,
2213 scale_bbs_frequencies_int (region_copy
+ i
, 1, curr_freq
,
2214 region_copy
[i
]->frequency
);
2217 if (single_succ_p (bb
))
2219 /* Make sure the successor is the next node in the path. */
2220 gcc_assert (i
+ 1 == n_region
2221 || region_copy
[i
+ 1] == single_succ_edge (bb
)->dest
);
2222 if (i
+ 1 != n_region
)
2224 curr_freq
= EDGE_FREQUENCY (single_succ_edge (bb
));
2225 curr_count
= single_succ_edge (bb
)->count ();
2230 /* Special case the last block on the path: make sure that it does not
2231 jump back on the copied path, including back to itself. */
2232 if (i
+ 1 == n_region
)
2234 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2235 if (bb_in_bbs (e
->dest
, region_copy
, n_region
))
2237 basic_block orig
= get_bb_original (e
->dest
);
2239 redirect_edge_and_branch_force (e
, orig
);
2244 /* Redirect all other edges jumping to non-adjacent blocks back to the
2246 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2247 if (region_copy
[i
+ 1] != e
->dest
)
2249 basic_block orig
= get_bb_original (e
->dest
);
2251 redirect_edge_and_branch_force (e
, orig
);
2255 curr_freq
= EDGE_FREQUENCY (e
);
2256 curr_count
= e
->count ();
2262 verify_jump_thread (region_copy
, n_region
);
2264 /* Remove the last branch in the jump thread path. */
2265 remove_ctrl_stmt_and_useless_edges (region_copy
[n_region
- 1], exit
->dest
);
2267 /* And fixup the flags on the single remaining edge. */
2268 edge fix_e
= find_edge (region_copy
[n_region
- 1], exit
->dest
);
2269 fix_e
->flags
&= ~(EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
| EDGE_ABNORMAL
);
2270 fix_e
->flags
|= EDGE_FALLTHRU
;
2272 edge e
= make_edge (region_copy
[n_region
- 1], exit
->dest
, EDGE_FALLTHRU
);
2276 rescan_loop_exit (e
, true, false);
2277 e
->probability
= profile_probability::always ();
2280 /* Redirect the entry and add the phi node arguments. */
2281 if (entry
->dest
== loop
->header
)
2282 mark_loop_for_removal (loop
);
2283 redirected
= redirect_edge_and_branch (entry
, get_bb_copy (entry
->dest
));
2284 gcc_assert (redirected
!= NULL
);
2285 flush_pending_stmts (entry
);
2287 /* Add the other PHI node arguments. */
2288 add_phi_args_after_copy (region_copy
, n_region
, NULL
);
2292 free_original_copy_tables ();
2296 /* Return true when PATH is a valid jump-thread path. */
2299 valid_jump_thread_path (vec
<jump_thread_edge
*> *path
)
2301 unsigned len
= path
->length ();
2303 /* Check that the path is connected. */
2304 for (unsigned int j
= 0; j
< len
- 1; j
++)
2306 edge e
= (*path
)[j
]->e
;
2307 if (e
->dest
!= (*path
)[j
+1]->e
->src
)
2313 /* Remove any queued jump threads that include edge E.
2315 We don't actually remove them here, just record the edges into ax
2316 hash table. That way we can do the search once per iteration of
2317 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2320 remove_jump_threads_including (edge_def
*e
)
2322 if (!paths
.exists ())
2326 removed_edges
= new hash_table
<struct removed_edges
> (17);
2328 edge
*slot
= removed_edges
->find_slot (e
, INSERT
);
2332 /* Walk through all blocks and thread incoming edges to the appropriate
2333 outgoing edge for each edge pair recorded in THREADED_EDGES.
2335 It is the caller's responsibility to fix the dominance information
2336 and rewrite duplicated SSA_NAMEs back into SSA form.
2338 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2339 loop headers if it does not simplify the loop.
2341 Returns true if one or more edges were threaded, false otherwise. */
2344 thread_through_all_blocks (bool may_peel_loop_headers
)
2346 bool retval
= false;
2350 auto_bitmap threaded_blocks
;
2352 if (!paths
.exists ())
2358 memset (&thread_stats
, 0, sizeof (thread_stats
));
2360 /* Remove any paths that referenced removed edges. */
2362 for (i
= 0; i
< paths
.length (); )
2365 vec
<jump_thread_edge
*> *path
= paths
[i
];
2367 for (j
= 0; j
< path
->length (); j
++)
2369 edge e
= (*path
)[j
]->e
;
2370 if (removed_edges
->find_slot (e
, NO_INSERT
))
2374 if (j
!= path
->length ())
2376 delete_jump_thread_path (path
);
2377 paths
.unordered_remove (i
);
2383 /* Jump-thread all FSM threads before other jump-threads. */
2384 for (i
= 0; i
< paths
.length ();)
2386 vec
<jump_thread_edge
*> *path
= paths
[i
];
2387 edge entry
= (*path
)[0]->e
;
2389 /* Only code-generate FSM jump-threads in this loop. */
2390 if ((*path
)[0]->type
!= EDGE_FSM_THREAD
)
2396 /* Do not jump-thread twice from the same block. */
2397 if (bitmap_bit_p (threaded_blocks
, entry
->src
->index
)
2398 /* We may not want to realize this jump thread path
2399 for various reasons. So check it first. */
2400 || !valid_jump_thread_path (path
))
2402 /* Remove invalid FSM jump-thread paths. */
2403 delete_jump_thread_path (path
);
2404 paths
.unordered_remove (i
);
2408 unsigned len
= path
->length ();
2409 edge exit
= (*path
)[len
- 1]->e
;
2410 basic_block
*region
= XNEWVEC (basic_block
, len
- 1);
2412 for (unsigned int j
= 0; j
< len
- 1; j
++)
2413 region
[j
] = (*path
)[j
]->e
->dest
;
2415 if (duplicate_thread_path (entry
, exit
, region
, len
- 1))
2417 /* We do not update dominance info. */
2418 free_dominance_info (CDI_DOMINATORS
);
2419 bitmap_set_bit (threaded_blocks
, entry
->src
->index
);
2421 thread_stats
.num_threaded_edges
++;
2424 delete_jump_thread_path (path
);
2425 paths
.unordered_remove (i
);
2429 /* Remove from PATHS all the jump-threads starting with an edge already
2431 for (i
= 0; i
< paths
.length ();)
2433 vec
<jump_thread_edge
*> *path
= paths
[i
];
2434 edge entry
= (*path
)[0]->e
;
2436 /* Do not jump-thread twice from the same block. */
2437 if (bitmap_bit_p (threaded_blocks
, entry
->src
->index
))
2439 delete_jump_thread_path (path
);
2440 paths
.unordered_remove (i
);
2446 bitmap_clear (threaded_blocks
);
2448 mark_threaded_blocks (threaded_blocks
);
2450 initialize_original_copy_tables ();
2452 /* First perform the threading requests that do not affect
2454 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks
, 0, i
, bi
)
2456 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2458 if (EDGE_COUNT (bb
->preds
) > 0)
2459 retval
|= thread_block (bb
, true);
2462 /* Then perform the threading through loop headers. We start with the
2463 innermost loop, so that the changes in cfg we perform won't affect
2464 further threading. */
2465 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
2468 || !bitmap_bit_p (threaded_blocks
, loop
->header
->index
))
2471 retval
|= thread_through_loop_header (loop
, may_peel_loop_headers
);
2474 /* All jump threading paths should have been resolved at this
2475 point. Verify that is the case. */
2477 FOR_EACH_BB_FN (bb
, cfun
)
2481 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2482 gcc_assert (e
->aux
== NULL
);
2485 statistics_counter_event (cfun
, "Jumps threaded",
2486 thread_stats
.num_threaded_edges
);
2488 free_original_copy_tables ();
2493 loops_state_set (LOOPS_NEED_FIXUP
);
2496 delete removed_edges
;
2497 removed_edges
= NULL
;
2501 /* Delete the jump threading path PATH. We have to explicitly delete
2502 each entry in the vector, then the container. */
2505 delete_jump_thread_path (vec
<jump_thread_edge
*> *path
)
2507 for (unsigned int i
= 0; i
< path
->length (); i
++)
2513 /* Register a jump threading opportunity. We queue up all the jump
2514 threading opportunities discovered by a pass and update the CFG
2515 and SSA form all at once.
2517 E is the edge we can thread, E2 is the new target edge, i.e., we
2518 are effectively recording that E->dest can be changed to E2->dest
2519 after fixing the SSA graph. */
2522 register_jump_thread (vec
<jump_thread_edge
*> *path
)
2524 if (!dbg_cnt (registered_jump_thread
))
2526 delete_jump_thread_path (path
);
2530 /* First make sure there are no NULL outgoing edges on the jump threading
2531 path. That can happen for jumping to a constant address. */
2532 for (unsigned int i
= 0; i
< path
->length (); i
++)
2534 if ((*path
)[i
]->e
== NULL
)
2536 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2539 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2540 dump_jump_thread_path (dump_file
, *path
, false);
2543 delete_jump_thread_path (path
);
2547 /* Only the FSM threader is allowed to thread across
2548 backedges in the CFG. */
2550 && (*path
)[0]->type
!= EDGE_FSM_THREAD
)
2551 gcc_assert (((*path
)[i
]->e
->flags
& EDGE_DFS_BACK
) == 0);
2554 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2555 dump_jump_thread_path (dump_file
, *path
, true);
2557 if (!paths
.exists ())
2560 paths
.safe_push (path
);