1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
29 #include "fold-const.h"
31 #include "gimple-iterator.h"
33 #include "tree-ssa-threadupdate.h"
37 #include "tree-vectorizer.h"
38 #include "tree-pass.h"
40 /* Given a block B, update the CFG and SSA graph to reflect redirecting
41 one or more in-edges to B to instead reach the destination of an
42 out-edge from B while preserving any side effects in B.
44 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
45 side effects of executing B.
47 1. Make a copy of B (including its outgoing edges and statements). Call
48 the copy B'. Note B' has no incoming edges or PHIs at this time.
50 2. Remove the control statement at the end of B' and all outgoing edges
53 3. Add a new argument to each PHI in C with the same value as the existing
54 argument associated with edge B->C. Associate the new PHI arguments
57 4. For each PHI in B, find or create a PHI in B' with an identical
58 PHI_RESULT. Add an argument to the PHI in B' which has the same
59 value as the PHI in B associated with the edge A->B. Associate
60 the new argument in the PHI in B' with the edge A->B.
62 5. Change the edge A->B to A->B'.
64 5a. This automatically deletes any PHI arguments associated with the
67 5b. This automatically associates each new argument added in step 4
70 6. Repeat for other incoming edges into B.
72 7. Put the duplicated resources in B and all the B' blocks into SSA form.
74 Note that block duplication can be minimized by first collecting the
75 set of unique destination blocks that the incoming edges should
78 We reduce the number of edges and statements we create by not copying all
79 the outgoing edges and the control statement in step #1. We instead create
80 a template block without the outgoing edges and duplicate the template.
82 Another case this code handles is threading through a "joiner" block. In
83 this case, we do not know the destination of the joiner block, but one
84 of the outgoing edges from the joiner block leads to a threadable path. This
85 case largely works as outlined above, except the duplicate of the joiner
86 block still contains a full set of outgoing edges and its control statement.
87 We just redirect one of its outgoing edges to our jump threading path. */
90 /* Steps #5 and #6 of the above algorithm are best implemented by walking
91 all the incoming edges which thread to the same destination edge at
92 the same time. That avoids lots of table lookups to get information
93 for the destination edge.
95 To realize that implementation we create a list of incoming edges
96 which thread to the same outgoing edge. Thus to implement steps
97 #5 and #6 we traverse our hash table of outgoing edge information.
98 For each entry we walk the list of incoming edges which thread to
99 the current outgoing edge. */
107 /* Main data structure recording information regarding B's duplicate
110 /* We need to efficiently record the unique thread destinations of this
111 block and specific information associated with those destinations. We
112 may have many incoming edges threaded to the same outgoing edge. This
113 can be naturally implemented with a hash table. */
115 struct redirection_data
: free_ptr_hash
<redirection_data
>
117 /* We support wiring up two block duplicates in a jump threading path.
119 One is a normal block copy where we remove the control statement
120 and wire up its single remaining outgoing edge to the thread path.
122 The other is a joiner block where we leave the control statement
123 in place, but wire one of the outgoing edges to a thread path.
125 In theory we could have multiple block duplicates in a jump
126 threading path, but I haven't tried that.
128 The duplicate blocks appear in this array in the same order in
129 which they appear in the jump thread path. */
130 basic_block dup_blocks
[2];
132 vec
<jump_thread_edge
*> *path
;
134 /* A list of incoming edges which we want to thread to the
136 struct el
*incoming_edges
;
138 /* hash_table support. */
139 static inline hashval_t
hash (const redirection_data
*);
140 static inline int equal (const redirection_data
*, const redirection_data
*);
143 jump_thread_path_allocator::jump_thread_path_allocator ()
145 obstack_init (&m_obstack
);
148 jump_thread_path_allocator::~jump_thread_path_allocator ()
150 obstack_free (&m_obstack
, NULL
);
154 jump_thread_path_allocator::allocate_thread_edge (edge e
,
155 jump_thread_edge_type type
)
157 void *r
= obstack_alloc (&m_obstack
, sizeof (jump_thread_edge
));
158 return new (r
) jump_thread_edge (e
, type
);
161 vec
<jump_thread_edge
*> *
162 jump_thread_path_allocator::allocate_thread_path ()
164 // ?? Since the paths live in an obstack, we should be able to remove all
165 // references to path->release() throughout the code.
166 void *r
= obstack_alloc (&m_obstack
, sizeof (vec
<jump_thread_edge
*>));
167 return new (r
) vec
<jump_thread_edge
*> ();
170 jt_path_registry::jt_path_registry (bool backedge_threads
)
173 m_num_threaded_edges
= 0;
174 m_backedge_threads
= backedge_threads
;
177 jt_path_registry::~jt_path_registry ()
182 fwd_jt_path_registry::fwd_jt_path_registry ()
183 : jt_path_registry (/*backedge_threads=*/false)
185 m_removed_edges
= new hash_table
<struct removed_edges
> (17);
186 m_redirection_data
= NULL
;
189 fwd_jt_path_registry::~fwd_jt_path_registry ()
191 delete m_removed_edges
;
194 back_jt_path_registry::back_jt_path_registry ()
195 : jt_path_registry (/*backedge_threads=*/true)
200 jt_path_registry::push_edge (vec
<jump_thread_edge
*> *path
,
201 edge e
, jump_thread_edge_type type
)
203 jump_thread_edge
*x
= m_allocator
.allocate_thread_edge (e
, type
);
207 vec
<jump_thread_edge
*> *
208 jt_path_registry::allocate_thread_path ()
210 return m_allocator
.allocate_thread_path ();
213 /* Dump a jump threading path, including annotations about each
217 dump_jump_thread_path (FILE *dump_file
,
218 const vec
<jump_thread_edge
*> &path
,
223 " [%u] Registering jump thread: (%d, %d) incoming edge; ",
224 dbg_cnt_counter (registered_jump_thread
),
225 path
[0]->e
->src
->index
, path
[0]->e
->dest
->index
);
228 " Cancelling jump thread: (%d, %d) incoming edge; ",
229 path
[0]->e
->src
->index
, path
[0]->e
->dest
->index
);
231 for (unsigned int i
= 1; i
< path
.length (); i
++)
233 /* We can get paths with a NULL edge when the final destination
234 of a jump thread turns out to be a constant address. We dump
235 those paths when debugging, so we have to be prepared for that
237 if (path
[i
]->e
== NULL
)
240 fprintf (dump_file
, " (%d, %d) ",
241 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
242 switch (path
[i
]->type
)
244 case EDGE_COPY_SRC_JOINER_BLOCK
:
245 fprintf (dump_file
, "joiner");
247 case EDGE_COPY_SRC_BLOCK
:
248 fprintf (dump_file
, "normal");
250 case EDGE_NO_COPY_SRC_BLOCK
:
251 fprintf (dump_file
, "nocopy");
257 if ((path
[i
]->e
->flags
& EDGE_DFS_BACK
) != 0)
258 fprintf (dump_file
, " (back)");
260 fprintf (dump_file
, "; \n");
264 debug (const vec
<jump_thread_edge
*> &path
)
266 dump_jump_thread_path (stderr
, path
, true);
270 debug (const vec
<jump_thread_edge
*> *path
)
275 /* Release the memory associated with PATH, and if dumping is enabled,
276 dump out the reason why the thread was canceled. */
279 cancel_thread (vec
<jump_thread_edge
*> *path
, const char *reason
= NULL
)
281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
284 fprintf (dump_file
, "%s: ", reason
);
286 dump_jump_thread_path (dump_file
, *path
, false);
287 fprintf (dump_file
, "\n");
292 /* Simple hashing function. For any given incoming edge E, we're going
293 to be most concerned with the final destination of its jump thread
294 path. So hash on the block index of the final edge in the path. */
297 redirection_data::hash (const redirection_data
*p
)
299 vec
<jump_thread_edge
*> *path
= p
->path
;
300 return path
->last ()->e
->dest
->index
;
303 /* Given two hash table entries, return true if they have the same
304 jump threading path. */
306 redirection_data::equal (const redirection_data
*p1
, const redirection_data
*p2
)
308 vec
<jump_thread_edge
*> *path1
= p1
->path
;
309 vec
<jump_thread_edge
*> *path2
= p2
->path
;
311 if (path1
->length () != path2
->length ())
314 for (unsigned int i
= 1; i
< path1
->length (); i
++)
316 if ((*path1
)[i
]->type
!= (*path2
)[i
]->type
317 || (*path1
)[i
]->e
!= (*path2
)[i
]->e
)
324 /* Data structure of information to pass to hash table traversal routines. */
325 struct ssa_local_info_t
327 /* The current block we are working on. */
330 /* We only create a template block for the first duplicated block in a
331 jump threading path as we may need many duplicates of that block.
333 The second duplicate block in a path is specific to that path. Creating
334 and sharing a template for that block is considerably more difficult. */
335 basic_block template_block
;
337 /* If we append debug stmts to the template block after creating it,
338 this iterator won't be the last one in the block, and further
339 copies of the template block shouldn't get debug stmts after
341 gimple_stmt_iterator template_last_to_copy
;
343 /* Blocks duplicated for the thread. */
344 bitmap duplicate_blocks
;
346 /* TRUE if we thread one or more jumps, FALSE otherwise. */
349 /* When we have multiple paths through a joiner which reach different
350 final destinations, then we may need to correct for potential
351 profile insanities. */
352 bool need_profile_correction
;
354 // Jump threading statistics.
355 unsigned long num_threaded_edges
;
358 /* When we start updating the CFG for threading, data necessary for jump
359 threading is attached to the AUX field for the incoming edge. Use these
360 macros to access the underlying structure attached to the AUX field. */
361 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
363 /* Remove the last statement in block BB if it is a control statement
364 Also remove all outgoing edges except the edge which reaches DEST_BB.
365 If DEST_BB is NULL, then remove all outgoing edges. */
368 remove_ctrl_stmt_and_useless_edges (basic_block bb
, basic_block dest_bb
)
370 gimple_stmt_iterator gsi
;
374 gsi
= gsi_last_bb (bb
);
376 /* If the duplicate ends with a control statement, then remove it.
378 Note that if we are duplicating the template block rather than the
379 original basic block, then the duplicate might not have any real
383 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
384 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
385 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
))
386 gsi_remove (&gsi
, true);
388 for (ei
= ei_start (bb
->succs
); (e
= ei_safe_edge (ei
)); )
390 if (e
->dest
!= dest_bb
)
392 free_dom_edge_info (e
);
397 e
->probability
= profile_probability::always ();
402 /* If the remaining edge is a loop exit, there must have
403 a removed edge that was not a loop exit.
405 In that case BB and possibly other blocks were previously
406 in the loop, but are now outside the loop. Thus, we need
407 to update the loop structures. */
408 if (single_succ_p (bb
)
409 && loop_outer (bb
->loop_father
)
410 && loop_exit_edge_p (bb
->loop_father
, single_succ_edge (bb
)))
411 loops_state_set (LOOPS_NEED_FIXUP
);
414 /* Create a duplicate of BB. Record the duplicate block in an array
415 indexed by COUNT stored in RD. */
418 create_block_for_threading (basic_block bb
,
419 struct redirection_data
*rd
,
421 bitmap
*duplicate_blocks
)
426 /* We can use the generic block duplication code and simply remove
427 the stuff we do not need. */
428 rd
->dup_blocks
[count
] = duplicate_block (bb
, NULL
, NULL
);
430 FOR_EACH_EDGE (e
, ei
, rd
->dup_blocks
[count
]->succs
)
434 /* If we duplicate a block with an outgoing edge marked as
435 EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
436 leak out of the current pass.
438 It would be better to simplify switch statements and remove
439 the edges before we get here, but the sequencing is nontrivial. */
440 e
->flags
&= ~EDGE_IGNORE
;
443 /* Zero out the profile, since the block is unreachable for now. */
444 rd
->dup_blocks
[count
]->count
= profile_count::uninitialized ();
445 if (duplicate_blocks
)
446 bitmap_set_bit (*duplicate_blocks
, rd
->dup_blocks
[count
]->index
);
449 /* Given an outgoing edge E lookup and return its entry in our hash table.
451 If INSERT is true, then we insert the entry into the hash table if
452 it is not already present. INCOMING_EDGE is added to the list of incoming
453 edges associated with E in the hash table. */
456 fwd_jt_path_registry::lookup_redirection_data (edge e
, insert_option insert
)
458 struct redirection_data
**slot
;
459 struct redirection_data
*elt
;
460 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
462 /* Build a hash table element so we can see if E is already
464 elt
= XNEW (struct redirection_data
);
466 elt
->dup_blocks
[0] = NULL
;
467 elt
->dup_blocks
[1] = NULL
;
468 elt
->incoming_edges
= NULL
;
470 slot
= m_redirection_data
->find_slot (elt
, insert
);
472 /* This will only happen if INSERT is false and the entry is not
473 in the hash table. */
480 /* This will only happen if E was not in the hash table and
485 elt
->incoming_edges
= XNEW (struct el
);
486 elt
->incoming_edges
->e
= e
;
487 elt
->incoming_edges
->next
= NULL
;
490 /* E was in the hash table. */
493 /* Free ELT as we do not need it anymore, we will extract the
494 relevant entry from the hash table itself. */
497 /* Get the entry stored in the hash table. */
500 /* If insertion was requested, then we need to add INCOMING_EDGE
501 to the list of incoming edges associated with E. */
504 struct el
*el
= XNEW (struct el
);
505 el
->next
= elt
->incoming_edges
;
507 elt
->incoming_edges
= el
;
514 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
515 to see if it has constant value in a flow sensitive manner. Set
516 LOCUS to location of the constant phi arg and return the value.
517 Return DEF directly if either PATH or idx is ZERO. */
520 get_value_locus_in_path (tree def
, vec
<jump_thread_edge
*> *path
,
521 basic_block bb
, int idx
, location_t
*locus
)
527 if (path
== NULL
|| idx
== 0)
530 def_phi
= dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (def
));
534 def_bb
= gimple_bb (def_phi
);
535 /* Don't propagate loop invariants into deeper loops. */
536 if (!def_bb
|| bb_loop_depth (def_bb
) < bb_loop_depth (bb
))
539 /* Backtrack jump threading path from IDX to see if def has constant
541 for (int j
= idx
- 1; j
>= 0; j
--)
543 edge e
= (*path
)[j
]->e
;
544 if (e
->dest
== def_bb
)
546 arg
= gimple_phi_arg_def (def_phi
, e
->dest_idx
);
547 if (is_gimple_min_invariant (arg
))
549 *locus
= gimple_phi_arg_location (def_phi
, e
->dest_idx
);
559 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
560 Try to backtrack jump threading PATH from node IDX to see if the arg
561 has constant value, copy constant value instead of argument itself
565 copy_phi_args (basic_block bb
, edge src_e
, edge tgt_e
,
566 vec
<jump_thread_edge
*> *path
, int idx
)
569 int src_indx
= src_e
->dest_idx
;
571 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
573 gphi
*phi
= gsi
.phi ();
574 tree def
= gimple_phi_arg_def (phi
, src_indx
);
575 location_t locus
= gimple_phi_arg_location (phi
, src_indx
);
577 if (TREE_CODE (def
) == SSA_NAME
578 && !virtual_operand_p (gimple_phi_result (phi
)))
579 def
= get_value_locus_in_path (def
, path
, bb
, idx
, &locus
);
581 add_phi_arg (phi
, def
, tgt_e
, locus
);
585 /* We have recently made a copy of ORIG_BB, including its outgoing
586 edges. The copy is NEW_BB. Every PHI node in every direct successor of
587 ORIG_BB has a new argument associated with edge from NEW_BB to the
588 successor. Initialize the PHI argument so that it is equal to the PHI
589 argument associated with the edge from ORIG_BB to the successor.
590 PATH and IDX are used to check if the new PHI argument has constant
591 value in a flow sensitive manner. */
594 update_destination_phis (basic_block orig_bb
, basic_block new_bb
,
595 vec
<jump_thread_edge
*> *path
, int idx
)
600 FOR_EACH_EDGE (e
, ei
, orig_bb
->succs
)
602 edge e2
= find_edge (new_bb
, e
->dest
);
603 copy_phi_args (e
->dest
, e
, e2
, path
, idx
);
607 /* Given a duplicate block and its single destination (both stored
608 in RD). Create an edge between the duplicate and its single
611 Add an additional argument to any PHI nodes at the single
612 destination. IDX is the start node in jump threading path
613 we start to check to see if the new PHI argument has constant
614 value along the jump threading path. */
617 create_edge_and_update_destination_phis (struct redirection_data
*rd
,
618 basic_block bb
, int idx
)
620 edge e
= make_single_succ_edge (bb
, rd
->path
->last ()->e
->dest
, EDGE_FALLTHRU
);
622 rescan_loop_exit (e
, true, false);
624 /* We used to copy the thread path here. That was added in 2007
625 and dutifully updated through the representation changes in 2013.
627 In 2013 we added code to thread from an interior node through
628 the backedge to another interior node. That runs after the code
629 to thread through loop headers from outside the loop.
631 The latter may delete edges in the CFG, including those
632 which appeared in the jump threading path we copied here. Thus
633 we'd end up using a dangling pointer.
635 After reviewing the 2007/2011 code, I can't see how anything
636 depended on copying the AUX field and clearly copying the jump
637 threading path is problematical due to embedded edge pointers.
638 It has been removed. */
641 /* If there are any PHI nodes at the destination of the outgoing edge
642 from the duplicate block, then we will need to add a new argument
643 to them. The argument should have the same value as the argument
644 associated with the outgoing edge stored in RD. */
645 copy_phi_args (e
->dest
, rd
->path
->last ()->e
, e
, rd
->path
, idx
);
648 /* Look through PATH beginning at START and return TRUE if there are
649 any additional blocks that need to be duplicated. Otherwise,
652 any_remaining_duplicated_blocks (vec
<jump_thread_edge
*> *path
,
655 for (unsigned int i
= start
+ 1; i
< path
->length (); i
++)
657 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
658 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
665 /* Compute the amount of profile count coming into the jump threading
666 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
667 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
668 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
669 identify blocks duplicated for jump threading, which have duplicated
670 edges that need to be ignored in the analysis. Return true if path contains
671 a joiner, false otherwise.
673 In the non-joiner case, this is straightforward - all the counts
674 flowing into the jump threading path should flow through the duplicated
675 block and out of the duplicated path.
677 In the joiner case, it is very tricky. Some of the counts flowing into
678 the original path go offpath at the joiner. The problem is that while
679 we know how much total count goes off-path in the original control flow,
680 we don't know how many of the counts corresponding to just the jump
681 threading path go offpath at the joiner.
683 For example, assume we have the following control flow and identified
684 jump threading paths:
703 Jump threading paths: A -> J -> Son -> D (path 1)
704 C -> J -> Son -> E (path 2)
706 Note that the control flow could be more complicated:
707 - Each jump threading path may have more than one incoming edge. I.e. A and
708 Ea could represent multiple incoming blocks/edges that are included in
710 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
711 before or after the "normal" copy block). These are not duplicated onto
712 the jump threading path, as they are single-successor.
713 - Any of the blocks along the path may have other incoming edges that
714 are not part of any jump threading path, but add profile counts along
717 In the above example, after all jump threading is complete, we will
718 end up with the following control flow:
727 Eona/ \ ---/---\-------- \Eonc
732 \___________ / \ _____/
737 The main issue to notice here is that when we are processing path 1
738 (A->J->Son->D) we need to figure out the outgoing edge weights to
739 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
740 sum of the incoming weights to D remain Ed. The problem with simply
741 assuming that Ja (and Jc when processing path 2) has the same outgoing
742 probabilities to its successors as the original block J, is that after
743 all paths are processed and other edges/counts removed (e.g. none
744 of Ec will reach D after processing path 2), we may end up with not
745 enough count flowing along duplicated edge Sona->D.
747 Therefore, in the case of a joiner, we keep track of all counts
748 coming in along the current path, as well as from predecessors not
749 on any jump threading path (Eb in the above example). While we
750 first assume that the duplicated Eona for Ja->Sona has the same
751 probability as the original, we later compensate for other jump
752 threading paths that may eliminate edges. We do that by keep track
753 of all counts coming into the original path that are not in a jump
754 thread (Eb in the above example, but as noted earlier, there could
755 be other predecessors incoming to the path at various points, such
756 as at Son). Call this cumulative non-path count coming into the path
757 before D as Enonpath. We then ensure that the count from Sona->D is as at
758 least as big as (Ed - Enonpath), but no bigger than the minimum
759 weight along the jump threading path. The probabilities of both the
760 original and duplicated joiner block J and Ja will be adjusted
761 accordingly after the updates. */
764 compute_path_counts (struct redirection_data
*rd
,
765 ssa_local_info_t
*local_info
,
766 profile_count
*path_in_count_ptr
,
767 profile_count
*path_out_count_ptr
)
769 edge e
= rd
->incoming_edges
->e
;
770 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
771 edge elast
= path
->last ()->e
;
772 profile_count nonpath_count
= profile_count::zero ();
773 bool has_joiner
= false;
774 profile_count path_in_count
= profile_count::zero ();
776 /* Start by accumulating incoming edge counts to the path's first bb
777 into a couple buckets:
778 path_in_count: total count of incoming edges that flow into the
780 nonpath_count: total count of incoming edges that are not
781 flowing along *any* path. These are the counts
782 that will still flow along the original path after
783 all path duplication is done by potentially multiple
784 calls to this routine.
785 (any other incoming edge counts are for a different jump threading
786 path that will be handled by a later call to this routine.)
787 To make this easier, start by recording all incoming edges that flow into
788 the current path in a bitmap. We could add up the path's incoming edge
789 counts here, but we still need to walk all the first bb's incoming edges
790 below to add up the counts of the other edges not included in this jump
792 struct el
*next
, *el
;
793 auto_bitmap in_edge_srcs
;
794 for (el
= rd
->incoming_edges
; el
; el
= next
)
797 bitmap_set_bit (in_edge_srcs
, el
->e
->src
->index
);
801 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
803 vec
<jump_thread_edge
*> *ein_path
= THREAD_PATH (ein
);
804 /* Simply check the incoming edge src against the set captured above. */
806 && bitmap_bit_p (in_edge_srcs
, (*ein_path
)[0]->e
->src
->index
))
808 /* It is necessary but not sufficient that the last path edges
809 are identical. There may be different paths that share the
810 same last path edge in the case where the last edge has a nocopy
812 gcc_assert (ein_path
->last ()->e
== elast
);
813 path_in_count
+= ein
->count ();
817 /* Keep track of the incoming edges that are not on any jump-threading
818 path. These counts will still flow out of original path after all
819 jump threading is complete. */
820 nonpath_count
+= ein
->count ();
824 /* Now compute the fraction of the total count coming into the first
825 path bb that is from the current threading path. */
826 profile_count total_count
= e
->dest
->count
;
827 /* Handle incoming profile insanities. */
828 if (total_count
< path_in_count
)
829 path_in_count
= total_count
;
830 profile_probability onpath_scale
= path_in_count
.probability_in (total_count
);
832 /* Walk the entire path to do some more computation in order to estimate
833 how much of the path_in_count will flow out of the duplicated threading
834 path. In the non-joiner case this is straightforward (it should be
835 the same as path_in_count, although we will handle incoming profile
836 insanities by setting it equal to the minimum count along the path).
838 In the joiner case, we need to estimate how much of the path_in_count
839 will stay on the threading path after the joiner's conditional branch.
840 We don't really know for sure how much of the counts
841 associated with this path go to each successor of the joiner, but we'll
842 estimate based on the fraction of the total count coming into the path
843 bb was from the threading paths (computed above in onpath_scale).
844 Afterwards, we will need to do some fixup to account for other threading
845 paths and possible profile insanities.
847 In order to estimate the joiner case's counts we also need to update
848 nonpath_count with any additional counts coming into the path. Other
849 blocks along the path may have additional predecessors from outside
851 profile_count path_out_count
= path_in_count
;
852 profile_count min_path_count
= path_in_count
;
853 for (unsigned int i
= 1; i
< path
->length (); i
++)
855 edge epath
= (*path
)[i
]->e
;
856 profile_count cur_count
= epath
->count ();
857 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
860 cur_count
= cur_count
.apply_probability (onpath_scale
);
862 /* In the joiner case we need to update nonpath_count for any edges
863 coming into the path that will contribute to the count flowing
864 into the path successor. */
865 if (has_joiner
&& epath
!= elast
)
867 /* Look for other incoming edges after joiner. */
868 FOR_EACH_EDGE (ein
, ei
, epath
->dest
->preds
)
871 /* Ignore in edges from blocks we have duplicated for a
872 threading path, which have duplicated edge counts until
873 they are redirected by an invocation of this routine. */
874 && !bitmap_bit_p (local_info
->duplicate_blocks
,
876 nonpath_count
+= ein
->count ();
879 if (cur_count
< path_out_count
)
880 path_out_count
= cur_count
;
881 if (epath
->count () < min_path_count
)
882 min_path_count
= epath
->count ();
885 /* We computed path_out_count above assuming that this path targeted
886 the joiner's on-path successor with the same likelihood as it
887 reached the joiner. However, other thread paths through the joiner
888 may take a different path through the normal copy source block
889 (i.e. they have a different elast), meaning that they do not
890 contribute any counts to this path's elast. As a result, it may
891 turn out that this path must have more count flowing to the on-path
892 successor of the joiner. Essentially, all of this path's elast
893 count must be contributed by this path and any nonpath counts
894 (since any path through the joiner with a different elast will not
895 include a copy of this elast in its duplicated path).
896 So ensure that this path's path_out_count is at least the
897 difference between elast->count () and nonpath_count. Otherwise the edge
898 counts after threading will not be sane. */
899 if (local_info
->need_profile_correction
900 && has_joiner
&& path_out_count
< elast
->count () - nonpath_count
)
902 path_out_count
= elast
->count () - nonpath_count
;
903 /* But neither can we go above the minimum count along the path
904 we are duplicating. This can be an issue due to profile
905 insanities coming in to this pass. */
906 if (path_out_count
> min_path_count
)
907 path_out_count
= min_path_count
;
910 *path_in_count_ptr
= path_in_count
;
911 *path_out_count_ptr
= path_out_count
;
916 /* Update the counts and frequencies for both an original path
917 edge EPATH and its duplicate EDUP. The duplicate source block
918 will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
919 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
921 update_profile (edge epath
, edge edup
, profile_count path_in_count
,
922 profile_count path_out_count
)
925 /* First update the duplicated block's count. */
928 basic_block dup_block
= edup
->src
;
930 /* Edup's count is reduced by path_out_count. We need to redistribute
931 probabilities to the remaining edges. */
935 profile_probability edup_prob
936 = path_out_count
.probability_in (path_in_count
);
938 /* Either scale up or down the remaining edges.
939 probabilities are always in range <0,1> and thus we can't do
940 both by same loop. */
941 if (edup
->probability
> edup_prob
)
943 profile_probability rev_scale
944 = (profile_probability::always () - edup
->probability
)
945 / (profile_probability::always () - edup_prob
);
946 FOR_EACH_EDGE (esucc
, ei
, dup_block
->succs
)
948 esucc
->probability
/= rev_scale
;
950 else if (edup
->probability
< edup_prob
)
952 profile_probability scale
953 = (profile_probability::always () - edup_prob
)
954 / (profile_probability::always () - edup
->probability
);
955 FOR_EACH_EDGE (esucc
, ei
, dup_block
->succs
)
957 esucc
->probability
*= scale
;
959 if (edup_prob
.initialized_p ())
960 edup
->probability
= edup_prob
;
962 gcc_assert (!dup_block
->count
.initialized_p ());
963 dup_block
->count
= path_in_count
;
966 if (path_in_count
== profile_count::zero ())
969 profile_count final_count
= epath
->count () - path_out_count
;
971 /* Now update the original block's count in the
972 opposite manner - remove the counts/freq that will flow
973 into the duplicated block. Handle underflow due to precision/
975 epath
->src
->count
-= path_in_count
;
977 /* Next update this path edge's original and duplicated counts. We know
978 that the duplicated path will have path_out_count flowing
979 out of it (in the joiner case this is the count along the duplicated path
980 out of the duplicated joiner). This count can then be removed from the
981 original path edge. */
985 profile_probability epath_prob
= final_count
.probability_in (epath
->src
->count
);
987 if (epath
->probability
> epath_prob
)
989 profile_probability rev_scale
990 = (profile_probability::always () - epath
->probability
)
991 / (profile_probability::always () - epath_prob
);
992 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
994 esucc
->probability
/= rev_scale
;
996 else if (epath
->probability
< epath_prob
)
998 profile_probability scale
999 = (profile_probability::always () - epath_prob
)
1000 / (profile_probability::always () - epath
->probability
);
1001 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
1003 esucc
->probability
*= scale
;
1005 if (epath_prob
.initialized_p ())
1006 epath
->probability
= epath_prob
;
1009 /* Wire up the outgoing edges from the duplicate blocks and
1010 update any PHIs as needed. Also update the profile counts
1011 on the original and duplicate blocks and edges. */
1013 ssa_fix_duplicate_block_edges (struct redirection_data
*rd
,
1014 ssa_local_info_t
*local_info
)
1016 bool multi_incomings
= (rd
->incoming_edges
->next
!= NULL
);
1017 edge e
= rd
->incoming_edges
->e
;
1018 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1019 edge elast
= path
->last ()->e
;
1020 profile_count path_in_count
= profile_count::zero ();
1021 profile_count path_out_count
= profile_count::zero ();
1023 /* First determine how much profile count to move from original
1024 path to the duplicate path. This is tricky in the presence of
1025 a joiner (see comments for compute_path_counts), where some portion
1026 of the path's counts will flow off-path from the joiner. In the
1027 non-joiner case the path_in_count and path_out_count should be the
1029 bool has_joiner
= compute_path_counts (rd
, local_info
,
1030 &path_in_count
, &path_out_count
);
1032 for (unsigned int count
= 0, i
= 1; i
< path
->length (); i
++)
1034 edge epath
= (*path
)[i
]->e
;
1036 /* If we were threading through an joiner block, then we want
1037 to keep its control statement and redirect an outgoing edge.
1038 Else we want to remove the control statement & edges, then create
1039 a new outgoing edge. In both cases we may need to update PHIs. */
1040 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1045 gcc_assert (has_joiner
);
1047 /* This updates the PHIs at the destination of the duplicate
1048 block. Pass 0 instead of i if we are threading a path which
1049 has multiple incoming edges. */
1050 update_destination_phis (local_info
->bb
, rd
->dup_blocks
[count
],
1051 path
, multi_incomings
? 0 : i
);
1053 /* Find the edge from the duplicate block to the block we're
1054 threading through. That's the edge we want to redirect. */
1055 victim
= find_edge (rd
->dup_blocks
[count
], (*path
)[i
]->e
->dest
);
1057 /* If there are no remaining blocks on the path to duplicate,
1058 then redirect VICTIM to the final destination of the jump
1060 if (!any_remaining_duplicated_blocks (path
, i
))
1062 e2
= redirect_edge_and_branch (victim
, elast
->dest
);
1063 /* If we redirected the edge, then we need to copy PHI arguments
1064 at the target. If the edge already existed (e2 != victim
1065 case), then the PHIs in the target already have the correct
1068 copy_phi_args (e2
->dest
, elast
, e2
,
1069 path
, multi_incomings
? 0 : i
);
1073 /* Redirect VICTIM to the next duplicated block in the path. */
1074 e2
= redirect_edge_and_branch (victim
, rd
->dup_blocks
[count
+ 1]);
1076 /* We need to update the PHIs in the next duplicated block. We
1077 want the new PHI args to have the same value as they had
1078 in the source of the next duplicate block.
1080 Thus, we need to know which edge we traversed into the
1081 source of the duplicate. Furthermore, we may have
1082 traversed many edges to reach the source of the duplicate.
1084 Walk through the path starting at element I until we
1085 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1086 the edge from the prior element. */
1087 for (unsigned int j
= i
+ 1; j
< path
->length (); j
++)
1089 if ((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
)
1091 copy_phi_arg_into_existing_phi ((*path
)[j
- 1]->e
, e2
);
1097 /* Update the counts of both the original block
1098 and path edge, and the duplicates. The path duplicate's
1099 incoming count are the totals for all edges
1100 incoming to this jump threading path computed earlier.
1101 And we know that the duplicated path will have path_out_count
1102 flowing out of it (i.e. along the duplicated path out of the
1103 duplicated joiner). */
1104 update_profile (epath
, e2
, path_in_count
, path_out_count
);
1106 else if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1108 remove_ctrl_stmt_and_useless_edges (rd
->dup_blocks
[count
], NULL
);
1109 create_edge_and_update_destination_phis (rd
, rd
->dup_blocks
[count
],
1110 multi_incomings
? 0 : i
);
1112 single_succ_edge (rd
->dup_blocks
[1])->aux
= NULL
;
1114 /* Update the counts of both the original block
1115 and path edge, and the duplicates. Since we are now after
1116 any joiner that may have existed on the path, the count
1117 flowing along the duplicated threaded path is path_out_count.
1118 If we didn't have a joiner, then cur_path_freq was the sum
1119 of the total frequencies along all incoming edges to the
1120 thread path (path_in_freq). If we had a joiner, it would have
1121 been updated at the end of that handling to the edge frequency
1122 along the duplicated joiner path edge. */
1123 update_profile (epath
, EDGE_SUCC (rd
->dup_blocks
[count
], 0),
1124 path_out_count
, path_out_count
);
1128 /* No copy case. In this case we don't have an equivalent block
1129 on the duplicated thread path to update, but we do need
1130 to remove the portion of the counts/freqs that were moved
1131 to the duplicated path from the counts/freqs flowing through
1132 this block on the original path. Since all the no-copy edges
1133 are after any joiner, the removed count is the same as
1136 If we didn't have a joiner, then cur_path_freq was the sum
1137 of the total frequencies along all incoming edges to the
1138 thread path (path_in_freq). If we had a joiner, it would have
1139 been updated at the end of that handling to the edge frequency
1140 along the duplicated joiner path edge. */
1141 update_profile (epath
, NULL
, path_out_count
, path_out_count
);
1144 /* Increment the index into the duplicated path when we processed
1145 a duplicated block. */
1146 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
1147 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1154 /* Hash table traversal callback routine to create duplicate blocks. */
1157 ssa_create_duplicates (struct redirection_data
**slot
,
1158 ssa_local_info_t
*local_info
)
1160 struct redirection_data
*rd
= *slot
;
1162 /* The second duplicated block in a jump threading path is specific
1163 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1165 Each time we're called, we have to look through the path and see
1166 if a second block needs to be duplicated.
1168 Note the search starts with the third edge on the path. The first
1169 edge is the incoming edge, the second edge always has its source
1170 duplicated. Thus we start our search with the third edge. */
1171 vec
<jump_thread_edge
*> *path
= rd
->path
;
1172 for (unsigned int i
= 2; i
< path
->length (); i
++)
1174 if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
1175 || (*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1177 create_block_for_threading ((*path
)[i
]->e
->src
, rd
, 1,
1178 &local_info
->duplicate_blocks
);
1183 /* Create a template block if we have not done so already. Otherwise
1184 use the template to create a new block. */
1185 if (local_info
->template_block
== NULL
)
1187 create_block_for_threading ((*path
)[1]->e
->src
, rd
, 0,
1188 &local_info
->duplicate_blocks
);
1189 local_info
->template_block
= rd
->dup_blocks
[0];
1190 local_info
->template_last_to_copy
1191 = gsi_last_bb (local_info
->template_block
);
1193 /* We do not create any outgoing edges for the template. We will
1194 take care of that in a later traversal. That way we do not
1195 create edges that are going to just be deleted. */
1199 gimple_seq seq
= NULL
;
1200 if (gsi_stmt (local_info
->template_last_to_copy
)
1201 != gsi_stmt (gsi_last_bb (local_info
->template_block
)))
1203 if (gsi_end_p (local_info
->template_last_to_copy
))
1205 seq
= bb_seq (local_info
->template_block
);
1206 set_bb_seq (local_info
->template_block
, NULL
);
1209 seq
= gsi_split_seq_after (local_info
->template_last_to_copy
);
1211 create_block_for_threading (local_info
->template_block
, rd
, 0,
1212 &local_info
->duplicate_blocks
);
1215 if (gsi_end_p (local_info
->template_last_to_copy
))
1216 set_bb_seq (local_info
->template_block
, seq
);
1218 gsi_insert_seq_after (&local_info
->template_last_to_copy
,
1219 seq
, GSI_SAME_STMT
);
1222 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1224 ssa_fix_duplicate_block_edges (rd
, local_info
);
1227 if (MAY_HAVE_DEBUG_STMTS
)
1229 /* Copy debug stmts from each NO_COPY src block to the block
1230 that would have been its predecessor, if we can append to it
1231 (we can't add stmts after a block-ending stmt), or prepending
1232 to the duplicate of the successor, if there is one. If
1233 there's no duplicate successor, we'll mostly drop the blocks
1234 on the floor; propagate_threaded_block_debug_into, called
1235 elsewhere, will consolidate and preserve the effects of the
1236 binds, but none of the markers. */
1237 gimple_stmt_iterator copy_to
= gsi_last_bb (rd
->dup_blocks
[0]);
1238 if (!gsi_end_p (copy_to
))
1240 if (stmt_ends_bb_p (gsi_stmt (copy_to
)))
1242 if (rd
->dup_blocks
[1])
1243 copy_to
= gsi_after_labels (rd
->dup_blocks
[1]);
1245 copy_to
= gsi_none ();
1248 gsi_next (©_to
);
1250 for (unsigned int i
= 2, j
= 0; i
< path
->length (); i
++)
1251 if ((*path
)[i
]->type
== EDGE_NO_COPY_SRC_BLOCK
1252 && gsi_bb (copy_to
))
1254 for (gimple_stmt_iterator gsi
= gsi_start_bb ((*path
)[i
]->e
->src
);
1255 !gsi_end_p (gsi
); gsi_next (&gsi
))
1257 if (!is_gimple_debug (gsi_stmt (gsi
)))
1259 gimple
*stmt
= gsi_stmt (gsi
);
1260 gimple
*copy
= gimple_copy (stmt
);
1261 gsi_insert_before (©_to
, copy
, GSI_SAME_STMT
);
1264 else if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
1265 || (*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1269 copy_to
= gsi_last_bb (rd
->dup_blocks
[j
]);
1270 if (!gsi_end_p (copy_to
))
1272 if (stmt_ends_bb_p (gsi_stmt (copy_to
)))
1273 copy_to
= gsi_none ();
1275 gsi_next (©_to
);
1280 /* Keep walking the hash table. */
1284 /* We did not create any outgoing edges for the template block during
1285 block creation. This hash table traversal callback creates the
1286 outgoing edge for the template block. */
1289 ssa_fixup_template_block (struct redirection_data
**slot
,
1290 ssa_local_info_t
*local_info
)
1292 struct redirection_data
*rd
= *slot
;
1294 /* If this is the template block halt the traversal after updating
1297 If we were threading through an joiner block, then we want
1298 to keep its control statement and redirect an outgoing edge.
1299 Else we want to remove the control statement & edges, then create
1300 a new outgoing edge. In both cases we may need to update PHIs. */
1301 if (rd
->dup_blocks
[0] && rd
->dup_blocks
[0] == local_info
->template_block
)
1303 ssa_fix_duplicate_block_edges (rd
, local_info
);
1310 /* Hash table traversal callback to redirect each incoming edge
1311 associated with this hash table element to its new destination. */
1314 ssa_redirect_edges (struct redirection_data
**slot
,
1315 ssa_local_info_t
*local_info
)
1317 struct redirection_data
*rd
= *slot
;
1318 struct el
*next
, *el
;
1320 /* Walk over all the incoming edges associated with this hash table
1322 for (el
= rd
->incoming_edges
; el
; el
= next
)
1325 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1327 /* Go ahead and free this element from the list. Doing this now
1328 avoids the need for another list walk when we destroy the hash
1333 local_info
->num_threaded_edges
++;
1335 if (rd
->dup_blocks
[0])
1339 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1340 fprintf (dump_file
, " Threaded jump %d --> %d to %d\n",
1341 e
->src
->index
, e
->dest
->index
, rd
->dup_blocks
[0]->index
);
1343 /* Redirect the incoming edge (possibly to the joiner block) to the
1344 appropriate duplicate block. */
1345 e2
= redirect_edge_and_branch (e
, rd
->dup_blocks
[0]);
1346 gcc_assert (e
== e2
);
1347 flush_pending_stmts (e2
);
1350 /* Go ahead and clear E->aux. It's not needed anymore and failure
1351 to clear it will cause all kinds of unpleasant problems later. */
1357 /* Indicate that we actually threaded one or more jumps. */
1358 if (rd
->incoming_edges
)
1359 local_info
->jumps_threaded
= true;
1364 /* Return true if this block has no executable statements other than
1365 a simple ctrl flow instruction. When the number of outgoing edges
1366 is one, this is equivalent to a "forwarder" block. */
1369 redirection_block_p (basic_block bb
)
1371 gimple_stmt_iterator gsi
;
1373 /* Advance to the first executable statement. */
1374 gsi
= gsi_start_bb (bb
);
1375 while (!gsi_end_p (gsi
)
1376 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_LABEL
1377 || is_gimple_debug (gsi_stmt (gsi
))
1378 || gimple_nop_p (gsi_stmt (gsi
))
1379 || gimple_clobber_p (gsi_stmt (gsi
))))
1382 /* Check if this is an empty block. */
1383 if (gsi_end_p (gsi
))
1386 /* Test that we've reached the terminating control statement. */
1387 return gsi_stmt (gsi
)
1388 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
1389 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
1390 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
);
1393 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1394 is reached via one or more specific incoming edges, we know which
1395 outgoing edge from BB will be traversed.
1397 We want to redirect those incoming edges to the target of the
1398 appropriate outgoing edge. Doing so avoids a conditional branch
1399 and may expose new optimization opportunities. Note that we have
1400 to update dominator tree and SSA graph after such changes.
1402 The key to keeping the SSA graph update manageable is to duplicate
1403 the side effects occurring in BB so that those side effects still
1404 occur on the paths which bypass BB after redirecting edges.
1406 We accomplish this by creating duplicates of BB and arranging for
1407 the duplicates to unconditionally pass control to one specific
1408 successor of BB. We then revector the incoming edges into BB to
1409 the appropriate duplicate of BB.
1411 If NOLOOP_ONLY is true, we only perform the threading as long as it
1412 does not affect the structure of the loops in a nontrivial way.
1414 If JOINERS is true, then thread through joiner blocks as well. */
1417 fwd_jt_path_registry::thread_block_1 (basic_block bb
,
1421 /* E is an incoming edge into BB that we may or may not want to
1422 redirect to a duplicate of BB. */
1425 ssa_local_info_t local_info
;
1427 local_info
.duplicate_blocks
= BITMAP_ALLOC (NULL
);
1428 local_info
.need_profile_correction
= false;
1429 local_info
.num_threaded_edges
= 0;
1431 /* To avoid scanning a linear array for the element we need we instead
1432 use a hash table. For normal code there should be no noticeable
1433 difference. However, if we have a block with a large number of
1434 incoming and outgoing edges such linear searches can get expensive. */
1436 = new hash_table
<struct redirection_data
> (EDGE_COUNT (bb
->succs
));
1438 /* Record each unique threaded destination into a hash table for
1439 efficient lookups. */
1441 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1446 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1448 if (((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& !joiners
)
1449 || ((*path
)[1]->type
== EDGE_COPY_SRC_BLOCK
&& joiners
))
1452 e2
= path
->last ()->e
;
1453 if (!e2
|| noloop_only
)
1455 /* If NOLOOP_ONLY is true, we only allow threading through the
1456 header of a loop to exit edges. */
1458 /* One case occurs when there was loop header buried in a jump
1459 threading path that crosses loop boundaries. We do not try
1460 and thread this elsewhere, so just cancel the jump threading
1461 request by clearing the AUX field now. */
1462 if (bb
->loop_father
!= e2
->src
->loop_father
1463 && (!loop_exit_edge_p (e2
->src
->loop_father
, e2
)
1464 || flow_loop_nested_p (bb
->loop_father
,
1465 e2
->dest
->loop_father
)))
1467 /* Since this case is not handled by our special code
1468 to thread through a loop header, we must explicitly
1469 cancel the threading request here. */
1470 cancel_thread (path
, "Threading through unhandled loop header");
1475 /* Another case occurs when trying to thread through our
1476 own loop header, possibly from inside the loop. We will
1477 thread these later. */
1479 for (i
= 1; i
< path
->length (); i
++)
1481 if ((*path
)[i
]->e
->src
== bb
->loop_father
->header
1482 && (!loop_exit_edge_p (bb
->loop_father
, e2
)
1483 || (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
))
1487 if (i
!= path
->length ())
1490 /* Loop parallelization can be confused by the result of
1491 threading through the loop exit test back into the loop.
1492 However, theading those jumps seems to help other codes.
1494 I have been unable to find anything related to the shape of
1495 the CFG, the contents of the affected blocks, etc which would
1496 allow a more sensible test than what we're using below which
1497 merely avoids the optimization when parallelizing loops. */
1498 if (flag_tree_parallelize_loops
> 1)
1500 for (i
= 1; i
< path
->length (); i
++)
1501 if (bb
->loop_father
== e2
->src
->loop_father
1502 && loop_exits_from_bb_p (bb
->loop_father
,
1504 && !loop_exit_edge_p (bb
->loop_father
, e2
))
1507 if (i
!= path
->length ())
1509 cancel_thread (path
, "Threading through loop exit");
1516 /* Insert the outgoing edge into the hash table if it is not
1517 already in the hash table. */
1518 lookup_redirection_data (e
, INSERT
);
1520 /* When we have thread paths through a common joiner with different
1521 final destinations, then we may need corrections to deal with
1522 profile insanities. See the big comment before compute_path_counts. */
1523 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1527 else if (e2
!= last
)
1528 local_info
.need_profile_correction
= true;
1532 /* We do not update dominance info. */
1533 free_dominance_info (CDI_DOMINATORS
);
1535 /* We know we only thread through the loop header to loop exits.
1536 Let the basic block duplication hook know we are not creating
1537 a multiple entry loop. */
1539 && bb
== bb
->loop_father
->header
)
1540 set_loop_copy (bb
->loop_father
, loop_outer (bb
->loop_father
));
1542 /* Now create duplicates of BB.
1544 Note that for a block with a high outgoing degree we can waste
1545 a lot of time and memory creating and destroying useless edges.
1547 So we first duplicate BB and remove the control structure at the
1548 tail of the duplicate as well as all outgoing edges from the
1549 duplicate. We then use that duplicate block as a template for
1550 the rest of the duplicates. */
1551 local_info
.template_block
= NULL
;
1553 local_info
.jumps_threaded
= false;
1554 m_redirection_data
->traverse
<ssa_local_info_t
*, ssa_create_duplicates
>
1557 /* The template does not have an outgoing edge. Create that outgoing
1558 edge and update PHI nodes as the edge's target as necessary.
1560 We do this after creating all the duplicates to avoid creating
1561 unnecessary edges. */
1562 m_redirection_data
->traverse
<ssa_local_info_t
*, ssa_fixup_template_block
>
1565 /* The hash table traversals above created the duplicate blocks (and the
1566 statements within the duplicate blocks). This loop creates PHI nodes for
1567 the duplicated blocks and redirects the incoming edges into BB to reach
1568 the duplicates of BB. */
1569 m_redirection_data
->traverse
<ssa_local_info_t
*, ssa_redirect_edges
>
1572 /* Done with this block. Clear REDIRECTION_DATA. */
1573 delete m_redirection_data
;
1574 m_redirection_data
= NULL
;
1577 && bb
== bb
->loop_father
->header
)
1578 set_loop_copy (bb
->loop_father
, NULL
);
1580 BITMAP_FREE (local_info
.duplicate_blocks
);
1581 local_info
.duplicate_blocks
= NULL
;
1583 m_num_threaded_edges
+= local_info
.num_threaded_edges
;
1585 /* Indicate to our caller whether or not any jumps were threaded. */
1586 return local_info
.jumps_threaded
;
1589 /* Wrapper for thread_block_1 so that we can first handle jump
1590 thread paths which do not involve copying joiner blocks, then
1591 handle jump thread paths which have joiner blocks.
1593 By doing things this way we can be as aggressive as possible and
1594 not worry that copying a joiner block will create a jump threading
1598 fwd_jt_path_registry::thread_block (basic_block bb
, bool noloop_only
)
1601 retval
= thread_block_1 (bb
, noloop_only
, false);
1602 retval
|= thread_block_1 (bb
, noloop_only
, true);
1606 /* Callback for dfs_enumerate_from. Returns true if BB is different
1607 from STOP and DBDS_CE_STOP. */
1609 static basic_block dbds_ce_stop
;
1611 dbds_continue_enumeration_p (const_basic_block bb
, const void *stop
)
1613 return (bb
!= (const_basic_block
) stop
1614 && bb
!= dbds_ce_stop
);
1617 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1618 returns the state. */
1621 determine_bb_domination_status (class loop
*loop
, basic_block bb
)
1623 basic_block
*bblocks
;
1624 unsigned nblocks
, i
;
1625 bool bb_reachable
= false;
1629 /* This function assumes BB is a successor of LOOP->header.
1630 If that is not the case return DOMST_NONDOMINATING which
1635 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1637 if (e
->src
== loop
->header
)
1645 return DOMST_NONDOMINATING
;
1648 if (bb
== loop
->latch
)
1649 return DOMST_DOMINATING
;
1651 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1654 bblocks
= XCNEWVEC (basic_block
, loop
->num_nodes
);
1655 dbds_ce_stop
= loop
->header
;
1656 nblocks
= dfs_enumerate_from (loop
->latch
, 1, dbds_continue_enumeration_p
,
1657 bblocks
, loop
->num_nodes
, bb
);
1658 for (i
= 0; i
< nblocks
; i
++)
1659 FOR_EACH_EDGE (e
, ei
, bblocks
[i
]->preds
)
1661 if (e
->src
== loop
->header
)
1664 return DOMST_NONDOMINATING
;
1667 bb_reachable
= true;
1671 return (bb_reachable
? DOMST_DOMINATING
: DOMST_LOOP_BROKEN
);
1674 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1675 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1676 to the inside of the loop. */
1679 fwd_jt_path_registry::thread_through_loop_header (class loop
*loop
,
1680 bool may_peel_loop_headers
)
1682 basic_block header
= loop
->header
;
1683 edge e
, tgt_edge
, latch
= loop_latch_edge (loop
);
1685 basic_block tgt_bb
, atgt_bb
;
1686 enum bb_dom_status domst
;
1688 /* We have already threaded through headers to exits, so all the threading
1689 requests now are to the inside of the loop. We need to avoid creating
1690 irreducible regions (i.e., loops with more than one entry block), and
1691 also loop with several latch edges, or new subloops of the loop (although
1692 there are cases where it might be appropriate, it is difficult to decide,
1693 and doing it wrongly may confuse other optimizers).
1695 We could handle more general cases here. However, the intention is to
1696 preserve some information about the loop, which is impossible if its
1697 structure changes significantly, in a way that is not well understood.
1698 Thus we only handle few important special cases, in which also updating
1699 of the loop-carried information should be feasible:
1701 1) Propagation of latch edge to a block that dominates the latch block
1702 of a loop. This aims to handle the following idiom:
1713 After threading the latch edge, this becomes
1724 The original header of the loop is moved out of it, and we may thread
1725 the remaining edges through it without further constraints.
1727 2) All entry edges are propagated to a single basic block that dominates
1728 the latch block of the loop. This aims to handle the following idiom
1729 (normally created for "for" loops):
1752 /* Threading through the header won't improve the code if the header has just
1754 if (single_succ_p (header
))
1757 if (!may_peel_loop_headers
&& !redirection_block_p (loop
->header
))
1763 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1770 /* If latch is not threaded, and there is a header
1771 edge that is not threaded, we would create loop
1772 with multiple entries. */
1776 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1778 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1780 tgt_edge
= (*path
)[1]->e
;
1781 atgt_bb
= tgt_edge
->dest
;
1784 /* Two targets of threading would make us create loop
1785 with multiple entries. */
1786 else if (tgt_bb
!= atgt_bb
)
1792 /* There are no threading requests. */
1796 /* Redirecting to empty loop latch is useless. */
1797 if (tgt_bb
== loop
->latch
1798 && empty_block_p (loop
->latch
))
1802 /* The target block must dominate the loop latch, otherwise we would be
1803 creating a subloop. */
1804 domst
= determine_bb_domination_status (loop
, tgt_bb
);
1805 if (domst
== DOMST_NONDOMINATING
)
1807 if (domst
== DOMST_LOOP_BROKEN
)
1809 /* If the loop ceased to exist, mark it as such, and thread through its
1811 mark_loop_for_removal (loop
);
1812 return thread_block (header
, false);
1815 if (tgt_bb
->loop_father
->header
== tgt_bb
)
1817 /* If the target of the threading is a header of a subloop, we need
1818 to create a preheader for it, so that the headers of the two loops
1820 if (EDGE_COUNT (tgt_bb
->preds
) > 2)
1822 tgt_bb
= create_preheader (tgt_bb
->loop_father
, 0);
1823 gcc_assert (tgt_bb
!= NULL
);
1826 tgt_bb
= split_edge (tgt_edge
);
1829 basic_block new_preheader
;
1831 /* Now consider the case entry edges are redirected to the new entry
1832 block. Remember one entry edge, so that we can find the new
1833 preheader (its destination after threading). */
1834 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1840 /* The duplicate of the header is the new preheader of the loop. Ensure
1841 that it is placed correctly in the loop hierarchy. */
1842 set_loop_copy (loop
, loop_outer (loop
));
1844 thread_block (header
, false);
1845 set_loop_copy (loop
, NULL
);
1846 new_preheader
= e
->dest
;
1848 /* Create the new latch block. This is always necessary, as the latch
1849 must have only a single successor, but the original header had at
1850 least two successors. */
1852 mfb_kj_edge
= single_succ_edge (new_preheader
);
1853 loop
->header
= mfb_kj_edge
->dest
;
1854 latch
= make_forwarder_block (tgt_bb
, mfb_keep_just
, NULL
);
1855 loop
->header
= latch
->dest
;
1856 loop
->latch
= latch
->src
;
1860 /* We failed to thread anything. Cancel the requests. */
1861 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1863 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1867 cancel_thread (path
, "Failure in thread_through_loop_header");
1874 /* E1 and E2 are edges into the same basic block. Return TRUE if the
1875 PHI arguments associated with those edges are equal or there are no
1876 PHI arguments, otherwise return FALSE. */
1879 phi_args_equal_on_edges (edge e1
, edge e2
)
1882 int indx1
= e1
->dest_idx
;
1883 int indx2
= e2
->dest_idx
;
1885 for (gsi
= gsi_start_phis (e1
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1887 gphi
*phi
= gsi
.phi ();
1889 if (!operand_equal_p (gimple_phi_arg_def (phi
, indx1
),
1890 gimple_phi_arg_def (phi
, indx2
), 0))
1896 /* Return the number of non-debug statements and non-virtual PHIs in a
1900 count_stmts_and_phis_in_block (basic_block bb
)
1902 unsigned int num_stmts
= 0;
1905 for (gpi
= gsi_start_phis (bb
); !gsi_end_p (gpi
); gsi_next (&gpi
))
1906 if (!virtual_operand_p (PHI_RESULT (gpi
.phi ())))
1909 gimple_stmt_iterator gsi
;
1910 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1912 gimple
*stmt
= gsi_stmt (gsi
);
1913 if (!is_gimple_debug (stmt
))
1921 /* Walk through the registered jump threads and convert them into a
1922 form convenient for this pass.
1924 Any block which has incoming edges threaded to outgoing edges
1925 will have its entry in THREADED_BLOCK set.
1927 Any threaded edge will have its new outgoing edge stored in the
1928 original edge's AUX field.
1930 This form avoids the need to walk all the edges in the CFG to
1931 discover blocks which need processing and avoids unnecessary
1932 hash table lookups to map from threaded edge to new target. */
1935 fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks
)
1944 /* It is possible to have jump threads in which one is a subpath
1945 of the other. ie, (A, B), (B, C), (C, D) where B is a joiner
1946 block and (B, C), (C, D) where no joiner block exists.
1948 When this occurs ignore the jump thread request with the joiner
1949 block. It's totally subsumed by the simpler jump thread request.
1951 This results in less block copying, simpler CFGs. More importantly,
1952 when we duplicate the joiner block, B, in this case we will create
1953 a new threading opportunity that we wouldn't be able to optimize
1954 until the next jump threading iteration.
1956 So first convert the jump thread requests which do not require a
1958 for (i
= 0; i
< m_paths
.length (); i
++)
1960 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
1962 if (path
->length () > 1
1963 && (*path
)[1]->type
!= EDGE_COPY_SRC_JOINER_BLOCK
)
1965 edge e
= (*path
)[0]->e
;
1966 e
->aux
= (void *)path
;
1967 bitmap_set_bit (tmp
, e
->dest
->index
);
1971 /* Now iterate again, converting cases where we want to thread
1972 through a joiner block, but only if no other edge on the path
1973 already has a jump thread attached to it. We do this in two passes,
1974 to avoid situations where the order in the paths vec can hide overlapping
1975 threads (the path is recorded on the incoming edge, so we would miss
1976 cases where the second path starts at a downstream edge on the same
1977 path). First record all joiner paths, deleting any in the unexpected
1978 case where there is already a path for that incoming edge. */
1979 for (i
= 0; i
< m_paths
.length ();)
1981 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
1983 if (path
->length () > 1
1984 && (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1986 /* Attach the path to the starting edge if none is yet recorded. */
1987 if ((*path
)[0]->e
->aux
== NULL
)
1989 (*path
)[0]->e
->aux
= path
;
1994 m_paths
.unordered_remove (i
);
1995 cancel_thread (path
);
2004 /* Second, look for paths that have any other jump thread attached to
2005 them, and either finish converting them or cancel them. */
2006 for (i
= 0; i
< m_paths
.length ();)
2008 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
2009 edge e
= (*path
)[0]->e
;
2011 if (path
->length () > 1
2012 && (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& e
->aux
== path
)
2015 for (j
= 1; j
< path
->length (); j
++)
2016 if ((*path
)[j
]->e
->aux
!= NULL
)
2019 /* If we iterated through the entire path without exiting the loop,
2020 then we are good to go, record it. */
2021 if (j
== path
->length ())
2023 bitmap_set_bit (tmp
, e
->dest
->index
);
2029 m_paths
.unordered_remove (i
);
2030 cancel_thread (path
);
2039 /* When optimizing for size, prune all thread paths where statement
2040 duplication is necessary.
2042 We walk the jump thread path looking for copied blocks. There's
2043 two types of copied blocks.
2045 EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
2046 cancel the jump threading request when optimizing for size.
2048 EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
2049 will be killed by threading. If threading does not kill all of
2050 its statements, then we should cancel the jump threading request
2051 when optimizing for size. */
2052 if (optimize_function_for_size_p (cfun
))
2054 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2056 FOR_EACH_EDGE (e
, ei
, BASIC_BLOCK_FOR_FN (cfun
, i
)->preds
)
2059 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2062 for (j
= 1; j
< path
->length (); j
++)
2064 bb
= (*path
)[j
]->e
->src
;
2065 if (redirection_block_p (bb
))
2067 else if ((*path
)[j
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
2068 || ((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
2069 && (count_stmts_and_phis_in_block (bb
)
2070 != estimate_threading_killed_stmts (bb
))))
2074 if (j
!= path
->length ())
2076 cancel_thread (path
);
2080 bitmap_set_bit (threaded_blocks
, i
);
2085 bitmap_copy (threaded_blocks
, tmp
);
2087 /* If we have a joiner block (J) which has two successors S1 and S2 and
2088 we are threading though S1 and the final destination of the thread
2089 is S2, then we must verify that any PHI nodes in S2 have the same
2090 PHI arguments for the edge J->S2 and J->S1->...->S2.
2092 We used to detect this prior to registering the jump thread, but
2093 that prohibits propagation of edge equivalences into non-dominated
2094 PHI nodes as the equivalency test might occur before propagation.
2096 This must also occur after we truncate any jump threading paths
2097 as this scenario may only show up after truncation.
2099 This works for now, but will need improvement as part of the FSA
2102 Note since we've moved the thread request data to the edges,
2103 we have to iterate on those rather than the threaded_edges vector. */
2104 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2106 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2107 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2111 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2112 bool have_joiner
= ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
);
2116 basic_block joiner
= e
->dest
;
2117 edge final_edge
= path
->last ()->e
;
2118 basic_block final_dest
= final_edge
->dest
;
2119 edge e2
= find_edge (joiner
, final_dest
);
2121 if (e2
&& !phi_args_equal_on_edges (e2
, final_edge
))
2123 cancel_thread (path
);
2131 /* Look for jump threading paths which cross multiple loop headers.
2133 The code to thread through loop headers will change the CFG in ways
2134 that invalidate the cached loop iteration information. So we must
2135 detect that case and wipe the cached information. */
2136 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2138 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2139 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2143 gcc_assert (loops_state_satisfies_p
2144 (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
));
2145 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2147 for (unsigned int i
= 0, crossed_headers
= 0;
2148 i
< path
->length ();
2151 basic_block dest
= (*path
)[i
]->e
->dest
;
2152 basic_block src
= (*path
)[i
]->e
->src
;
2153 /* If we enter a loop. */
2154 if (flow_loop_nested_p (src
->loop_father
, dest
->loop_father
))
2156 /* If we step from a block outside an irreducible region
2157 to a block inside an irreducible region, then we have
2158 crossed into a loop. */
2159 else if (! (src
->flags
& BB_IRREDUCIBLE_LOOP
)
2160 && (dest
->flags
& BB_IRREDUCIBLE_LOOP
))
2162 if (crossed_headers
> 1)
2164 vect_free_loop_info_assumptions
2165 ((*path
)[path
->length () - 1]->e
->dest
->loop_father
);
2175 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2176 case of SEME Single Entry Multiple Exits region in which all nodes in the
2177 REGION have exactly one incoming edge. The only exception is the first block
2178 that may not have been connected to the rest of the cfg yet. */
2181 verify_jump_thread (basic_block
*region
, unsigned n_region
)
2183 for (unsigned i
= 0; i
< n_region
; i
++)
2184 gcc_assert (EDGE_COUNT (region
[i
]->preds
) <= 1);
2187 /* Return true when BB is one of the first N items in BBS. */
2190 bb_in_bbs (basic_block bb
, basic_block
*bbs
, int n
)
2192 for (int i
= 0; i
< n
; i
++)
2200 jt_path_registry::debug_path (FILE *dump_file
, int pathno
)
2202 vec
<jump_thread_edge
*> *p
= m_paths
[pathno
];
2203 fprintf (dump_file
, "path: ");
2204 for (unsigned i
= 0; i
< p
->length (); ++i
)
2205 fprintf (dump_file
, "%d -> %d, ",
2206 (*p
)[i
]->e
->src
->index
, (*p
)[i
]->e
->dest
->index
);
2207 fprintf (dump_file
, "\n");
2211 jt_path_registry::debug ()
2213 for (unsigned i
= 0; i
< m_paths
.length (); ++i
)
2214 debug_path (stderr
, i
);
2217 /* Rewire a jump_thread_edge so that the source block is now a
2218 threaded source block.
2220 PATH_NUM is an index into the global path table PATHS.
2221 EDGE_NUM is the jump thread edge number into said path.
2223 Returns TRUE if we were able to successfully rewire the edge. */
2226 back_jt_path_registry::rewire_first_differing_edge (unsigned path_num
,
2229 vec
<jump_thread_edge
*> *path
= m_paths
[path_num
];
2230 edge
&e
= (*path
)[edge_num
]->e
;
2231 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2232 fprintf (dump_file
, "rewiring edge candidate: %d -> %d\n",
2233 e
->src
->index
, e
->dest
->index
);
2234 basic_block src_copy
= get_bb_copy (e
->src
);
2235 if (src_copy
== NULL
)
2237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2238 fprintf (dump_file
, "ignoring candidate: there is no src COPY\n");
2241 edge new_edge
= find_edge (src_copy
, e
->dest
);
2242 /* If the previously threaded paths created a flow graph where we
2243 can no longer figure out where to go, give up. */
2244 if (new_edge
== NULL
)
2246 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2247 fprintf (dump_file
, "ignoring candidate: we lost our way\n");
2254 /* After a path has been jump threaded, adjust the remaining paths
2255 that are subsets of this path, so these paths can be safely
2256 threaded within the context of the new threaded path.
2258 For example, suppose we have just threaded:
2260 5 -> 6 -> 7 -> 8 -> 12 => 5 -> 6' -> 7' -> 8' -> 12'
2262 And we have an upcoming threading candidate:
2263 5 -> 6 -> 7 -> 8 -> 15 -> 20
2265 This function adjusts the upcoming path into:
2268 CURR_PATH_NUM is an index into the global paths table. It
2269 specifies the path that was just threaded. */
2272 back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num
)
2274 vec
<jump_thread_edge
*> *curr_path
= m_paths
[curr_path_num
];
2276 /* Iterate through all the other paths and adjust them. */
2277 for (unsigned cand_path_num
= 0; cand_path_num
< m_paths
.length (); )
2279 if (cand_path_num
== curr_path_num
)
2284 /* Make sure the candidate to adjust starts with the same path
2285 as the recently threaded path. */
2286 vec
<jump_thread_edge
*> *cand_path
= m_paths
[cand_path_num
];
2287 if ((*cand_path
)[0]->e
!= (*curr_path
)[0]->e
)
2292 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2294 fprintf (dump_file
, "adjusting candidate: ");
2295 debug_path (dump_file
, cand_path_num
);
2298 /* Chop off from the candidate path any prefix it shares with
2299 the recently threaded path. */
2300 unsigned minlength
= MIN (curr_path
->length (), cand_path
->length ());
2302 for (j
= 0; j
< minlength
; ++j
)
2304 edge cand_edge
= (*cand_path
)[j
]->e
;
2305 edge curr_edge
= (*curr_path
)[j
]->e
;
2307 /* Once the prefix no longer matches, adjust the first
2308 non-matching edge to point from an adjusted edge to
2309 wherever it was going. */
2310 if (cand_edge
!= curr_edge
)
2312 gcc_assert (cand_edge
->src
== curr_edge
->src
);
2313 if (!rewire_first_differing_edge (cand_path_num
, j
))
2314 goto remove_candidate_from_list
;
2320 /* If we consumed the max subgraph we could look at, and
2321 still didn't find any different edges, it's the
2322 last edge after MINLENGTH. */
2323 if (cand_path
->length () > minlength
)
2325 if (!rewire_first_differing_edge (cand_path_num
, j
))
2326 goto remove_candidate_from_list
;
2328 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2329 fprintf (dump_file
, "adjusting first edge after MINLENGTH.\n");
2333 /* If we are removing everything, delete the entire candidate. */
2334 if (j
== cand_path
->length ())
2336 remove_candidate_from_list
:
2337 cancel_thread (cand_path
, "Adjusted candidate is EMPTY");
2338 m_paths
.unordered_remove (cand_path_num
);
2341 /* Otherwise, just remove the redundant sub-path. */
2342 if (cand_path
->length () - j
> 1)
2343 cand_path
->block_remove (0, j
);
2344 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2345 fprintf (dump_file
, "Dropping illformed candidate.\n");
2347 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2349 fprintf (dump_file
, "adjusted candidate: ");
2350 debug_path (dump_file
, cand_path_num
);
2356 /* Duplicates a jump-thread path of N_REGION basic blocks.
2357 The ENTRY edge is redirected to the duplicate of the region.
2359 Remove the last conditional statement in the last basic block in the REGION,
2360 and create a single fallthru edge pointing to the same destination as the
2363 CURRENT_PATH_NO is an index into the global paths[] table
2364 specifying the jump-thread path.
2366 Returns false if it is unable to copy the region, true otherwise. */
2369 back_jt_path_registry::duplicate_thread_path (edge entry
,
2371 basic_block
*region
,
2373 unsigned current_path_no
)
2376 class loop
*loop
= entry
->dest
->loop_father
;
2379 profile_count curr_count
;
2381 if (!can_copy_bbs_p (region
, n_region
))
2384 /* Some sanity checking. Note that we do not check for all possible
2385 missuses of the functions. I.e. if you ask to copy something weird,
2386 it will work, but the state of structures probably will not be
2388 for (i
= 0; i
< n_region
; i
++)
2390 /* We do not handle subloops, i.e. all the blocks must belong to the
2392 if (region
[i
]->loop_father
!= loop
)
2396 initialize_original_copy_tables ();
2398 set_loop_copy (loop
, loop
);
2400 basic_block
*region_copy
= XNEWVEC (basic_block
, n_region
);
2401 copy_bbs (region
, n_region
, region_copy
, &exit
, 1, &exit_copy
, loop
,
2402 split_edge_bb_loc (entry
), false);
2404 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2405 following code ensures that all the edges exiting the jump-thread path are
2406 redirected back to the original code: these edges are exceptions
2407 invalidating the property that is propagated by executing all the blocks of
2408 the jump-thread path in order. */
2410 curr_count
= entry
->count ();
2412 for (i
= 0; i
< n_region
; i
++)
2416 basic_block bb
= region_copy
[i
];
2418 /* Watch inconsistent profile. */
2419 if (curr_count
> region
[i
]->count
)
2420 curr_count
= region
[i
]->count
;
2421 /* Scale current BB. */
2422 if (region
[i
]->count
.nonzero_p () && curr_count
.initialized_p ())
2424 /* In the middle of the path we only scale the frequencies.
2425 In last BB we need to update probabilities of outgoing edges
2426 because we know which one is taken at the threaded path. */
2427 if (i
+ 1 != n_region
)
2428 scale_bbs_frequencies_profile_count (region
+ i
, 1,
2429 region
[i
]->count
- curr_count
,
2432 update_bb_profile_for_threading (region
[i
],
2435 scale_bbs_frequencies_profile_count (region_copy
+ i
, 1, curr_count
,
2436 region_copy
[i
]->count
);
2439 if (single_succ_p (bb
))
2441 /* Make sure the successor is the next node in the path. */
2442 gcc_assert (i
+ 1 == n_region
2443 || region_copy
[i
+ 1] == single_succ_edge (bb
)->dest
);
2444 if (i
+ 1 != n_region
)
2446 curr_count
= single_succ_edge (bb
)->count ();
2451 /* Special case the last block on the path: make sure that it does not
2452 jump back on the copied path, including back to itself. */
2453 if (i
+ 1 == n_region
)
2455 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2456 if (bb_in_bbs (e
->dest
, region_copy
, n_region
))
2458 basic_block orig
= get_bb_original (e
->dest
);
2460 redirect_edge_and_branch_force (e
, orig
);
2465 /* Redirect all other edges jumping to non-adjacent blocks back to the
2467 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2468 if (region_copy
[i
+ 1] != e
->dest
)
2470 basic_block orig
= get_bb_original (e
->dest
);
2472 redirect_edge_and_branch_force (e
, orig
);
2476 curr_count
= e
->count ();
2482 verify_jump_thread (region_copy
, n_region
);
2484 /* Remove the last branch in the jump thread path. */
2485 remove_ctrl_stmt_and_useless_edges (region_copy
[n_region
- 1], exit
->dest
);
2487 /* And fixup the flags on the single remaining edge. */
2488 edge fix_e
= find_edge (region_copy
[n_region
- 1], exit
->dest
);
2489 fix_e
->flags
&= ~(EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
| EDGE_ABNORMAL
);
2490 fix_e
->flags
|= EDGE_FALLTHRU
;
2492 edge e
= make_edge (region_copy
[n_region
- 1], exit
->dest
, EDGE_FALLTHRU
);
2496 rescan_loop_exit (e
, true, false);
2497 e
->probability
= profile_probability::always ();
2500 /* Redirect the entry and add the phi node arguments. */
2501 if (entry
->dest
== loop
->header
)
2502 mark_loop_for_removal (loop
);
2503 redirected
= redirect_edge_and_branch (entry
, get_bb_copy (entry
->dest
));
2504 gcc_assert (redirected
!= NULL
);
2505 flush_pending_stmts (entry
);
2507 /* Add the other PHI node arguments. */
2508 add_phi_args_after_copy (region_copy
, n_region
, NULL
);
2512 adjust_paths_after_duplication (current_path_no
);
2514 free_original_copy_tables ();
2518 /* Return true when PATH is a valid jump-thread path. */
2521 valid_jump_thread_path (vec
<jump_thread_edge
*> *path
)
2523 unsigned len
= path
->length ();
2525 /* Check that the path is connected. */
2526 for (unsigned int j
= 0; j
< len
- 1; j
++)
2528 edge e
= (*path
)[j
]->e
;
2529 if (e
->dest
!= (*path
)[j
+1]->e
->src
)
2535 /* Remove any queued jump threads that include edge E.
2537 We don't actually remove them here, just record the edges into ax
2538 hash table. That way we can do the search once per iteration of
2539 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2542 fwd_jt_path_registry::remove_jump_threads_including (edge_def
*e
)
2544 if (!m_paths
.exists () || !flag_thread_jumps
)
2547 edge
*slot
= m_removed_edges
->find_slot (e
, INSERT
);
2551 /* Thread all paths that have been queued for jump threading, and
2552 update the CFG accordingly.
2554 It is the caller's responsibility to fix the dominance information
2555 and rewrite duplicated SSA_NAMEs back into SSA form.
2557 If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
2558 headers if it does not simplify the loop.
2560 Returns true if one or more edges were threaded. */
2563 jt_path_registry::thread_through_all_blocks (bool peel_loop_headers
)
2565 if (m_paths
.length () == 0)
2568 m_num_threaded_edges
= 0;
2570 bool retval
= update_cfg (peel_loop_headers
);
2572 statistics_counter_event (cfun
, "Jumps threaded", m_num_threaded_edges
);
2576 loops_state_set (LOOPS_NEED_FIXUP
);
2582 /* This is the backward threader version of thread_through_all_blocks
2583 using a generic BB copier. */
2586 back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
2588 bool retval
= false;
2589 hash_set
<edge
> visited_starting_edges
;
2591 while (m_paths
.length ())
2593 vec
<jump_thread_edge
*> *path
= m_paths
[0];
2594 edge entry
= (*path
)[0]->e
;
2596 /* Do not jump-thread twice from the same starting edge.
2598 Previously we only checked that we weren't threading twice
2599 from the same BB, but that was too restrictive. Imagine a
2600 path that starts from GIMPLE_COND(x_123 == 0,...), where both
2601 edges out of this conditional yield paths that can be
2602 threaded (for example, both lead to an x_123==0 or x_123!=0
2603 conditional further down the line. */
2604 if (visited_starting_edges
.contains (entry
)
2605 /* We may not want to realize this jump thread path for
2606 various reasons. So check it first. */
2607 || !valid_jump_thread_path (path
))
2609 /* Remove invalid jump-thread paths. */
2610 cancel_thread (path
, "Avoiding threading twice from same edge");
2611 m_paths
.unordered_remove (0);
2615 unsigned len
= path
->length ();
2616 edge exit
= (*path
)[len
- 1]->e
;
2617 basic_block
*region
= XNEWVEC (basic_block
, len
- 1);
2619 for (unsigned int j
= 0; j
< len
- 1; j
++)
2620 region
[j
] = (*path
)[j
]->e
->dest
;
2622 if (duplicate_thread_path (entry
, exit
, region
, len
- 1, 0))
2624 /* We do not update dominance info. */
2625 free_dominance_info (CDI_DOMINATORS
);
2626 visited_starting_edges
.add (entry
);
2628 m_num_threaded_edges
++;
2632 m_paths
.unordered_remove (0);
2638 /* This is the forward threader version of thread_through_all_blocks,
2639 using a custom BB copier. */
2642 fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers
)
2644 bool retval
= false;
2646 /* Remove any paths that referenced removed edges. */
2647 if (m_removed_edges
)
2648 for (unsigned i
= 0; i
< m_paths
.length (); )
2651 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
2653 for (j
= 0; j
< path
->length (); j
++)
2655 edge e
= (*path
)[j
]->e
;
2656 if (m_removed_edges
->find_slot (e
, NO_INSERT
)
2657 || (((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
2658 || (*path
)[j
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
2659 && !can_duplicate_block_p (e
->src
)))
2663 if (j
!= path
->length ())
2665 cancel_thread (path
, "Thread references removed edge");
2666 m_paths
.unordered_remove (i
);
2672 auto_bitmap threaded_blocks
;
2673 mark_threaded_blocks (threaded_blocks
);
2675 initialize_original_copy_tables ();
2677 /* The order in which we process jump threads can be important.
2679 Consider if we have two jump threading paths A and B. If the
2680 target edge of A is the starting edge of B and we thread path A
2681 first, then we create an additional incoming edge into B->dest that
2682 we cannot discover as a jump threading path on this iteration.
2684 If we instead thread B first, then the edge into B->dest will have
2685 already been redirected before we process path A and path A will
2686 natually, with no further work, target the redirected path for B.
2688 An post-order is sufficient here. Compute the ordering first, then
2689 process the blocks. */
2690 if (!bitmap_empty_p (threaded_blocks
))
2692 int *postorder
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2693 unsigned int postorder_num
= post_order_compute (postorder
, false, false);
2694 for (unsigned int i
= 0; i
< postorder_num
; i
++)
2696 unsigned int indx
= postorder
[i
];
2697 if (bitmap_bit_p (threaded_blocks
, indx
))
2699 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, indx
);
2700 retval
|= thread_block (bb
, true);
2706 /* Then perform the threading through loop headers. We start with the
2707 innermost loop, so that the changes in cfg we perform won't affect
2708 further threading. */
2709 for (auto loop
: loops_list (cfun
, LI_FROM_INNERMOST
))
2712 || !bitmap_bit_p (threaded_blocks
, loop
->header
->index
))
2715 retval
|= thread_through_loop_header (loop
, may_peel_loop_headers
);
2718 /* All jump threading paths should have been resolved at this
2719 point. Verify that is the case. */
2721 FOR_EACH_BB_FN (bb
, cfun
)
2725 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2726 gcc_assert (e
->aux
== NULL
);
2729 free_original_copy_tables ();
2735 jt_path_registry::cancel_invalid_paths (vec
<jump_thread_edge
*> &path
)
2737 gcc_checking_assert (!path
.is_empty ());
2738 edge entry
= path
[0]->e
;
2739 edge exit
= path
[path
.length () - 1]->e
;
2740 bool seen_latch
= false;
2741 int loops_crossed
= 0;
2742 bool crossed_latch
= false;
2743 bool crossed_loop_header
= false;
2744 // Use ->dest here instead of ->src to ignore the first block. The
2745 // first block is allowed to be in a different loop, since it'll be
2746 // redirected. See similar comment in profitable_path_p: "we don't
2747 // care about that block...".
2748 loop_p loop
= entry
->dest
->loop_father
;
2749 loop_p curr_loop
= loop
;
2751 for (unsigned int i
= 0; i
< path
.length (); i
++)
2753 edge e
= path
[i
]->e
;
2757 // NULL outgoing edges on a path can happen for jumping to a
2758 // constant address.
2759 cancel_thread (&path
, "Found NULL edge in jump threading path");
2763 if (loop
->latch
== e
->src
|| loop
->latch
== e
->dest
)
2766 // Like seen_latch, but excludes the first block.
2767 if (e
->src
!= entry
->src
)
2768 crossed_latch
= true;
2771 if (e
->dest
->loop_father
!= curr_loop
)
2773 curr_loop
= e
->dest
->loop_father
;
2777 // ?? Avoid threading through loop headers that remain in the
2778 // loop, as such threadings tend to create sub-loops which
2779 // _might_ be OK ??.
2780 if (e
->dest
->loop_father
->header
== e
->dest
2781 && !flow_loop_nested_p (exit
->dest
->loop_father
,
2782 e
->dest
->loop_father
))
2783 crossed_loop_header
= true;
2785 if (flag_checking
&& !m_backedge_threads
)
2786 gcc_assert ((path
[i
]->e
->flags
& EDGE_DFS_BACK
) == 0);
2789 // If we crossed a loop into an outer loop without crossing the
2790 // latch, this is just an early exit from the loop.
2791 if (loops_crossed
== 1
2793 && flow_loop_nested_p (exit
->dest
->loop_father
, exit
->src
->loop_father
))
2796 if (cfun
->curr_properties
& PROP_loop_opts_done
)
2799 if (seen_latch
&& empty_block_p (loop
->latch
))
2801 cancel_thread (&path
, "Threading through latch before loop opts "
2802 "would create non-empty latch");
2807 cancel_thread (&path
, "Path crosses loops");
2810 // The path should either start and end in the same loop or exit the
2811 // loop it starts in but never enter a loop. This also catches
2812 // creating irreducible loops, not only rotation.
2813 if (entry
->src
->loop_father
!= exit
->dest
->loop_father
2814 && !flow_loop_nested_p (exit
->src
->loop_father
,
2815 entry
->dest
->loop_father
))
2817 cancel_thread (&path
, "Path rotates loop");
2820 if (crossed_loop_header
)
2822 cancel_thread (&path
, "Path crosses loop header but does not exit it");
2828 /* Register a jump threading opportunity. We queue up all the jump
2829 threading opportunities discovered by a pass and update the CFG
2830 and SSA form all at once.
2832 E is the edge we can thread, E2 is the new target edge, i.e., we
2833 are effectively recording that E->dest can be changed to E2->dest
2834 after fixing the SSA graph.
2836 Return TRUE if PATH was successfully threaded. */
2839 jt_path_registry::register_jump_thread (vec
<jump_thread_edge
*> *path
)
2841 gcc_checking_assert (flag_thread_jumps
);
2843 if (!dbg_cnt (registered_jump_thread
))
2849 if (cancel_invalid_paths (*path
))
2852 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2853 dump_jump_thread_path (dump_file
, *path
, true);
2855 m_paths
.safe_push (path
);
2859 /* Return how many uses of T there are within BB, as long as there
2860 aren't any uses outside BB. If there are any uses outside BB,
2861 return -1 if there's at most one use within BB, or -2 if there is
2862 more than one use within BB. */
2865 uses_in_bb (tree t
, basic_block bb
)
2868 bool outside_bb
= false;
2870 imm_use_iterator iter
;
2871 use_operand_p use_p
;
2872 FOR_EACH_IMM_USE_FAST (use_p
, iter
, t
)
2874 if (is_gimple_debug (USE_STMT (use_p
)))
2877 if (gimple_bb (USE_STMT (use_p
)) != bb
)
2882 if (outside_bb
&& uses
> 1)
2892 /* Starting from the final control flow stmt in BB, assuming it will
2893 be removed, follow uses in to-be-removed stmts back to their defs
2894 and count how many defs are to become dead and be removed as
2898 estimate_threading_killed_stmts (basic_block bb
)
2900 int killed_stmts
= 0;
2901 hash_map
<tree
, int> ssa_remaining_uses
;
2902 auto_vec
<gimple
*, 4> dead_worklist
;
2904 /* If the block has only two predecessors, threading will turn phi
2905 dsts into either src, so count them as dead stmts. */
2906 bool drop_all_phis
= EDGE_COUNT (bb
->preds
) == 2;
2909 for (gphi_iterator gsi
= gsi_start_phis (bb
);
2910 !gsi_end_p (gsi
); gsi_next (&gsi
))
2912 gphi
*phi
= gsi
.phi ();
2913 tree dst
= gimple_phi_result (phi
);
2915 /* We don't count virtual PHIs as stmts in
2916 record_temporary_equivalences_from_phis. */
2917 if (virtual_operand_p (dst
))
2923 if (gsi_end_p (gsi_last_bb (bb
)))
2924 return killed_stmts
;
2926 gimple
*stmt
= gsi_stmt (gsi_last_bb (bb
));
2927 if (gimple_code (stmt
) != GIMPLE_COND
2928 && gimple_code (stmt
) != GIMPLE_GOTO
2929 && gimple_code (stmt
) != GIMPLE_SWITCH
)
2930 return killed_stmts
;
2932 /* The control statement is always dead. */
2934 dead_worklist
.quick_push (stmt
);
2935 while (!dead_worklist
.is_empty ())
2937 stmt
= dead_worklist
.pop ();
2940 use_operand_p use_p
;
2941 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_USE
)
2943 tree t
= USE_FROM_PTR (use_p
);
2944 gimple
*def
= SSA_NAME_DEF_STMT (t
);
2946 if (gimple_bb (def
) == bb
2947 && (gimple_code (def
) != GIMPLE_PHI
2949 && !gimple_has_side_effects (def
))
2951 int *usesp
= ssa_remaining_uses
.get (t
);
2957 uses
= uses_in_bb (t
, bb
);
2961 /* Don't bother recording the expected use count if we
2962 won't find any further uses within BB. */
2963 if (!usesp
&& (uses
< -1 || uses
> 1))
2965 usesp
= &ssa_remaining_uses
.get_or_insert (t
);
2980 ssa_remaining_uses
.remove (t
);
2981 if (gimple_code (def
) != GIMPLE_PHI
)
2982 dead_worklist
.safe_push (def
);
2989 fprintf (dump_file
, "threading bb %i kills %i stmts\n",
2990 bb
->index
, killed_stmts
);
2992 return killed_stmts
;