1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
29 #include "fold-const.h"
31 #include "gimple-iterator.h"
33 #include "tree-ssa-threadupdate.h"
37 #include "tree-vectorizer.h"
38 #include "tree-pass.h"
40 /* Given a block B, update the CFG and SSA graph to reflect redirecting
41 one or more in-edges to B to instead reach the destination of an
42 out-edge from B while preserving any side effects in B.
44 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
45 side effects of executing B.
47 1. Make a copy of B (including its outgoing edges and statements). Call
48 the copy B'. Note B' has no incoming edges or PHIs at this time.
50 2. Remove the control statement at the end of B' and all outgoing edges
53 3. Add a new argument to each PHI in C with the same value as the existing
54 argument associated with edge B->C. Associate the new PHI arguments
57 4. For each PHI in B, find or create a PHI in B' with an identical
58 PHI_RESULT. Add an argument to the PHI in B' which has the same
59 value as the PHI in B associated with the edge A->B. Associate
60 the new argument in the PHI in B' with the edge A->B.
62 5. Change the edge A->B to A->B'.
64 5a. This automatically deletes any PHI arguments associated with the
67 5b. This automatically associates each new argument added in step 4
70 6. Repeat for other incoming edges into B.
72 7. Put the duplicated resources in B and all the B' blocks into SSA form.
74 Note that block duplication can be minimized by first collecting the
75 set of unique destination blocks that the incoming edges should
78 We reduce the number of edges and statements we create by not copying all
79 the outgoing edges and the control statement in step #1. We instead create
80 a template block without the outgoing edges and duplicate the template.
82 Another case this code handles is threading through a "joiner" block. In
83 this case, we do not know the destination of the joiner block, but one
84 of the outgoing edges from the joiner block leads to a threadable path. This
85 case largely works as outlined above, except the duplicate of the joiner
86 block still contains a full set of outgoing edges and its control statement.
87 We just redirect one of its outgoing edges to our jump threading path. */
90 /* Steps #5 and #6 of the above algorithm are best implemented by walking
91 all the incoming edges which thread to the same destination edge at
92 the same time. That avoids lots of table lookups to get information
93 for the destination edge.
95 To realize that implementation we create a list of incoming edges
96 which thread to the same outgoing edge. Thus to implement steps
97 #5 and #6 we traverse our hash table of outgoing edge information.
98 For each entry we walk the list of incoming edges which thread to
99 the current outgoing edge. */
107 /* Main data structure recording information regarding B's duplicate
110 /* We need to efficiently record the unique thread destinations of this
111 block and specific information associated with those destinations. We
112 may have many incoming edges threaded to the same outgoing edge. This
113 can be naturally implemented with a hash table. */
115 struct redirection_data
: free_ptr_hash
<redirection_data
>
117 /* We support wiring up two block duplicates in a jump threading path.
119 One is a normal block copy where we remove the control statement
120 and wire up its single remaining outgoing edge to the thread path.
122 The other is a joiner block where we leave the control statement
123 in place, but wire one of the outgoing edges to a thread path.
125 In theory we could have multiple block duplicates in a jump
126 threading path, but I haven't tried that.
128 The duplicate blocks appear in this array in the same order in
129 which they appear in the jump thread path. */
130 basic_block dup_blocks
[2];
132 vec
<jump_thread_edge
*> *path
;
134 /* A list of incoming edges which we want to thread to the
136 struct el
*incoming_edges
;
138 /* hash_table support. */
139 static inline hashval_t
hash (const redirection_data
*);
140 static inline int equal (const redirection_data
*, const redirection_data
*);
143 jump_thread_path_allocator::jump_thread_path_allocator ()
145 obstack_init (&m_obstack
);
148 jump_thread_path_allocator::~jump_thread_path_allocator ()
150 obstack_free (&m_obstack
, NULL
);
154 jump_thread_path_allocator::allocate_thread_edge (edge e
,
155 jump_thread_edge_type type
)
157 void *r
= obstack_alloc (&m_obstack
, sizeof (jump_thread_edge
));
158 return new (r
) jump_thread_edge (e
, type
);
161 vec
<jump_thread_edge
*> *
162 jump_thread_path_allocator::allocate_thread_path ()
164 // ?? Since the paths live in an obstack, we should be able to remove all
165 // references to path->release() throughout the code.
166 void *r
= obstack_alloc (&m_obstack
, sizeof (vec
<jump_thread_edge
*>));
167 return new (r
) vec
<jump_thread_edge
*> ();
170 jt_path_registry::jt_path_registry (bool backedge_threads
)
173 m_num_threaded_edges
= 0;
174 m_backedge_threads
= backedge_threads
;
177 jt_path_registry::~jt_path_registry ()
182 fwd_jt_path_registry::fwd_jt_path_registry ()
183 : jt_path_registry (/*backedge_threads=*/false)
185 m_removed_edges
= new hash_table
<struct removed_edges
> (17);
186 m_redirection_data
= NULL
;
189 fwd_jt_path_registry::~fwd_jt_path_registry ()
191 delete m_removed_edges
;
194 back_jt_path_registry::back_jt_path_registry ()
195 : jt_path_registry (/*backedge_threads=*/true)
200 jt_path_registry::push_edge (vec
<jump_thread_edge
*> *path
,
201 edge e
, jump_thread_edge_type type
)
203 jump_thread_edge
*x
= m_allocator
.allocate_thread_edge (e
, type
);
207 vec
<jump_thread_edge
*> *
208 jt_path_registry::allocate_thread_path ()
210 return m_allocator
.allocate_thread_path ();
213 /* Dump a jump threading path, including annotations about each
217 dump_jump_thread_path (FILE *dump_file
,
218 const vec
<jump_thread_edge
*> &path
,
223 " [%u] Registering jump thread: (%d, %d) incoming edge; ",
224 dbg_cnt_counter (registered_jump_thread
),
225 path
[0]->e
->src
->index
, path
[0]->e
->dest
->index
);
228 " Cancelling jump thread: (%d, %d) incoming edge; ",
229 path
[0]->e
->src
->index
, path
[0]->e
->dest
->index
);
231 for (unsigned int i
= 1; i
< path
.length (); i
++)
233 /* We can get paths with a NULL edge when the final destination
234 of a jump thread turns out to be a constant address. We dump
235 those paths when debugging, so we have to be prepared for that
237 if (path
[i
]->e
== NULL
)
240 fprintf (dump_file
, " (%d, %d) ",
241 path
[i
]->e
->src
->index
, path
[i
]->e
->dest
->index
);
242 switch (path
[i
]->type
)
244 case EDGE_COPY_SRC_JOINER_BLOCK
:
245 fprintf (dump_file
, "joiner");
247 case EDGE_COPY_SRC_BLOCK
:
248 fprintf (dump_file
, "normal");
250 case EDGE_NO_COPY_SRC_BLOCK
:
251 fprintf (dump_file
, "nocopy");
257 if ((path
[i
]->e
->flags
& EDGE_DFS_BACK
) != 0)
258 fprintf (dump_file
, " (back)");
260 fprintf (dump_file
, "; \n");
264 debug (const vec
<jump_thread_edge
*> &path
)
266 dump_jump_thread_path (stderr
, path
, true);
270 debug (const vec
<jump_thread_edge
*> *path
)
275 /* Release the memory associated with PATH, and if dumping is enabled,
276 dump out the reason why the thread was canceled. */
279 cancel_thread (vec
<jump_thread_edge
*> *path
, const char *reason
= NULL
)
281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
284 fprintf (dump_file
, "%s: ", reason
);
286 dump_jump_thread_path (dump_file
, *path
, false);
287 fprintf (dump_file
, "\n");
292 /* Simple hashing function. For any given incoming edge E, we're going
293 to be most concerned with the final destination of its jump thread
294 path. So hash on the block index of the final edge in the path. */
297 redirection_data::hash (const redirection_data
*p
)
299 vec
<jump_thread_edge
*> *path
= p
->path
;
300 return path
->last ()->e
->dest
->index
;
303 /* Given two hash table entries, return true if they have the same
304 jump threading path. */
306 redirection_data::equal (const redirection_data
*p1
, const redirection_data
*p2
)
308 vec
<jump_thread_edge
*> *path1
= p1
->path
;
309 vec
<jump_thread_edge
*> *path2
= p2
->path
;
311 if (path1
->length () != path2
->length ())
314 for (unsigned int i
= 1; i
< path1
->length (); i
++)
316 if ((*path1
)[i
]->type
!= (*path2
)[i
]->type
317 || (*path1
)[i
]->e
!= (*path2
)[i
]->e
)
324 /* Data structure of information to pass to hash table traversal routines. */
325 struct ssa_local_info_t
327 /* The current block we are working on. */
330 /* We only create a template block for the first duplicated block in a
331 jump threading path as we may need many duplicates of that block.
333 The second duplicate block in a path is specific to that path. Creating
334 and sharing a template for that block is considerably more difficult. */
335 basic_block template_block
;
337 /* If we append debug stmts to the template block after creating it,
338 this iterator won't be the last one in the block, and further
339 copies of the template block shouldn't get debug stmts after
341 gimple_stmt_iterator template_last_to_copy
;
343 /* Blocks duplicated for the thread. */
344 bitmap duplicate_blocks
;
346 /* TRUE if we thread one or more jumps, FALSE otherwise. */
349 /* When we have multiple paths through a joiner which reach different
350 final destinations, then we may need to correct for potential
351 profile insanities. */
352 bool need_profile_correction
;
354 // Jump threading statistics.
355 unsigned long num_threaded_edges
;
358 /* When we start updating the CFG for threading, data necessary for jump
359 threading is attached to the AUX field for the incoming edge. Use these
360 macros to access the underlying structure attached to the AUX field. */
361 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
363 /* Remove the last statement in block BB if it is a control statement
364 Also remove all outgoing edges except the edge which reaches DEST_BB.
365 If DEST_BB is NULL, then remove all outgoing edges. */
368 remove_ctrl_stmt_and_useless_edges (basic_block bb
, basic_block dest_bb
)
370 gimple_stmt_iterator gsi
;
374 gsi
= gsi_last_bb (bb
);
376 /* If the duplicate ends with a control statement, then remove it.
378 Note that if we are duplicating the template block rather than the
379 original basic block, then the duplicate might not have any real
383 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
384 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
385 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
))
386 gsi_remove (&gsi
, true);
388 for (ei
= ei_start (bb
->succs
); (e
= ei_safe_edge (ei
)); )
390 if (e
->dest
!= dest_bb
)
392 free_dom_edge_info (e
);
397 e
->probability
= profile_probability::always ();
402 /* If the remaining edge is a loop exit, there must have
403 a removed edge that was not a loop exit.
405 In that case BB and possibly other blocks were previously
406 in the loop, but are now outside the loop. Thus, we need
407 to update the loop structures. */
408 if (single_succ_p (bb
)
409 && loop_outer (bb
->loop_father
)
410 && loop_exit_edge_p (bb
->loop_father
, single_succ_edge (bb
)))
411 loops_state_set (LOOPS_NEED_FIXUP
);
414 /* Create a duplicate of BB. Record the duplicate block in an array
415 indexed by COUNT stored in RD. */
418 create_block_for_threading (basic_block bb
,
419 struct redirection_data
*rd
,
421 bitmap
*duplicate_blocks
)
426 /* We can use the generic block duplication code and simply remove
427 the stuff we do not need. */
428 rd
->dup_blocks
[count
] = duplicate_block (bb
, NULL
, NULL
);
430 FOR_EACH_EDGE (e
, ei
, rd
->dup_blocks
[count
]->succs
)
434 /* If we duplicate a block with an outgoing edge marked as
435 EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
436 leak out of the current pass.
438 It would be better to simplify switch statements and remove
439 the edges before we get here, but the sequencing is nontrivial. */
440 e
->flags
&= ~EDGE_IGNORE
;
443 /* Zero out the profile, since the block is unreachable for now. */
444 rd
->dup_blocks
[count
]->count
= profile_count::uninitialized ();
445 if (duplicate_blocks
)
446 bitmap_set_bit (*duplicate_blocks
, rd
->dup_blocks
[count
]->index
);
449 /* Given an outgoing edge E lookup and return its entry in our hash table.
451 If INSERT is true, then we insert the entry into the hash table if
452 it is not already present. INCOMING_EDGE is added to the list of incoming
453 edges associated with E in the hash table. */
456 fwd_jt_path_registry::lookup_redirection_data (edge e
, insert_option insert
)
458 struct redirection_data
**slot
;
459 struct redirection_data
*elt
;
460 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
462 /* Build a hash table element so we can see if E is already
464 elt
= XNEW (struct redirection_data
);
466 elt
->dup_blocks
[0] = NULL
;
467 elt
->dup_blocks
[1] = NULL
;
468 elt
->incoming_edges
= NULL
;
470 slot
= m_redirection_data
->find_slot (elt
, insert
);
472 /* This will only happen if INSERT is false and the entry is not
473 in the hash table. */
480 /* This will only happen if E was not in the hash table and
485 elt
->incoming_edges
= XNEW (struct el
);
486 elt
->incoming_edges
->e
= e
;
487 elt
->incoming_edges
->next
= NULL
;
490 /* E was in the hash table. */
493 /* Free ELT as we do not need it anymore, we will extract the
494 relevant entry from the hash table itself. */
497 /* Get the entry stored in the hash table. */
500 /* If insertion was requested, then we need to add INCOMING_EDGE
501 to the list of incoming edges associated with E. */
504 struct el
*el
= XNEW (struct el
);
505 el
->next
= elt
->incoming_edges
;
507 elt
->incoming_edges
= el
;
514 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
515 to see if it has constant value in a flow sensitive manner. Set
516 LOCUS to location of the constant phi arg and return the value.
517 Return DEF directly if either PATH or idx is ZERO. */
520 get_value_locus_in_path (tree def
, vec
<jump_thread_edge
*> *path
,
521 basic_block bb
, int idx
, location_t
*locus
)
527 if (path
== NULL
|| idx
== 0)
530 def_phi
= dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (def
));
534 def_bb
= gimple_bb (def_phi
);
535 /* Don't propagate loop invariants into deeper loops. */
536 if (!def_bb
|| bb_loop_depth (def_bb
) < bb_loop_depth (bb
))
539 /* Backtrack jump threading path from IDX to see if def has constant
541 for (int j
= idx
- 1; j
>= 0; j
--)
543 edge e
= (*path
)[j
]->e
;
544 if (e
->dest
== def_bb
)
546 arg
= gimple_phi_arg_def (def_phi
, e
->dest_idx
);
547 if (is_gimple_min_invariant (arg
))
549 *locus
= gimple_phi_arg_location (def_phi
, e
->dest_idx
);
559 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
560 Try to backtrack jump threading PATH from node IDX to see if the arg
561 has constant value, copy constant value instead of argument itself
565 copy_phi_args (basic_block bb
, edge src_e
, edge tgt_e
,
566 vec
<jump_thread_edge
*> *path
, int idx
)
569 int src_indx
= src_e
->dest_idx
;
571 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
573 gphi
*phi
= gsi
.phi ();
574 tree def
= gimple_phi_arg_def (phi
, src_indx
);
575 location_t locus
= gimple_phi_arg_location (phi
, src_indx
);
577 if (TREE_CODE (def
) == SSA_NAME
578 && !virtual_operand_p (gimple_phi_result (phi
)))
579 def
= get_value_locus_in_path (def
, path
, bb
, idx
, &locus
);
581 add_phi_arg (phi
, def
, tgt_e
, locus
);
585 /* We have recently made a copy of ORIG_BB, including its outgoing
586 edges. The copy is NEW_BB. Every PHI node in every direct successor of
587 ORIG_BB has a new argument associated with edge from NEW_BB to the
588 successor. Initialize the PHI argument so that it is equal to the PHI
589 argument associated with the edge from ORIG_BB to the successor.
590 PATH and IDX are used to check if the new PHI argument has constant
591 value in a flow sensitive manner. */
594 update_destination_phis (basic_block orig_bb
, basic_block new_bb
,
595 vec
<jump_thread_edge
*> *path
, int idx
)
600 FOR_EACH_EDGE (e
, ei
, orig_bb
->succs
)
602 edge e2
= find_edge (new_bb
, e
->dest
);
603 copy_phi_args (e
->dest
, e
, e2
, path
, idx
);
607 /* Given a duplicate block and its single destination (both stored
608 in RD). Create an edge between the duplicate and its single
611 Add an additional argument to any PHI nodes at the single
612 destination. IDX is the start node in jump threading path
613 we start to check to see if the new PHI argument has constant
614 value along the jump threading path. */
617 create_edge_and_update_destination_phis (struct redirection_data
*rd
,
618 basic_block bb
, int idx
)
620 edge e
= make_single_succ_edge (bb
, rd
->path
->last ()->e
->dest
, EDGE_FALLTHRU
);
622 rescan_loop_exit (e
, true, false);
624 /* We used to copy the thread path here. That was added in 2007
625 and dutifully updated through the representation changes in 2013.
627 In 2013 we added code to thread from an interior node through
628 the backedge to another interior node. That runs after the code
629 to thread through loop headers from outside the loop.
631 The latter may delete edges in the CFG, including those
632 which appeared in the jump threading path we copied here. Thus
633 we'd end up using a dangling pointer.
635 After reviewing the 2007/2011 code, I can't see how anything
636 depended on copying the AUX field and clearly copying the jump
637 threading path is problematical due to embedded edge pointers.
638 It has been removed. */
641 /* If there are any PHI nodes at the destination of the outgoing edge
642 from the duplicate block, then we will need to add a new argument
643 to them. The argument should have the same value as the argument
644 associated with the outgoing edge stored in RD. */
645 copy_phi_args (e
->dest
, rd
->path
->last ()->e
, e
, rd
->path
, idx
);
648 /* Look through PATH beginning at START and return TRUE if there are
649 any additional blocks that need to be duplicated. Otherwise,
652 any_remaining_duplicated_blocks (vec
<jump_thread_edge
*> *path
,
655 for (unsigned int i
= start
+ 1; i
< path
->length (); i
++)
657 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
658 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
665 /* Compute the amount of profile count coming into the jump threading
666 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
667 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
668 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
669 identify blocks duplicated for jump threading, which have duplicated
670 edges that need to be ignored in the analysis. Return true if path contains
671 a joiner, false otherwise.
673 In the non-joiner case, this is straightforward - all the counts
674 flowing into the jump threading path should flow through the duplicated
675 block and out of the duplicated path.
677 In the joiner case, it is very tricky. Some of the counts flowing into
678 the original path go offpath at the joiner. The problem is that while
679 we know how much total count goes off-path in the original control flow,
680 we don't know how many of the counts corresponding to just the jump
681 threading path go offpath at the joiner.
683 For example, assume we have the following control flow and identified
684 jump threading paths:
703 Jump threading paths: A -> J -> Son -> D (path 1)
704 C -> J -> Son -> E (path 2)
706 Note that the control flow could be more complicated:
707 - Each jump threading path may have more than one incoming edge. I.e. A and
708 Ea could represent multiple incoming blocks/edges that are included in
710 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
711 before or after the "normal" copy block). These are not duplicated onto
712 the jump threading path, as they are single-successor.
713 - Any of the blocks along the path may have other incoming edges that
714 are not part of any jump threading path, but add profile counts along
717 In the above example, after all jump threading is complete, we will
718 end up with the following control flow:
727 Eona/ \ ---/---\-------- \Eonc
732 \___________ / \ _____/
737 The main issue to notice here is that when we are processing path 1
738 (A->J->Son->D) we need to figure out the outgoing edge weights to
739 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
740 sum of the incoming weights to D remain Ed. The problem with simply
741 assuming that Ja (and Jc when processing path 2) has the same outgoing
742 probabilities to its successors as the original block J, is that after
743 all paths are processed and other edges/counts removed (e.g. none
744 of Ec will reach D after processing path 2), we may end up with not
745 enough count flowing along duplicated edge Sona->D.
747 Therefore, in the case of a joiner, we keep track of all counts
748 coming in along the current path, as well as from predecessors not
749 on any jump threading path (Eb in the above example). While we
750 first assume that the duplicated Eona for Ja->Sona has the same
751 probability as the original, we later compensate for other jump
752 threading paths that may eliminate edges. We do that by keep track
753 of all counts coming into the original path that are not in a jump
754 thread (Eb in the above example, but as noted earlier, there could
755 be other predecessors incoming to the path at various points, such
756 as at Son). Call this cumulative non-path count coming into the path
757 before D as Enonpath. We then ensure that the count from Sona->D is as at
758 least as big as (Ed - Enonpath), but no bigger than the minimum
759 weight along the jump threading path. The probabilities of both the
760 original and duplicated joiner block J and Ja will be adjusted
761 accordingly after the updates. */
764 compute_path_counts (struct redirection_data
*rd
,
765 ssa_local_info_t
*local_info
,
766 profile_count
*path_in_count_ptr
,
767 profile_count
*path_out_count_ptr
)
769 edge e
= rd
->incoming_edges
->e
;
770 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
771 edge elast
= path
->last ()->e
;
772 profile_count nonpath_count
= profile_count::zero ();
773 bool has_joiner
= false;
774 profile_count path_in_count
= profile_count::zero ();
776 /* Start by accumulating incoming edge counts to the path's first bb
777 into a couple buckets:
778 path_in_count: total count of incoming edges that flow into the
780 nonpath_count: total count of incoming edges that are not
781 flowing along *any* path. These are the counts
782 that will still flow along the original path after
783 all path duplication is done by potentially multiple
784 calls to this routine.
785 (any other incoming edge counts are for a different jump threading
786 path that will be handled by a later call to this routine.)
787 To make this easier, start by recording all incoming edges that flow into
788 the current path in a bitmap. We could add up the path's incoming edge
789 counts here, but we still need to walk all the first bb's incoming edges
790 below to add up the counts of the other edges not included in this jump
792 struct el
*next
, *el
;
793 auto_bitmap in_edge_srcs
;
794 for (el
= rd
->incoming_edges
; el
; el
= next
)
797 bitmap_set_bit (in_edge_srcs
, el
->e
->src
->index
);
801 FOR_EACH_EDGE (ein
, ei
, e
->dest
->preds
)
803 vec
<jump_thread_edge
*> *ein_path
= THREAD_PATH (ein
);
804 /* Simply check the incoming edge src against the set captured above. */
806 && bitmap_bit_p (in_edge_srcs
, (*ein_path
)[0]->e
->src
->index
))
808 /* It is necessary but not sufficient that the last path edges
809 are identical. There may be different paths that share the
810 same last path edge in the case where the last edge has a nocopy
812 gcc_assert (ein_path
->last ()->e
== elast
);
813 path_in_count
+= ein
->count ();
817 /* Keep track of the incoming edges that are not on any jump-threading
818 path. These counts will still flow out of original path after all
819 jump threading is complete. */
820 nonpath_count
+= ein
->count ();
824 /* Now compute the fraction of the total count coming into the first
825 path bb that is from the current threading path. */
826 profile_count total_count
= e
->dest
->count
;
827 /* Handle incoming profile insanities. */
828 if (total_count
< path_in_count
)
829 path_in_count
= total_count
;
830 profile_probability onpath_scale
= path_in_count
.probability_in (total_count
);
832 /* Walk the entire path to do some more computation in order to estimate
833 how much of the path_in_count will flow out of the duplicated threading
834 path. In the non-joiner case this is straightforward (it should be
835 the same as path_in_count, although we will handle incoming profile
836 insanities by setting it equal to the minimum count along the path).
838 In the joiner case, we need to estimate how much of the path_in_count
839 will stay on the threading path after the joiner's conditional branch.
840 We don't really know for sure how much of the counts
841 associated with this path go to each successor of the joiner, but we'll
842 estimate based on the fraction of the total count coming into the path
843 bb was from the threading paths (computed above in onpath_scale).
844 Afterwards, we will need to do some fixup to account for other threading
845 paths and possible profile insanities.
847 In order to estimate the joiner case's counts we also need to update
848 nonpath_count with any additional counts coming into the path. Other
849 blocks along the path may have additional predecessors from outside
851 profile_count path_out_count
= path_in_count
;
852 profile_count min_path_count
= path_in_count
;
853 for (unsigned int i
= 1; i
< path
->length (); i
++)
855 edge epath
= (*path
)[i
]->e
;
856 profile_count cur_count
= epath
->count ();
857 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
860 cur_count
= cur_count
.apply_probability (onpath_scale
);
862 /* In the joiner case we need to update nonpath_count for any edges
863 coming into the path that will contribute to the count flowing
864 into the path successor. */
865 if (has_joiner
&& epath
!= elast
)
867 /* Look for other incoming edges after joiner. */
868 FOR_EACH_EDGE (ein
, ei
, epath
->dest
->preds
)
871 /* Ignore in edges from blocks we have duplicated for a
872 threading path, which have duplicated edge counts until
873 they are redirected by an invocation of this routine. */
874 && !bitmap_bit_p (local_info
->duplicate_blocks
,
876 nonpath_count
+= ein
->count ();
879 if (cur_count
< path_out_count
)
880 path_out_count
= cur_count
;
881 if (epath
->count () < min_path_count
)
882 min_path_count
= epath
->count ();
885 /* We computed path_out_count above assuming that this path targeted
886 the joiner's on-path successor with the same likelihood as it
887 reached the joiner. However, other thread paths through the joiner
888 may take a different path through the normal copy source block
889 (i.e. they have a different elast), meaning that they do not
890 contribute any counts to this path's elast. As a result, it may
891 turn out that this path must have more count flowing to the on-path
892 successor of the joiner. Essentially, all of this path's elast
893 count must be contributed by this path and any nonpath counts
894 (since any path through the joiner with a different elast will not
895 include a copy of this elast in its duplicated path).
896 So ensure that this path's path_out_count is at least the
897 difference between elast->count () and nonpath_count. Otherwise the edge
898 counts after threading will not be sane. */
899 if (local_info
->need_profile_correction
900 && has_joiner
&& path_out_count
< elast
->count () - nonpath_count
)
902 path_out_count
= elast
->count () - nonpath_count
;
903 /* But neither can we go above the minimum count along the path
904 we are duplicating. This can be an issue due to profile
905 insanities coming in to this pass. */
906 if (path_out_count
> min_path_count
)
907 path_out_count
= min_path_count
;
910 *path_in_count_ptr
= path_in_count
;
911 *path_out_count_ptr
= path_out_count
;
916 /* Update the counts and frequencies for both an original path
917 edge EPATH and its duplicate EDUP. The duplicate source block
918 will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
919 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
921 update_profile (edge epath
, edge edup
, profile_count path_in_count
,
922 profile_count path_out_count
)
925 /* First update the duplicated block's count. */
928 basic_block dup_block
= edup
->src
;
930 /* Edup's count is reduced by path_out_count. We need to redistribute
931 probabilities to the remaining edges. */
935 profile_probability edup_prob
936 = path_out_count
.probability_in (path_in_count
);
938 /* Either scale up or down the remaining edges.
939 probabilities are always in range <0,1> and thus we can't do
940 both by same loop. */
941 if (edup
->probability
> edup_prob
)
943 profile_probability rev_scale
944 = (profile_probability::always () - edup
->probability
)
945 / (profile_probability::always () - edup_prob
);
946 FOR_EACH_EDGE (esucc
, ei
, dup_block
->succs
)
948 esucc
->probability
/= rev_scale
;
950 else if (edup
->probability
< edup_prob
)
952 profile_probability scale
953 = (profile_probability::always () - edup_prob
)
954 / (profile_probability::always () - edup
->probability
);
955 FOR_EACH_EDGE (esucc
, ei
, dup_block
->succs
)
957 esucc
->probability
*= scale
;
959 if (edup_prob
.initialized_p ())
960 edup
->probability
= edup_prob
;
962 gcc_assert (!dup_block
->count
.initialized_p ());
963 dup_block
->count
= path_in_count
;
966 if (path_in_count
== profile_count::zero ())
969 profile_count final_count
= epath
->count () - path_out_count
;
971 /* Now update the original block's count in the
972 opposite manner - remove the counts/freq that will flow
973 into the duplicated block. Handle underflow due to precision/
975 epath
->src
->count
-= path_in_count
;
977 /* Next update this path edge's original and duplicated counts. We know
978 that the duplicated path will have path_out_count flowing
979 out of it (in the joiner case this is the count along the duplicated path
980 out of the duplicated joiner). This count can then be removed from the
981 original path edge. */
985 profile_probability epath_prob
= final_count
.probability_in (epath
->src
->count
);
987 if (epath
->probability
> epath_prob
)
989 profile_probability rev_scale
990 = (profile_probability::always () - epath
->probability
)
991 / (profile_probability::always () - epath_prob
);
992 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
994 esucc
->probability
/= rev_scale
;
996 else if (epath
->probability
< epath_prob
)
998 profile_probability scale
999 = (profile_probability::always () - epath_prob
)
1000 / (profile_probability::always () - epath
->probability
);
1001 FOR_EACH_EDGE (esucc
, ei
, epath
->src
->succs
)
1003 esucc
->probability
*= scale
;
1005 if (epath_prob
.initialized_p ())
1006 epath
->probability
= epath_prob
;
1009 /* Wire up the outgoing edges from the duplicate blocks and
1010 update any PHIs as needed. Also update the profile counts
1011 on the original and duplicate blocks and edges. */
1013 ssa_fix_duplicate_block_edges (struct redirection_data
*rd
,
1014 ssa_local_info_t
*local_info
)
1016 bool multi_incomings
= (rd
->incoming_edges
->next
!= NULL
);
1017 edge e
= rd
->incoming_edges
->e
;
1018 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1019 edge elast
= path
->last ()->e
;
1020 profile_count path_in_count
= profile_count::zero ();
1021 profile_count path_out_count
= profile_count::zero ();
1023 /* First determine how much profile count to move from original
1024 path to the duplicate path. This is tricky in the presence of
1025 a joiner (see comments for compute_path_counts), where some portion
1026 of the path's counts will flow off-path from the joiner. In the
1027 non-joiner case the path_in_count and path_out_count should be the
1029 bool has_joiner
= compute_path_counts (rd
, local_info
,
1030 &path_in_count
, &path_out_count
);
1032 for (unsigned int count
= 0, i
= 1; i
< path
->length (); i
++)
1034 edge epath
= (*path
)[i
]->e
;
1036 /* If we were threading through an joiner block, then we want
1037 to keep its control statement and redirect an outgoing edge.
1038 Else we want to remove the control statement & edges, then create
1039 a new outgoing edge. In both cases we may need to update PHIs. */
1040 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1045 gcc_assert (has_joiner
);
1047 /* This updates the PHIs at the destination of the duplicate
1048 block. Pass 0 instead of i if we are threading a path which
1049 has multiple incoming edges. */
1050 update_destination_phis (local_info
->bb
, rd
->dup_blocks
[count
],
1051 path
, multi_incomings
? 0 : i
);
1053 /* Find the edge from the duplicate block to the block we're
1054 threading through. That's the edge we want to redirect. */
1055 victim
= find_edge (rd
->dup_blocks
[count
], (*path
)[i
]->e
->dest
);
1057 /* If there are no remaining blocks on the path to duplicate,
1058 then redirect VICTIM to the final destination of the jump
1060 if (!any_remaining_duplicated_blocks (path
, i
))
1062 if (victim
->dest
!= elast
->dest
)
1064 e2
= redirect_edge_and_branch (victim
, elast
->dest
);
1065 /* If we redirected the edge, then we need to copy PHI arguments
1066 at the target. If the edge already existed (e2 != victim
1067 case), then the PHIs in the target already have the correct
1070 copy_phi_args (e2
->dest
, elast
, e2
,
1071 path
, multi_incomings
? 0 : i
);
1078 /* Redirect VICTIM to the next duplicated block in the path. */
1079 e2
= redirect_edge_and_branch (victim
, rd
->dup_blocks
[count
+ 1]);
1081 /* We need to update the PHIs in the next duplicated block. We
1082 want the new PHI args to have the same value as they had
1083 in the source of the next duplicate block.
1085 Thus, we need to know which edge we traversed into the
1086 source of the duplicate. Furthermore, we may have
1087 traversed many edges to reach the source of the duplicate.
1089 Walk through the path starting at element I until we
1090 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1091 the edge from the prior element. */
1092 for (unsigned int j
= i
+ 1; j
< path
->length (); j
++)
1094 if ((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
)
1096 copy_phi_arg_into_existing_phi ((*path
)[j
- 1]->e
, e2
);
1102 /* Update the counts of both the original block
1103 and path edge, and the duplicates. The path duplicate's
1104 incoming count are the totals for all edges
1105 incoming to this jump threading path computed earlier.
1106 And we know that the duplicated path will have path_out_count
1107 flowing out of it (i.e. along the duplicated path out of the
1108 duplicated joiner). */
1109 update_profile (epath
, e2
, path_in_count
, path_out_count
);
1111 else if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1113 remove_ctrl_stmt_and_useless_edges (rd
->dup_blocks
[count
], NULL
);
1114 create_edge_and_update_destination_phis (rd
, rd
->dup_blocks
[count
],
1115 multi_incomings
? 0 : i
);
1117 single_succ_edge (rd
->dup_blocks
[1])->aux
= NULL
;
1119 /* Update the counts of both the original block
1120 and path edge, and the duplicates. Since we are now after
1121 any joiner that may have existed on the path, the count
1122 flowing along the duplicated threaded path is path_out_count.
1123 If we didn't have a joiner, then cur_path_freq was the sum
1124 of the total frequencies along all incoming edges to the
1125 thread path (path_in_freq). If we had a joiner, it would have
1126 been updated at the end of that handling to the edge frequency
1127 along the duplicated joiner path edge. */
1128 update_profile (epath
, EDGE_SUCC (rd
->dup_blocks
[count
], 0),
1129 path_out_count
, path_out_count
);
1133 /* No copy case. In this case we don't have an equivalent block
1134 on the duplicated thread path to update, but we do need
1135 to remove the portion of the counts/freqs that were moved
1136 to the duplicated path from the counts/freqs flowing through
1137 this block on the original path. Since all the no-copy edges
1138 are after any joiner, the removed count is the same as
1141 If we didn't have a joiner, then cur_path_freq was the sum
1142 of the total frequencies along all incoming edges to the
1143 thread path (path_in_freq). If we had a joiner, it would have
1144 been updated at the end of that handling to the edge frequency
1145 along the duplicated joiner path edge. */
1146 update_profile (epath
, NULL
, path_out_count
, path_out_count
);
1149 /* Increment the index into the duplicated path when we processed
1150 a duplicated block. */
1151 if ((*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
1152 || (*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
)
1159 /* Hash table traversal callback routine to create duplicate blocks. */
1162 ssa_create_duplicates (struct redirection_data
**slot
,
1163 ssa_local_info_t
*local_info
)
1165 struct redirection_data
*rd
= *slot
;
1167 /* The second duplicated block in a jump threading path is specific
1168 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1170 Each time we're called, we have to look through the path and see
1171 if a second block needs to be duplicated.
1173 Note the search starts with the third edge on the path. The first
1174 edge is the incoming edge, the second edge always has its source
1175 duplicated. Thus we start our search with the third edge. */
1176 vec
<jump_thread_edge
*> *path
= rd
->path
;
1177 for (unsigned int i
= 2; i
< path
->length (); i
++)
1179 if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
1180 || (*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1182 create_block_for_threading ((*path
)[i
]->e
->src
, rd
, 1,
1183 &local_info
->duplicate_blocks
);
1188 /* Create a template block if we have not done so already. Otherwise
1189 use the template to create a new block. */
1190 if (local_info
->template_block
== NULL
)
1192 create_block_for_threading ((*path
)[1]->e
->src
, rd
, 0,
1193 &local_info
->duplicate_blocks
);
1194 local_info
->template_block
= rd
->dup_blocks
[0];
1195 local_info
->template_last_to_copy
1196 = gsi_last_bb (local_info
->template_block
);
1198 /* We do not create any outgoing edges for the template. We will
1199 take care of that in a later traversal. That way we do not
1200 create edges that are going to just be deleted. */
1204 gimple_seq seq
= NULL
;
1205 if (gsi_stmt (local_info
->template_last_to_copy
)
1206 != gsi_stmt (gsi_last_bb (local_info
->template_block
)))
1208 if (gsi_end_p (local_info
->template_last_to_copy
))
1210 seq
= bb_seq (local_info
->template_block
);
1211 set_bb_seq (local_info
->template_block
, NULL
);
1214 seq
= gsi_split_seq_after (local_info
->template_last_to_copy
);
1216 create_block_for_threading (local_info
->template_block
, rd
, 0,
1217 &local_info
->duplicate_blocks
);
1220 if (gsi_end_p (local_info
->template_last_to_copy
))
1221 set_bb_seq (local_info
->template_block
, seq
);
1223 gsi_insert_seq_after (&local_info
->template_last_to_copy
,
1224 seq
, GSI_SAME_STMT
);
1227 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1229 ssa_fix_duplicate_block_edges (rd
, local_info
);
1232 if (MAY_HAVE_DEBUG_STMTS
)
1234 /* Copy debug stmts from each NO_COPY src block to the block
1235 that would have been its predecessor, if we can append to it
1236 (we can't add stmts after a block-ending stmt), or prepending
1237 to the duplicate of the successor, if there is one. If
1238 there's no duplicate successor, we'll mostly drop the blocks
1239 on the floor; propagate_threaded_block_debug_into, called
1240 elsewhere, will consolidate and preserve the effects of the
1241 binds, but none of the markers. */
1242 gimple_stmt_iterator copy_to
= gsi_last_bb (rd
->dup_blocks
[0]);
1243 if (!gsi_end_p (copy_to
))
1245 if (stmt_ends_bb_p (gsi_stmt (copy_to
)))
1247 if (rd
->dup_blocks
[1])
1248 copy_to
= gsi_after_labels (rd
->dup_blocks
[1]);
1250 copy_to
= gsi_none ();
1253 gsi_next (©_to
);
1255 for (unsigned int i
= 2, j
= 0; i
< path
->length (); i
++)
1256 if ((*path
)[i
]->type
== EDGE_NO_COPY_SRC_BLOCK
1257 && gsi_bb (copy_to
))
1259 for (gimple_stmt_iterator gsi
= gsi_start_bb ((*path
)[i
]->e
->src
);
1260 !gsi_end_p (gsi
); gsi_next (&gsi
))
1262 if (!is_gimple_debug (gsi_stmt (gsi
)))
1264 gimple
*stmt
= gsi_stmt (gsi
);
1265 gimple
*copy
= gimple_copy (stmt
);
1266 gsi_insert_before (©_to
, copy
, GSI_SAME_STMT
);
1269 else if ((*path
)[i
]->type
== EDGE_COPY_SRC_BLOCK
1270 || (*path
)[i
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1274 copy_to
= gsi_last_bb (rd
->dup_blocks
[j
]);
1275 if (!gsi_end_p (copy_to
))
1277 if (stmt_ends_bb_p (gsi_stmt (copy_to
)))
1278 copy_to
= gsi_none ();
1280 gsi_next (©_to
);
1285 /* Keep walking the hash table. */
1289 /* We did not create any outgoing edges for the template block during
1290 block creation. This hash table traversal callback creates the
1291 outgoing edge for the template block. */
1294 ssa_fixup_template_block (struct redirection_data
**slot
,
1295 ssa_local_info_t
*local_info
)
1297 struct redirection_data
*rd
= *slot
;
1299 /* If this is the template block halt the traversal after updating
1302 If we were threading through an joiner block, then we want
1303 to keep its control statement and redirect an outgoing edge.
1304 Else we want to remove the control statement & edges, then create
1305 a new outgoing edge. In both cases we may need to update PHIs. */
1306 if (rd
->dup_blocks
[0] && rd
->dup_blocks
[0] == local_info
->template_block
)
1308 ssa_fix_duplicate_block_edges (rd
, local_info
);
1315 /* Hash table traversal callback to redirect each incoming edge
1316 associated with this hash table element to its new destination. */
1319 ssa_redirect_edges (struct redirection_data
**slot
,
1320 ssa_local_info_t
*local_info
)
1322 struct redirection_data
*rd
= *slot
;
1323 struct el
*next
, *el
;
1325 /* Walk over all the incoming edges associated with this hash table
1327 for (el
= rd
->incoming_edges
; el
; el
= next
)
1330 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1332 /* Go ahead and free this element from the list. Doing this now
1333 avoids the need for another list walk when we destroy the hash
1338 local_info
->num_threaded_edges
++;
1340 if (rd
->dup_blocks
[0])
1344 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1345 fprintf (dump_file
, " Threaded jump %d --> %d to %d\n",
1346 e
->src
->index
, e
->dest
->index
, rd
->dup_blocks
[0]->index
);
1348 /* Redirect the incoming edge (possibly to the joiner block) to the
1349 appropriate duplicate block. */
1350 e2
= redirect_edge_and_branch (e
, rd
->dup_blocks
[0]);
1351 gcc_assert (e
== e2
);
1352 flush_pending_stmts (e2
);
1355 /* Go ahead and clear E->aux. It's not needed anymore and failure
1356 to clear it will cause all kinds of unpleasant problems later. */
1362 /* Indicate that we actually threaded one or more jumps. */
1363 if (rd
->incoming_edges
)
1364 local_info
->jumps_threaded
= true;
1369 /* Return true if this block has no executable statements other than
1370 a simple ctrl flow instruction. When the number of outgoing edges
1371 is one, this is equivalent to a "forwarder" block. */
1374 redirection_block_p (basic_block bb
)
1376 gimple_stmt_iterator gsi
;
1378 /* Advance to the first executable statement. */
1379 gsi
= gsi_start_bb (bb
);
1380 while (!gsi_end_p (gsi
)
1381 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_LABEL
1382 || is_gimple_debug (gsi_stmt (gsi
))
1383 || gimple_nop_p (gsi_stmt (gsi
))
1384 || gimple_clobber_p (gsi_stmt (gsi
))))
1387 /* Check if this is an empty block. */
1388 if (gsi_end_p (gsi
))
1391 /* Test that we've reached the terminating control statement. */
1392 return gsi_stmt (gsi
)
1393 && (gimple_code (gsi_stmt (gsi
)) == GIMPLE_COND
1394 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_GOTO
1395 || gimple_code (gsi_stmt (gsi
)) == GIMPLE_SWITCH
);
1398 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1399 is reached via one or more specific incoming edges, we know which
1400 outgoing edge from BB will be traversed.
1402 We want to redirect those incoming edges to the target of the
1403 appropriate outgoing edge. Doing so avoids a conditional branch
1404 and may expose new optimization opportunities. Note that we have
1405 to update dominator tree and SSA graph after such changes.
1407 The key to keeping the SSA graph update manageable is to duplicate
1408 the side effects occurring in BB so that those side effects still
1409 occur on the paths which bypass BB after redirecting edges.
1411 We accomplish this by creating duplicates of BB and arranging for
1412 the duplicates to unconditionally pass control to one specific
1413 successor of BB. We then revector the incoming edges into BB to
1414 the appropriate duplicate of BB.
1416 If NOLOOP_ONLY is true, we only perform the threading as long as it
1417 does not affect the structure of the loops in a nontrivial way.
1419 If JOINERS is true, then thread through joiner blocks as well. */
1422 fwd_jt_path_registry::thread_block_1 (basic_block bb
,
1426 /* E is an incoming edge into BB that we may or may not want to
1427 redirect to a duplicate of BB. */
1430 ssa_local_info_t local_info
;
1432 local_info
.duplicate_blocks
= BITMAP_ALLOC (NULL
);
1433 local_info
.need_profile_correction
= false;
1434 local_info
.num_threaded_edges
= 0;
1436 /* To avoid scanning a linear array for the element we need we instead
1437 use a hash table. For normal code there should be no noticeable
1438 difference. However, if we have a block with a large number of
1439 incoming and outgoing edges such linear searches can get expensive. */
1441 = new hash_table
<struct redirection_data
> (EDGE_COUNT (bb
->succs
));
1443 /* Record each unique threaded destination into a hash table for
1444 efficient lookups. */
1446 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1451 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1453 if (((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& !joiners
)
1454 || ((*path
)[1]->type
== EDGE_COPY_SRC_BLOCK
&& joiners
))
1457 /* When a NO_COPY_SRC block became non-empty cancel the path. */
1458 if (path
->last ()->type
== EDGE_NO_COPY_SRC_BLOCK
)
1460 auto gsi
= gsi_start_nondebug_bb (path
->last ()->e
->src
);
1461 if (!gsi_end_p (gsi
)
1462 && !is_ctrl_stmt (gsi_stmt (gsi
)))
1464 cancel_thread (path
, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
1470 e2
= path
->last ()->e
;
1471 if (!e2
|| noloop_only
)
1473 /* If NOLOOP_ONLY is true, we only allow threading through the
1474 header of a loop to exit edges. */
1476 /* One case occurs when there was loop header buried in a jump
1477 threading path that crosses loop boundaries. We do not try
1478 and thread this elsewhere, so just cancel the jump threading
1479 request by clearing the AUX field now. */
1480 if (bb
->loop_father
!= e2
->src
->loop_father
1481 && (!loop_exit_edge_p (e2
->src
->loop_father
, e2
)
1482 || flow_loop_nested_p (bb
->loop_father
,
1483 e2
->dest
->loop_father
)))
1485 /* Since this case is not handled by our special code
1486 to thread through a loop header, we must explicitly
1487 cancel the threading request here. */
1488 cancel_thread (path
, "Threading through unhandled loop header");
1493 /* Another case occurs when trying to thread through our
1494 own loop header, possibly from inside the loop. We will
1495 thread these later. */
1497 for (i
= 1; i
< path
->length (); i
++)
1499 if ((*path
)[i
]->e
->src
== bb
->loop_father
->header
1500 && (!loop_exit_edge_p (bb
->loop_father
, e2
)
1501 || (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
))
1505 if (i
!= path
->length ())
1508 /* Loop parallelization can be confused by the result of
1509 threading through the loop exit test back into the loop.
1510 However, theading those jumps seems to help other codes.
1512 I have been unable to find anything related to the shape of
1513 the CFG, the contents of the affected blocks, etc which would
1514 allow a more sensible test than what we're using below which
1515 merely avoids the optimization when parallelizing loops. */
1516 if (flag_tree_parallelize_loops
> 1)
1518 for (i
= 1; i
< path
->length (); i
++)
1519 if (bb
->loop_father
== e2
->src
->loop_father
1520 && loop_exits_from_bb_p (bb
->loop_father
,
1522 && !loop_exit_edge_p (bb
->loop_father
, e2
))
1525 if (i
!= path
->length ())
1527 cancel_thread (path
, "Threading through loop exit");
1534 /* Insert the outgoing edge into the hash table if it is not
1535 already in the hash table. */
1536 lookup_redirection_data (e
, INSERT
);
1538 /* When we have thread paths through a common joiner with different
1539 final destinations, then we may need corrections to deal with
1540 profile insanities. See the big comment before compute_path_counts. */
1541 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1545 else if (e2
!= last
)
1546 local_info
.need_profile_correction
= true;
1550 /* We do not update dominance info. */
1551 free_dominance_info (CDI_DOMINATORS
);
1553 /* We know we only thread through the loop header to loop exits.
1554 Let the basic block duplication hook know we are not creating
1555 a multiple entry loop. */
1557 && bb
== bb
->loop_father
->header
)
1558 set_loop_copy (bb
->loop_father
, loop_outer (bb
->loop_father
));
1560 /* Now create duplicates of BB.
1562 Note that for a block with a high outgoing degree we can waste
1563 a lot of time and memory creating and destroying useless edges.
1565 So we first duplicate BB and remove the control structure at the
1566 tail of the duplicate as well as all outgoing edges from the
1567 duplicate. We then use that duplicate block as a template for
1568 the rest of the duplicates. */
1569 local_info
.template_block
= NULL
;
1571 local_info
.jumps_threaded
= false;
1572 m_redirection_data
->traverse
<ssa_local_info_t
*, ssa_create_duplicates
>
1575 /* The template does not have an outgoing edge. Create that outgoing
1576 edge and update PHI nodes as the edge's target as necessary.
1578 We do this after creating all the duplicates to avoid creating
1579 unnecessary edges. */
1580 m_redirection_data
->traverse
<ssa_local_info_t
*, ssa_fixup_template_block
>
1583 /* The hash table traversals above created the duplicate blocks (and the
1584 statements within the duplicate blocks). This loop creates PHI nodes for
1585 the duplicated blocks and redirects the incoming edges into BB to reach
1586 the duplicates of BB. */
1587 m_redirection_data
->traverse
<ssa_local_info_t
*, ssa_redirect_edges
>
1590 /* Done with this block. Clear REDIRECTION_DATA. */
1591 delete m_redirection_data
;
1592 m_redirection_data
= NULL
;
1595 && bb
== bb
->loop_father
->header
)
1596 set_loop_copy (bb
->loop_father
, NULL
);
1598 BITMAP_FREE (local_info
.duplicate_blocks
);
1599 local_info
.duplicate_blocks
= NULL
;
1601 m_num_threaded_edges
+= local_info
.num_threaded_edges
;
1603 /* Indicate to our caller whether or not any jumps were threaded. */
1604 return local_info
.jumps_threaded
;
1607 /* Wrapper for thread_block_1 so that we can first handle jump
1608 thread paths which do not involve copying joiner blocks, then
1609 handle jump thread paths which have joiner blocks.
1611 By doing things this way we can be as aggressive as possible and
1612 not worry that copying a joiner block will create a jump threading
1616 fwd_jt_path_registry::thread_block (basic_block bb
, bool noloop_only
)
1619 retval
= thread_block_1 (bb
, noloop_only
, false);
1620 retval
|= thread_block_1 (bb
, noloop_only
, true);
1624 /* Callback for dfs_enumerate_from. Returns true if BB is different
1625 from STOP and DBDS_CE_STOP. */
1627 static basic_block dbds_ce_stop
;
1629 dbds_continue_enumeration_p (const_basic_block bb
, const void *stop
)
1631 return (bb
!= (const_basic_block
) stop
1632 && bb
!= dbds_ce_stop
);
1635 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1636 returns the state. */
1639 determine_bb_domination_status (class loop
*loop
, basic_block bb
)
1641 basic_block
*bblocks
;
1642 unsigned nblocks
, i
;
1643 bool bb_reachable
= false;
1647 /* This function assumes BB is a successor of LOOP->header.
1648 If that is not the case return DOMST_NONDOMINATING which
1653 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
1655 if (e
->src
== loop
->header
)
1663 return DOMST_NONDOMINATING
;
1666 if (bb
== loop
->latch
)
1667 return DOMST_DOMINATING
;
1669 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1672 bblocks
= XCNEWVEC (basic_block
, loop
->num_nodes
);
1673 dbds_ce_stop
= loop
->header
;
1674 nblocks
= dfs_enumerate_from (loop
->latch
, 1, dbds_continue_enumeration_p
,
1675 bblocks
, loop
->num_nodes
, bb
);
1676 for (i
= 0; i
< nblocks
; i
++)
1677 FOR_EACH_EDGE (e
, ei
, bblocks
[i
]->preds
)
1679 if (e
->src
== loop
->header
)
1682 return DOMST_NONDOMINATING
;
1685 bb_reachable
= true;
1689 return (bb_reachable
? DOMST_DOMINATING
: DOMST_LOOP_BROKEN
);
1692 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1693 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1694 to the inside of the loop. */
1697 fwd_jt_path_registry::thread_through_loop_header (class loop
*loop
,
1698 bool may_peel_loop_headers
)
1700 basic_block header
= loop
->header
;
1701 edge e
, tgt_edge
, latch
= loop_latch_edge (loop
);
1703 basic_block tgt_bb
, atgt_bb
;
1704 enum bb_dom_status domst
;
1706 /* We have already threaded through headers to exits, so all the threading
1707 requests now are to the inside of the loop. We need to avoid creating
1708 irreducible regions (i.e., loops with more than one entry block), and
1709 also loop with several latch edges, or new subloops of the loop (although
1710 there are cases where it might be appropriate, it is difficult to decide,
1711 and doing it wrongly may confuse other optimizers).
1713 We could handle more general cases here. However, the intention is to
1714 preserve some information about the loop, which is impossible if its
1715 structure changes significantly, in a way that is not well understood.
1716 Thus we only handle few important special cases, in which also updating
1717 of the loop-carried information should be feasible:
1719 1) Propagation of latch edge to a block that dominates the latch block
1720 of a loop. This aims to handle the following idiom:
1731 After threading the latch edge, this becomes
1742 The original header of the loop is moved out of it, and we may thread
1743 the remaining edges through it without further constraints.
1745 2) All entry edges are propagated to a single basic block that dominates
1746 the latch block of the loop. This aims to handle the following idiom
1747 (normally created for "for" loops):
1770 /* Threading through the header won't improve the code if the header has just
1772 if (single_succ_p (header
))
1775 if (!may_peel_loop_headers
&& !redirection_block_p (loop
->header
))
1781 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1788 /* If latch is not threaded, and there is a header
1789 edge that is not threaded, we would create loop
1790 with multiple entries. */
1794 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1796 if ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
1798 tgt_edge
= (*path
)[1]->e
;
1799 atgt_bb
= tgt_edge
->dest
;
1802 /* Two targets of threading would make us create loop
1803 with multiple entries. */
1804 else if (tgt_bb
!= atgt_bb
)
1810 /* There are no threading requests. */
1814 /* Redirecting to empty loop latch is useless. */
1815 if (tgt_bb
== loop
->latch
1816 && empty_block_p (loop
->latch
))
1820 /* The target block must dominate the loop latch, otherwise we would be
1821 creating a subloop. */
1822 domst
= determine_bb_domination_status (loop
, tgt_bb
);
1823 if (domst
== DOMST_NONDOMINATING
)
1825 if (domst
== DOMST_LOOP_BROKEN
)
1827 /* If the loop ceased to exist, mark it as such, and thread through its
1829 mark_loop_for_removal (loop
);
1830 return thread_block (header
, false);
1833 if (tgt_bb
->loop_father
->header
== tgt_bb
)
1835 /* If the target of the threading is a header of a subloop, we need
1836 to create a preheader for it, so that the headers of the two loops
1838 if (EDGE_COUNT (tgt_bb
->preds
) > 2)
1840 tgt_bb
= create_preheader (tgt_bb
->loop_father
, 0);
1841 gcc_assert (tgt_bb
!= NULL
);
1844 tgt_bb
= split_edge (tgt_edge
);
1847 basic_block new_preheader
;
1849 /* Now consider the case entry edges are redirected to the new entry
1850 block. Remember one entry edge, so that we can find the new
1851 preheader (its destination after threading). */
1852 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1858 /* The duplicate of the header is the new preheader of the loop. Ensure
1859 that it is placed correctly in the loop hierarchy. */
1860 set_loop_copy (loop
, loop_outer (loop
));
1862 thread_block (header
, false);
1863 set_loop_copy (loop
, NULL
);
1864 new_preheader
= e
->dest
;
1866 /* Create the new latch block. This is always necessary, as the latch
1867 must have only a single successor, but the original header had at
1868 least two successors. */
1870 mfb_kj_edge
= single_succ_edge (new_preheader
);
1871 loop
->header
= mfb_kj_edge
->dest
;
1872 latch
= make_forwarder_block (tgt_bb
, mfb_keep_just
, NULL
);
1873 loop
->header
= latch
->dest
;
1874 loop
->latch
= latch
->src
;
1878 /* We failed to thread anything. Cancel the requests. */
1879 FOR_EACH_EDGE (e
, ei
, header
->preds
)
1881 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
1885 cancel_thread (path
, "Failure in thread_through_loop_header");
1892 /* E1 and E2 are edges into the same basic block. Return TRUE if the
1893 PHI arguments associated with those edges are equal or there are no
1894 PHI arguments, otherwise return FALSE. */
1897 phi_args_equal_on_edges (edge e1
, edge e2
)
1900 int indx1
= e1
->dest_idx
;
1901 int indx2
= e2
->dest_idx
;
1903 for (gsi
= gsi_start_phis (e1
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1905 gphi
*phi
= gsi
.phi ();
1907 if (!operand_equal_p (gimple_phi_arg_def (phi
, indx1
),
1908 gimple_phi_arg_def (phi
, indx2
), 0))
1914 /* Return the number of non-debug statements and non-virtual PHIs in a
1918 count_stmts_and_phis_in_block (basic_block bb
)
1920 unsigned int num_stmts
= 0;
1923 for (gpi
= gsi_start_phis (bb
); !gsi_end_p (gpi
); gsi_next (&gpi
))
1924 if (!virtual_operand_p (PHI_RESULT (gpi
.phi ())))
1927 gimple_stmt_iterator gsi
;
1928 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1930 gimple
*stmt
= gsi_stmt (gsi
);
1931 if (!is_gimple_debug (stmt
))
1939 /* Walk through the registered jump threads and convert them into a
1940 form convenient for this pass.
1942 Any block which has incoming edges threaded to outgoing edges
1943 will have its entry in THREADED_BLOCK set.
1945 Any threaded edge will have its new outgoing edge stored in the
1946 original edge's AUX field.
1948 This form avoids the need to walk all the edges in the CFG to
1949 discover blocks which need processing and avoids unnecessary
1950 hash table lookups to map from threaded edge to new target. */
1953 fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks
)
1962 /* It is possible to have jump threads in which one is a subpath
1963 of the other. ie, (A, B), (B, C), (C, D) where B is a joiner
1964 block and (B, C), (C, D) where no joiner block exists.
1966 When this occurs ignore the jump thread request with the joiner
1967 block. It's totally subsumed by the simpler jump thread request.
1969 This results in less block copying, simpler CFGs. More importantly,
1970 when we duplicate the joiner block, B, in this case we will create
1971 a new threading opportunity that we wouldn't be able to optimize
1972 until the next jump threading iteration.
1974 So first convert the jump thread requests which do not require a
1976 for (i
= 0; i
< m_paths
.length (); i
++)
1978 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
1980 if (path
->length () > 1
1981 && (*path
)[1]->type
!= EDGE_COPY_SRC_JOINER_BLOCK
)
1983 edge e
= (*path
)[0]->e
;
1984 e
->aux
= (void *)path
;
1985 bitmap_set_bit (tmp
, e
->dest
->index
);
1989 /* Now iterate again, converting cases where we want to thread
1990 through a joiner block, but only if no other edge on the path
1991 already has a jump thread attached to it. We do this in two passes,
1992 to avoid situations where the order in the paths vec can hide overlapping
1993 threads (the path is recorded on the incoming edge, so we would miss
1994 cases where the second path starts at a downstream edge on the same
1995 path). First record all joiner paths, deleting any in the unexpected
1996 case where there is already a path for that incoming edge. */
1997 for (i
= 0; i
< m_paths
.length ();)
1999 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
2001 if (path
->length () > 1
2002 && (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
2004 /* Attach the path to the starting edge if none is yet recorded. */
2005 if ((*path
)[0]->e
->aux
== NULL
)
2007 (*path
)[0]->e
->aux
= path
;
2012 m_paths
.unordered_remove (i
);
2013 cancel_thread (path
);
2022 /* Second, look for paths that have any other jump thread attached to
2023 them, and either finish converting them or cancel them. */
2024 for (i
= 0; i
< m_paths
.length ();)
2026 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
2027 edge e
= (*path
)[0]->e
;
2029 if (path
->length () > 1
2030 && (*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
&& e
->aux
== path
)
2033 for (j
= 1; j
< path
->length (); j
++)
2034 if ((*path
)[j
]->e
->aux
!= NULL
)
2037 /* If we iterated through the entire path without exiting the loop,
2038 then we are good to go, record it. */
2039 if (j
== path
->length ())
2041 bitmap_set_bit (tmp
, e
->dest
->index
);
2047 m_paths
.unordered_remove (i
);
2048 cancel_thread (path
);
2057 /* When optimizing for size, prune all thread paths where statement
2058 duplication is necessary.
2060 We walk the jump thread path looking for copied blocks. There's
2061 two types of copied blocks.
2063 EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
2064 cancel the jump threading request when optimizing for size.
2066 EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
2067 will be killed by threading. If threading does not kill all of
2068 its statements, then we should cancel the jump threading request
2069 when optimizing for size. */
2070 if (optimize_function_for_size_p (cfun
))
2072 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2074 FOR_EACH_EDGE (e
, ei
, BASIC_BLOCK_FOR_FN (cfun
, i
)->preds
)
2077 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2080 for (j
= 1; j
< path
->length (); j
++)
2082 bb
= (*path
)[j
]->e
->src
;
2083 if (redirection_block_p (bb
))
2085 else if ((*path
)[j
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
2086 || ((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
2087 && (count_stmts_and_phis_in_block (bb
)
2088 != estimate_threading_killed_stmts (bb
))))
2092 if (j
!= path
->length ())
2094 cancel_thread (path
);
2098 bitmap_set_bit (threaded_blocks
, i
);
2103 bitmap_copy (threaded_blocks
, tmp
);
2105 /* If we have a joiner block (J) which has two successors S1 and S2 and
2106 we are threading though S1 and the final destination of the thread
2107 is S2, then we must verify that any PHI nodes in S2 have the same
2108 PHI arguments for the edge J->S2 and J->S1->...->S2.
2110 We used to detect this prior to registering the jump thread, but
2111 that prohibits propagation of edge equivalences into non-dominated
2112 PHI nodes as the equivalency test might occur before propagation.
2114 This must also occur after we truncate any jump threading paths
2115 as this scenario may only show up after truncation.
2117 This works for now, but will need improvement as part of the FSA
2120 Note since we've moved the thread request data to the edges,
2121 we have to iterate on those rather than the threaded_edges vector. */
2122 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2124 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2125 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2129 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2130 bool have_joiner
= ((*path
)[1]->type
== EDGE_COPY_SRC_JOINER_BLOCK
);
2134 basic_block joiner
= e
->dest
;
2135 edge final_edge
= path
->last ()->e
;
2136 basic_block final_dest
= final_edge
->dest
;
2137 edge e2
= find_edge (joiner
, final_dest
);
2139 if (e2
&& !phi_args_equal_on_edges (e2
, final_edge
))
2141 cancel_thread (path
);
2149 /* Look for jump threading paths which cross multiple loop headers.
2151 The code to thread through loop headers will change the CFG in ways
2152 that invalidate the cached loop iteration information. So we must
2153 detect that case and wipe the cached information. */
2154 EXECUTE_IF_SET_IN_BITMAP (tmp
, 0, i
, bi
)
2156 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2157 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2161 gcc_assert (loops_state_satisfies_p
2162 (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
));
2163 vec
<jump_thread_edge
*> *path
= THREAD_PATH (e
);
2165 for (unsigned int i
= 0, crossed_headers
= 0;
2166 i
< path
->length ();
2169 basic_block dest
= (*path
)[i
]->e
->dest
;
2170 basic_block src
= (*path
)[i
]->e
->src
;
2171 /* If we enter a loop. */
2172 if (flow_loop_nested_p (src
->loop_father
, dest
->loop_father
))
2174 /* If we step from a block outside an irreducible region
2175 to a block inside an irreducible region, then we have
2176 crossed into a loop. */
2177 else if (! (src
->flags
& BB_IRREDUCIBLE_LOOP
)
2178 && (dest
->flags
& BB_IRREDUCIBLE_LOOP
))
2180 if (crossed_headers
> 1)
2182 vect_free_loop_info_assumptions
2183 ((*path
)[path
->length () - 1]->e
->dest
->loop_father
);
2193 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2194 case of SEME Single Entry Multiple Exits region in which all nodes in the
2195 REGION have exactly one incoming edge. The only exception is the first block
2196 that may not have been connected to the rest of the cfg yet. */
2199 verify_jump_thread (basic_block
*region
, unsigned n_region
)
2201 for (unsigned i
= 0; i
< n_region
; i
++)
2202 gcc_assert (EDGE_COUNT (region
[i
]->preds
) <= 1);
2205 /* Return true when BB is one of the first N items in BBS. */
2208 bb_in_bbs (basic_block bb
, basic_block
*bbs
, int n
)
2210 for (int i
= 0; i
< n
; i
++)
2218 jt_path_registry::debug_path (FILE *dump_file
, int pathno
)
2220 vec
<jump_thread_edge
*> *p
= m_paths
[pathno
];
2221 fprintf (dump_file
, "path: ");
2222 for (unsigned i
= 0; i
< p
->length (); ++i
)
2223 fprintf (dump_file
, "%d -> %d, ",
2224 (*p
)[i
]->e
->src
->index
, (*p
)[i
]->e
->dest
->index
);
2225 fprintf (dump_file
, "\n");
2229 jt_path_registry::debug ()
2231 for (unsigned i
= 0; i
< m_paths
.length (); ++i
)
2232 debug_path (stderr
, i
);
2235 /* Rewire a jump_thread_edge so that the source block is now a
2236 threaded source block.
2238 PATH_NUM is an index into the global path table PATHS.
2239 EDGE_NUM is the jump thread edge number into said path.
2241 Returns TRUE if we were able to successfully rewire the edge. */
2244 back_jt_path_registry::rewire_first_differing_edge (unsigned path_num
,
2247 vec
<jump_thread_edge
*> *path
= m_paths
[path_num
];
2248 edge
&e
= (*path
)[edge_num
]->e
;
2249 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2250 fprintf (dump_file
, "rewiring edge candidate: %d -> %d\n",
2251 e
->src
->index
, e
->dest
->index
);
2252 basic_block src_copy
= get_bb_copy (e
->src
);
2253 if (src_copy
== NULL
)
2255 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2256 fprintf (dump_file
, "ignoring candidate: there is no src COPY\n");
2259 edge new_edge
= find_edge (src_copy
, e
->dest
);
2260 /* If the previously threaded paths created a flow graph where we
2261 can no longer figure out where to go, give up. */
2262 if (new_edge
== NULL
)
2264 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2265 fprintf (dump_file
, "ignoring candidate: we lost our way\n");
2272 /* After a path has been jump threaded, adjust the remaining paths
2273 that are subsets of this path, so these paths can be safely
2274 threaded within the context of the new threaded path.
2276 For example, suppose we have just threaded:
2278 5 -> 6 -> 7 -> 8 -> 12 => 5 -> 6' -> 7' -> 8' -> 12'
2280 And we have an upcoming threading candidate:
2281 5 -> 6 -> 7 -> 8 -> 15 -> 20
2283 This function adjusts the upcoming path into:
2286 CURR_PATH_NUM is an index into the global paths table. It
2287 specifies the path that was just threaded. */
2290 back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num
)
2292 vec
<jump_thread_edge
*> *curr_path
= m_paths
[curr_path_num
];
2294 /* Iterate through all the other paths and adjust them. */
2295 for (unsigned cand_path_num
= 0; cand_path_num
< m_paths
.length (); )
2297 if (cand_path_num
== curr_path_num
)
2302 /* Make sure the candidate to adjust starts with the same path
2303 as the recently threaded path. */
2304 vec
<jump_thread_edge
*> *cand_path
= m_paths
[cand_path_num
];
2305 if ((*cand_path
)[0]->e
!= (*curr_path
)[0]->e
)
2310 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2312 fprintf (dump_file
, "adjusting candidate: ");
2313 debug_path (dump_file
, cand_path_num
);
2316 /* Chop off from the candidate path any prefix it shares with
2317 the recently threaded path. */
2318 unsigned minlength
= MIN (curr_path
->length (), cand_path
->length ());
2320 for (j
= 0; j
< minlength
; ++j
)
2322 edge cand_edge
= (*cand_path
)[j
]->e
;
2323 edge curr_edge
= (*curr_path
)[j
]->e
;
2325 /* Once the prefix no longer matches, adjust the first
2326 non-matching edge to point from an adjusted edge to
2327 wherever it was going. */
2328 if (cand_edge
!= curr_edge
)
2330 gcc_assert (cand_edge
->src
== curr_edge
->src
);
2331 if (!rewire_first_differing_edge (cand_path_num
, j
))
2332 goto remove_candidate_from_list
;
2338 /* If we consumed the max subgraph we could look at, and
2339 still didn't find any different edges, it's the
2340 last edge after MINLENGTH. */
2341 if (cand_path
->length () > minlength
)
2343 if (!rewire_first_differing_edge (cand_path_num
, j
))
2344 goto remove_candidate_from_list
;
2346 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2347 fprintf (dump_file
, "adjusting first edge after MINLENGTH.\n");
2351 /* If we are removing everything, delete the entire candidate. */
2352 if (j
== cand_path
->length ())
2354 remove_candidate_from_list
:
2355 cancel_thread (cand_path
, "Adjusted candidate is EMPTY");
2356 m_paths
.unordered_remove (cand_path_num
);
2359 /* Otherwise, just remove the redundant sub-path. */
2360 if (cand_path
->length () - j
> 1)
2361 cand_path
->block_remove (0, j
);
2362 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2363 fprintf (dump_file
, "Dropping illformed candidate.\n");
2365 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2367 fprintf (dump_file
, "adjusted candidate: ");
2368 debug_path (dump_file
, cand_path_num
);
2374 /* Duplicates a jump-thread path of N_REGION basic blocks.
2375 The ENTRY edge is redirected to the duplicate of the region.
2377 Remove the last conditional statement in the last basic block in the REGION,
2378 and create a single fallthru edge pointing to the same destination as the
2381 CURRENT_PATH_NO is an index into the global paths[] table
2382 specifying the jump-thread path.
2384 Returns false if it is unable to copy the region, true otherwise. */
2387 back_jt_path_registry::duplicate_thread_path (edge entry
,
2389 basic_block
*region
,
2391 unsigned current_path_no
)
2394 class loop
*loop
= entry
->dest
->loop_father
;
2397 profile_count curr_count
;
2399 if (!can_copy_bbs_p (region
, n_region
))
2402 /* Some sanity checking. Note that we do not check for all possible
2403 missuses of the functions. I.e. if you ask to copy something weird,
2404 it will work, but the state of structures probably will not be
2406 for (i
= 0; i
< n_region
; i
++)
2408 /* We do not handle subloops, i.e. all the blocks must belong to the
2410 if (region
[i
]->loop_father
!= loop
)
2414 initialize_original_copy_tables ();
2416 set_loop_copy (loop
, loop
);
2418 basic_block
*region_copy
= XNEWVEC (basic_block
, n_region
);
2419 copy_bbs (region
, n_region
, region_copy
, &exit
, 1, &exit_copy
, loop
,
2420 split_edge_bb_loc (entry
), false);
2422 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2423 following code ensures that all the edges exiting the jump-thread path are
2424 redirected back to the original code: these edges are exceptions
2425 invalidating the property that is propagated by executing all the blocks of
2426 the jump-thread path in order. */
2428 curr_count
= entry
->count ();
2430 for (i
= 0; i
< n_region
; i
++)
2434 basic_block bb
= region_copy
[i
];
2436 /* Watch inconsistent profile. */
2437 if (curr_count
> region
[i
]->count
)
2438 curr_count
= region
[i
]->count
;
2439 /* Scale current BB. */
2440 if (region
[i
]->count
.nonzero_p () && curr_count
.initialized_p ())
2442 /* In the middle of the path we only scale the frequencies.
2443 In last BB we need to update probabilities of outgoing edges
2444 because we know which one is taken at the threaded path. */
2445 if (i
+ 1 != n_region
)
2446 scale_bbs_frequencies_profile_count (region
+ i
, 1,
2447 region
[i
]->count
- curr_count
,
2450 update_bb_profile_for_threading (region
[i
],
2453 scale_bbs_frequencies_profile_count (region_copy
+ i
, 1, curr_count
,
2454 region_copy
[i
]->count
);
2457 if (single_succ_p (bb
))
2459 /* Make sure the successor is the next node in the path. */
2460 gcc_assert (i
+ 1 == n_region
2461 || region_copy
[i
+ 1] == single_succ_edge (bb
)->dest
);
2462 if (i
+ 1 != n_region
)
2464 curr_count
= single_succ_edge (bb
)->count ();
2469 /* Special case the last block on the path: make sure that it does not
2470 jump back on the copied path, including back to itself. */
2471 if (i
+ 1 == n_region
)
2473 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2474 if (bb_in_bbs (e
->dest
, region_copy
, n_region
))
2476 basic_block orig
= get_bb_original (e
->dest
);
2478 redirect_edge_and_branch_force (e
, orig
);
2483 /* Redirect all other edges jumping to non-adjacent blocks back to the
2485 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2486 if (region_copy
[i
+ 1] != e
->dest
)
2488 basic_block orig
= get_bb_original (e
->dest
);
2490 redirect_edge_and_branch_force (e
, orig
);
2494 curr_count
= e
->count ();
2500 verify_jump_thread (region_copy
, n_region
);
2502 /* Remove the last branch in the jump thread path. */
2503 remove_ctrl_stmt_and_useless_edges (region_copy
[n_region
- 1], exit
->dest
);
2505 /* And fixup the flags on the single remaining edge. */
2506 edge fix_e
= find_edge (region_copy
[n_region
- 1], exit
->dest
);
2507 fix_e
->flags
&= ~(EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
| EDGE_ABNORMAL
);
2508 fix_e
->flags
|= EDGE_FALLTHRU
;
2510 edge e
= make_edge (region_copy
[n_region
- 1], exit
->dest
, EDGE_FALLTHRU
);
2514 rescan_loop_exit (e
, true, false);
2515 e
->probability
= profile_probability::always ();
2518 /* Redirect the entry and add the phi node arguments. */
2519 if (entry
->dest
== loop
->header
)
2520 mark_loop_for_removal (loop
);
2521 redirected
= redirect_edge_and_branch (entry
, get_bb_copy (entry
->dest
));
2522 gcc_assert (redirected
!= NULL
);
2523 flush_pending_stmts (entry
);
2525 /* Add the other PHI node arguments. */
2526 add_phi_args_after_copy (region_copy
, n_region
, NULL
);
2530 adjust_paths_after_duplication (current_path_no
);
2532 free_original_copy_tables ();
2536 /* Return true when PATH is a valid jump-thread path. */
2539 valid_jump_thread_path (vec
<jump_thread_edge
*> *path
)
2541 unsigned len
= path
->length ();
2543 /* Check that the path is connected. */
2544 for (unsigned int j
= 0; j
< len
- 1; j
++)
2546 edge e
= (*path
)[j
]->e
;
2547 if (e
->dest
!= (*path
)[j
+1]->e
->src
)
2553 /* Remove any queued jump threads that include edge E.
2555 We don't actually remove them here, just record the edges into ax
2556 hash table. That way we can do the search once per iteration of
2557 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2560 fwd_jt_path_registry::remove_jump_threads_including (edge_def
*e
)
2562 if (!m_paths
.exists () || !flag_thread_jumps
)
2565 edge
*slot
= m_removed_edges
->find_slot (e
, INSERT
);
2569 /* Thread all paths that have been queued for jump threading, and
2570 update the CFG accordingly.
2572 It is the caller's responsibility to fix the dominance information
2573 and rewrite duplicated SSA_NAMEs back into SSA form.
2575 If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
2576 headers if it does not simplify the loop.
2578 Returns true if one or more edges were threaded. */
2581 jt_path_registry::thread_through_all_blocks (bool peel_loop_headers
)
2583 if (m_paths
.length () == 0)
2586 m_num_threaded_edges
= 0;
2588 bool retval
= update_cfg (peel_loop_headers
);
2590 statistics_counter_event (cfun
, "Jumps threaded", m_num_threaded_edges
);
2594 loops_state_set (LOOPS_NEED_FIXUP
);
2600 /* This is the backward threader version of thread_through_all_blocks
2601 using a generic BB copier. */
2604 back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
2606 bool retval
= false;
2607 hash_set
<edge
> visited_starting_edges
;
2609 while (m_paths
.length ())
2611 vec
<jump_thread_edge
*> *path
= m_paths
[0];
2612 edge entry
= (*path
)[0]->e
;
2614 /* Do not jump-thread twice from the same starting edge.
2616 Previously we only checked that we weren't threading twice
2617 from the same BB, but that was too restrictive. Imagine a
2618 path that starts from GIMPLE_COND(x_123 == 0,...), where both
2619 edges out of this conditional yield paths that can be
2620 threaded (for example, both lead to an x_123==0 or x_123!=0
2621 conditional further down the line. */
2622 if (visited_starting_edges
.contains (entry
)
2623 /* We may not want to realize this jump thread path for
2624 various reasons. So check it first. */
2625 || !valid_jump_thread_path (path
))
2627 /* Remove invalid jump-thread paths. */
2628 cancel_thread (path
, "Avoiding threading twice from same edge");
2629 m_paths
.unordered_remove (0);
2633 unsigned len
= path
->length ();
2634 edge exit
= (*path
)[len
- 1]->e
;
2635 basic_block
*region
= XNEWVEC (basic_block
, len
- 1);
2637 for (unsigned int j
= 0; j
< len
- 1; j
++)
2638 region
[j
] = (*path
)[j
]->e
->dest
;
2640 if (duplicate_thread_path (entry
, exit
, region
, len
- 1, 0))
2642 /* We do not update dominance info. */
2643 free_dominance_info (CDI_DOMINATORS
);
2644 visited_starting_edges
.add (entry
);
2646 m_num_threaded_edges
++;
2650 m_paths
.unordered_remove (0);
2656 /* This is the forward threader version of thread_through_all_blocks,
2657 using a custom BB copier. */
2660 fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers
)
2662 bool retval
= false;
2664 /* Remove any paths that referenced removed edges. */
2665 if (m_removed_edges
)
2666 for (unsigned i
= 0; i
< m_paths
.length (); )
2669 vec
<jump_thread_edge
*> *path
= m_paths
[i
];
2671 for (j
= 0; j
< path
->length (); j
++)
2673 edge e
= (*path
)[j
]->e
;
2674 if (m_removed_edges
->find_slot (e
, NO_INSERT
)
2675 || (((*path
)[j
]->type
== EDGE_COPY_SRC_BLOCK
2676 || (*path
)[j
]->type
== EDGE_COPY_SRC_JOINER_BLOCK
)
2677 && !can_duplicate_block_p (e
->src
)))
2681 if (j
!= path
->length ())
2683 cancel_thread (path
, "Thread references removed edge");
2684 m_paths
.unordered_remove (i
);
2690 auto_bitmap threaded_blocks
;
2691 mark_threaded_blocks (threaded_blocks
);
2693 initialize_original_copy_tables ();
2695 /* The order in which we process jump threads can be important.
2697 Consider if we have two jump threading paths A and B. If the
2698 target edge of A is the starting edge of B and we thread path A
2699 first, then we create an additional incoming edge into B->dest that
2700 we cannot discover as a jump threading path on this iteration.
2702 If we instead thread B first, then the edge into B->dest will have
2703 already been redirected before we process path A and path A will
2704 natually, with no further work, target the redirected path for B.
2706 An post-order is sufficient here. Compute the ordering first, then
2707 process the blocks. */
2708 if (!bitmap_empty_p (threaded_blocks
))
2710 int *postorder
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
));
2711 unsigned int postorder_num
= post_order_compute (postorder
, false, false);
2712 for (unsigned int i
= 0; i
< postorder_num
; i
++)
2714 unsigned int indx
= postorder
[i
];
2715 if (bitmap_bit_p (threaded_blocks
, indx
))
2717 basic_block bb
= BASIC_BLOCK_FOR_FN (cfun
, indx
);
2718 retval
|= thread_block (bb
, true);
2724 /* Then perform the threading through loop headers. We start with the
2725 innermost loop, so that the changes in cfg we perform won't affect
2726 further threading. */
2727 for (auto loop
: loops_list (cfun
, LI_FROM_INNERMOST
))
2730 || !bitmap_bit_p (threaded_blocks
, loop
->header
->index
))
2733 retval
|= thread_through_loop_header (loop
, may_peel_loop_headers
);
2736 /* All jump threading paths should have been resolved at this
2737 point. Verify that is the case. */
2739 FOR_EACH_BB_FN (bb
, cfun
)
2743 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
2744 gcc_assert (e
->aux
== NULL
);
2747 free_original_copy_tables ();
2753 jt_path_registry::cancel_invalid_paths (vec
<jump_thread_edge
*> &path
)
2755 gcc_checking_assert (!path
.is_empty ());
2756 edge entry
= path
[0]->e
;
2757 edge exit
= path
[path
.length () - 1]->e
;
2758 bool seen_latch
= false;
2759 int loops_crossed
= 0;
2760 bool crossed_latch
= false;
2761 bool crossed_loop_header
= false;
2762 // Use ->dest here instead of ->src to ignore the first block. The
2763 // first block is allowed to be in a different loop, since it'll be
2764 // redirected. See similar comment in profitable_path_p: "we don't
2765 // care about that block...".
2766 loop_p loop
= entry
->dest
->loop_father
;
2767 loop_p curr_loop
= loop
;
2769 for (unsigned int i
= 0; i
< path
.length (); i
++)
2771 edge e
= path
[i
]->e
;
2775 // NULL outgoing edges on a path can happen for jumping to a
2776 // constant address.
2777 cancel_thread (&path
, "Found NULL edge in jump threading path");
2781 if (loop
->latch
== e
->src
|| loop
->latch
== e
->dest
)
2784 // Like seen_latch, but excludes the first block.
2785 if (e
->src
!= entry
->src
)
2786 crossed_latch
= true;
2789 if (e
->dest
->loop_father
!= curr_loop
)
2791 curr_loop
= e
->dest
->loop_father
;
2795 // ?? Avoid threading through loop headers that remain in the
2796 // loop, as such threadings tend to create sub-loops which
2797 // _might_ be OK ??.
2798 if (e
->dest
->loop_father
->header
== e
->dest
2799 && !flow_loop_nested_p (exit
->dest
->loop_father
,
2800 e
->dest
->loop_father
))
2801 crossed_loop_header
= true;
2803 if (flag_checking
&& !m_backedge_threads
)
2804 gcc_assert ((path
[i
]->e
->flags
& EDGE_DFS_BACK
) == 0);
2807 // If we crossed a loop into an outer loop without crossing the
2808 // latch, this is just an early exit from the loop.
2809 if (loops_crossed
== 1
2811 && flow_loop_nested_p (exit
->dest
->loop_father
, exit
->src
->loop_father
))
2814 if (cfun
->curr_properties
& PROP_loop_opts_done
)
2817 if (seen_latch
&& empty_block_p (loop
->latch
))
2819 cancel_thread (&path
, "Threading through latch before loop opts "
2820 "would create non-empty latch");
2825 cancel_thread (&path
, "Path crosses loops");
2828 // The path should either start and end in the same loop or exit the
2829 // loop it starts in but never enter a loop. This also catches
2830 // creating irreducible loops, not only rotation.
2831 if (entry
->src
->loop_father
!= exit
->dest
->loop_father
2832 && !flow_loop_nested_p (exit
->src
->loop_father
,
2833 entry
->dest
->loop_father
))
2835 cancel_thread (&path
, "Path rotates loop");
2838 if (crossed_loop_header
)
2840 cancel_thread (&path
, "Path crosses loop header but does not exit it");
2846 /* Register a jump threading opportunity. We queue up all the jump
2847 threading opportunities discovered by a pass and update the CFG
2848 and SSA form all at once.
2850 E is the edge we can thread, E2 is the new target edge, i.e., we
2851 are effectively recording that E->dest can be changed to E2->dest
2852 after fixing the SSA graph.
2854 Return TRUE if PATH was successfully threaded. */
2857 jt_path_registry::register_jump_thread (vec
<jump_thread_edge
*> *path
)
2859 gcc_checking_assert (flag_thread_jumps
);
2861 if (!dbg_cnt (registered_jump_thread
))
2867 if (cancel_invalid_paths (*path
))
2870 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2871 dump_jump_thread_path (dump_file
, *path
, true);
2873 m_paths
.safe_push (path
);
2877 /* Return how many uses of T there are within BB, as long as there
2878 aren't any uses outside BB. If there are any uses outside BB,
2879 return -1 if there's at most one use within BB, or -2 if there is
2880 more than one use within BB. */
2883 uses_in_bb (tree t
, basic_block bb
)
2886 bool outside_bb
= false;
2888 imm_use_iterator iter
;
2889 use_operand_p use_p
;
2890 FOR_EACH_IMM_USE_FAST (use_p
, iter
, t
)
2892 if (is_gimple_debug (USE_STMT (use_p
)))
2895 if (gimple_bb (USE_STMT (use_p
)) != bb
)
2900 if (outside_bb
&& uses
> 1)
2910 /* Starting from the final control flow stmt in BB, assuming it will
2911 be removed, follow uses in to-be-removed stmts back to their defs
2912 and count how many defs are to become dead and be removed as
2916 estimate_threading_killed_stmts (basic_block bb
)
2918 int killed_stmts
= 0;
2919 hash_map
<tree
, int> ssa_remaining_uses
;
2920 auto_vec
<gimple
*, 4> dead_worklist
;
2922 /* If the block has only two predecessors, threading will turn phi
2923 dsts into either src, so count them as dead stmts. */
2924 bool drop_all_phis
= EDGE_COUNT (bb
->preds
) == 2;
2927 for (gphi_iterator gsi
= gsi_start_phis (bb
);
2928 !gsi_end_p (gsi
); gsi_next (&gsi
))
2930 gphi
*phi
= gsi
.phi ();
2931 tree dst
= gimple_phi_result (phi
);
2933 /* We don't count virtual PHIs as stmts in
2934 record_temporary_equivalences_from_phis. */
2935 if (virtual_operand_p (dst
))
2941 if (gsi_end_p (gsi_last_bb (bb
)))
2942 return killed_stmts
;
2944 gimple
*stmt
= gsi_stmt (gsi_last_bb (bb
));
2945 if (gimple_code (stmt
) != GIMPLE_COND
2946 && gimple_code (stmt
) != GIMPLE_GOTO
2947 && gimple_code (stmt
) != GIMPLE_SWITCH
)
2948 return killed_stmts
;
2950 /* The control statement is always dead. */
2952 dead_worklist
.quick_push (stmt
);
2953 while (!dead_worklist
.is_empty ())
2955 stmt
= dead_worklist
.pop ();
2958 use_operand_p use_p
;
2959 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_USE
)
2961 tree t
= USE_FROM_PTR (use_p
);
2962 gimple
*def
= SSA_NAME_DEF_STMT (t
);
2964 if (gimple_bb (def
) == bb
2965 && (gimple_code (def
) != GIMPLE_PHI
2967 && !gimple_has_side_effects (def
))
2969 int *usesp
= ssa_remaining_uses
.get (t
);
2975 uses
= uses_in_bb (t
, bb
);
2979 /* Don't bother recording the expected use count if we
2980 won't find any further uses within BB. */
2981 if (!usesp
&& (uses
< -1 || uses
> 1))
2983 usesp
= &ssa_remaining_uses
.get_or_insert (t
);
2998 ssa_remaining_uses
.remove (t
);
2999 if (gimple_code (def
) != GIMPLE_PHI
)
3000 dead_worklist
.safe_push (def
);
3007 fprintf (dump_file
, "threading bb %i kills %i stmts\n",
3008 bb
->index
, killed_stmts
);
3010 return killed_stmts
;