2017-10-27 Steven G. Kargl <kargl@gcc.gnu.org>
[official-gcc.git] / gcc / tree-ssa-threadupdate.c
blob8681707651caf2eb3b0008bab443b23c79b675f2
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "fold-const.h"
30 #include "cfganal.h"
31 #include "gimple-iterator.h"
32 #include "tree-ssa.h"
33 #include "tree-ssa-threadupdate.h"
34 #include "cfgloop.h"
35 #include "dbgcnt.h"
36 #include "tree-cfg.h"
37 #include "tree-vectorizer.h"
39 /* Given a block B, update the CFG and SSA graph to reflect redirecting
40 one or more in-edges to B to instead reach the destination of an
41 out-edge from B while preserving any side effects in B.
43 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
44 side effects of executing B.
46 1. Make a copy of B (including its outgoing edges and statements). Call
47 the copy B'. Note B' has no incoming edges or PHIs at this time.
49 2. Remove the control statement at the end of B' and all outgoing edges
50 except B'->C.
52 3. Add a new argument to each PHI in C with the same value as the existing
53 argument associated with edge B->C. Associate the new PHI arguments
54 with the edge B'->C.
56 4. For each PHI in B, find or create a PHI in B' with an identical
57 PHI_RESULT. Add an argument to the PHI in B' which has the same
58 value as the PHI in B associated with the edge A->B. Associate
59 the new argument in the PHI in B' with the edge A->B.
61 5. Change the edge A->B to A->B'.
63 5a. This automatically deletes any PHI arguments associated with the
64 edge A->B in B.
66 5b. This automatically associates each new argument added in step 4
67 with the edge A->B'.
69 6. Repeat for other incoming edges into B.
71 7. Put the duplicated resources in B and all the B' blocks into SSA form.
73 Note that block duplication can be minimized by first collecting the
74 set of unique destination blocks that the incoming edges should
75 be threaded to.
77 We reduce the number of edges and statements we create by not copying all
78 the outgoing edges and the control statement in step #1. We instead create
79 a template block without the outgoing edges and duplicate the template.
81 Another case this code handles is threading through a "joiner" block. In
82 this case, we do not know the destination of the joiner block, but one
83 of the outgoing edges from the joiner block leads to a threadable path. This
84 case largely works as outlined above, except the duplicate of the joiner
85 block still contains a full set of outgoing edges and its control statement.
86 We just redirect one of its outgoing edges to our jump threading path. */
89 /* Steps #5 and #6 of the above algorithm are best implemented by walking
90 all the incoming edges which thread to the same destination edge at
91 the same time. That avoids lots of table lookups to get information
92 for the destination edge.
94 To realize that implementation we create a list of incoming edges
95 which thread to the same outgoing edge. Thus to implement steps
96 #5 and #6 we traverse our hash table of outgoing edge information.
97 For each entry we walk the list of incoming edges which thread to
98 the current outgoing edge. */
100 struct el
102 edge e;
103 struct el *next;
106 /* Main data structure recording information regarding B's duplicate
107 blocks. */
109 /* We need to efficiently record the unique thread destinations of this
110 block and specific information associated with those destinations. We
111 may have many incoming edges threaded to the same outgoing edge. This
112 can be naturally implemented with a hash table. */
114 struct redirection_data : free_ptr_hash<redirection_data>
116 /* We support wiring up two block duplicates in a jump threading path.
118 One is a normal block copy where we remove the control statement
119 and wire up its single remaining outgoing edge to the thread path.
121 The other is a joiner block where we leave the control statement
122 in place, but wire one of the outgoing edges to a thread path.
124 In theory we could have multiple block duplicates in a jump
125 threading path, but I haven't tried that.
127 The duplicate blocks appear in this array in the same order in
128 which they appear in the jump thread path. */
129 basic_block dup_blocks[2];
131 /* The jump threading path. */
132 vec<jump_thread_edge *> *path;
134 /* A list of incoming edges which we want to thread to the
135 same path. */
136 struct el *incoming_edges;
138 /* hash_table support. */
139 static inline hashval_t hash (const redirection_data *);
140 static inline int equal (const redirection_data *, const redirection_data *);
143 /* Dump a jump threading path, including annotations about each
144 edge in the path. */
146 static void
147 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
148 bool registering)
150 fprintf (dump_file,
151 " %s%s jump thread: (%d, %d) incoming edge; ",
152 (registering ? "Registering" : "Cancelling"),
153 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
154 path[0]->e->src->index, path[0]->e->dest->index);
156 for (unsigned int i = 1; i < path.length (); i++)
158 /* We can get paths with a NULL edge when the final destination
159 of a jump thread turns out to be a constant address. We dump
160 those paths when debugging, so we have to be prepared for that
161 possibility here. */
162 if (path[i]->e == NULL)
163 continue;
165 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
166 fprintf (dump_file, " (%d, %d) joiner; ",
167 path[i]->e->src->index, path[i]->e->dest->index);
168 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
169 fprintf (dump_file, " (%d, %d) normal;",
170 path[i]->e->src->index, path[i]->e->dest->index);
171 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
172 fprintf (dump_file, " (%d, %d) nocopy;",
173 path[i]->e->src->index, path[i]->e->dest->index);
174 if (path[0]->type == EDGE_FSM_THREAD)
175 fprintf (dump_file, " (%d, %d) ",
176 path[i]->e->src->index, path[i]->e->dest->index);
178 fputc ('\n', dump_file);
181 /* Simple hashing function. For any given incoming edge E, we're going
182 to be most concerned with the final destination of its jump thread
183 path. So hash on the block index of the final edge in the path. */
185 inline hashval_t
186 redirection_data::hash (const redirection_data *p)
188 vec<jump_thread_edge *> *path = p->path;
189 return path->last ()->e->dest->index;
192 /* Given two hash table entries, return true if they have the same
193 jump threading path. */
194 inline int
195 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
197 vec<jump_thread_edge *> *path1 = p1->path;
198 vec<jump_thread_edge *> *path2 = p2->path;
200 if (path1->length () != path2->length ())
201 return false;
203 for (unsigned int i = 1; i < path1->length (); i++)
205 if ((*path1)[i]->type != (*path2)[i]->type
206 || (*path1)[i]->e != (*path2)[i]->e)
207 return false;
210 return true;
213 /* Rather than search all the edges in jump thread paths each time
214 DOM is able to simply if control statement, we build a hash table
215 with the deleted edges. We only care about the address of the edge,
216 not its contents. */
217 struct removed_edges : nofree_ptr_hash<edge_def>
219 static hashval_t hash (edge e) { return htab_hash_pointer (e); }
220 static bool equal (edge e1, edge e2) { return e1 == e2; }
223 static hash_table<removed_edges> *removed_edges;
225 /* Data structure of information to pass to hash table traversal routines. */
226 struct ssa_local_info_t
228 /* The current block we are working on. */
229 basic_block bb;
231 /* We only create a template block for the first duplicated block in a
232 jump threading path as we may need many duplicates of that block.
234 The second duplicate block in a path is specific to that path. Creating
235 and sharing a template for that block is considerably more difficult. */
236 basic_block template_block;
238 /* Blocks duplicated for the thread. */
239 bitmap duplicate_blocks;
241 /* TRUE if we thread one or more jumps, FALSE otherwise. */
242 bool jumps_threaded;
244 /* When we have multiple paths through a joiner which reach different
245 final destinations, then we may need to correct for potential
246 profile insanities. */
247 bool need_profile_correction;
250 /* Passes which use the jump threading code register jump threading
251 opportunities as they are discovered. We keep the registered
252 jump threading opportunities in this vector as edge pairs
253 (original_edge, target_edge). */
254 static vec<vec<jump_thread_edge *> *> paths;
256 /* When we start updating the CFG for threading, data necessary for jump
257 threading is attached to the AUX field for the incoming edge. Use these
258 macros to access the underlying structure attached to the AUX field. */
259 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
261 /* Jump threading statistics. */
263 struct thread_stats_d
265 unsigned long num_threaded_edges;
268 struct thread_stats_d thread_stats;
271 /* Remove the last statement in block BB if it is a control statement
272 Also remove all outgoing edges except the edge which reaches DEST_BB.
273 If DEST_BB is NULL, then remove all outgoing edges. */
275 void
276 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
278 gimple_stmt_iterator gsi;
279 edge e;
280 edge_iterator ei;
282 gsi = gsi_last_bb (bb);
284 /* If the duplicate ends with a control statement, then remove it.
286 Note that if we are duplicating the template block rather than the
287 original basic block, then the duplicate might not have any real
288 statements in it. */
289 if (!gsi_end_p (gsi)
290 && gsi_stmt (gsi)
291 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
292 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
293 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
294 gsi_remove (&gsi, true);
296 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
298 if (e->dest != dest_bb)
300 free_dom_edge_info (e);
301 remove_edge (e);
303 else
305 e->probability = profile_probability::always ();
306 ei_next (&ei);
310 /* If the remaining edge is a loop exit, there must have
311 a removed edge that was not a loop exit.
313 In that case BB and possibly other blocks were previously
314 in the loop, but are now outside the loop. Thus, we need
315 to update the loop structures. */
316 if (single_succ_p (bb)
317 && loop_outer (bb->loop_father)
318 && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
319 loops_state_set (LOOPS_NEED_FIXUP);
322 /* Create a duplicate of BB. Record the duplicate block in an array
323 indexed by COUNT stored in RD. */
325 static void
326 create_block_for_threading (basic_block bb,
327 struct redirection_data *rd,
328 unsigned int count,
329 bitmap *duplicate_blocks)
331 edge_iterator ei;
332 edge e;
334 /* We can use the generic block duplication code and simply remove
335 the stuff we do not need. */
336 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
338 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
339 e->aux = NULL;
341 /* Zero out the profile, since the block is unreachable for now. */
342 rd->dup_blocks[count]->frequency = 0;
343 rd->dup_blocks[count]->count = profile_count::uninitialized ();
344 if (duplicate_blocks)
345 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
348 /* Main data structure to hold information for duplicates of BB. */
350 static hash_table<redirection_data> *redirection_data;
352 /* Given an outgoing edge E lookup and return its entry in our hash table.
354 If INSERT is true, then we insert the entry into the hash table if
355 it is not already present. INCOMING_EDGE is added to the list of incoming
356 edges associated with E in the hash table. */
358 static struct redirection_data *
359 lookup_redirection_data (edge e, enum insert_option insert)
361 struct redirection_data **slot;
362 struct redirection_data *elt;
363 vec<jump_thread_edge *> *path = THREAD_PATH (e);
365 /* Build a hash table element so we can see if E is already
366 in the table. */
367 elt = XNEW (struct redirection_data);
368 elt->path = path;
369 elt->dup_blocks[0] = NULL;
370 elt->dup_blocks[1] = NULL;
371 elt->incoming_edges = NULL;
373 slot = redirection_data->find_slot (elt, insert);
375 /* This will only happen if INSERT is false and the entry is not
376 in the hash table. */
377 if (slot == NULL)
379 free (elt);
380 return NULL;
383 /* This will only happen if E was not in the hash table and
384 INSERT is true. */
385 if (*slot == NULL)
387 *slot = elt;
388 elt->incoming_edges = XNEW (struct el);
389 elt->incoming_edges->e = e;
390 elt->incoming_edges->next = NULL;
391 return elt;
393 /* E was in the hash table. */
394 else
396 /* Free ELT as we do not need it anymore, we will extract the
397 relevant entry from the hash table itself. */
398 free (elt);
400 /* Get the entry stored in the hash table. */
401 elt = *slot;
403 /* If insertion was requested, then we need to add INCOMING_EDGE
404 to the list of incoming edges associated with E. */
405 if (insert)
407 struct el *el = XNEW (struct el);
408 el->next = elt->incoming_edges;
409 el->e = e;
410 elt->incoming_edges = el;
413 return elt;
417 /* Similar to copy_phi_args, except that the PHI arg exists, it just
418 does not have a value associated with it. */
420 static void
421 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
423 int src_idx = src_e->dest_idx;
424 int tgt_idx = tgt_e->dest_idx;
426 /* Iterate over each PHI in e->dest. */
427 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
428 gsi2 = gsi_start_phis (tgt_e->dest);
429 !gsi_end_p (gsi);
430 gsi_next (&gsi), gsi_next (&gsi2))
432 gphi *src_phi = gsi.phi ();
433 gphi *dest_phi = gsi2.phi ();
434 tree val = gimple_phi_arg_def (src_phi, src_idx);
435 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
437 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
438 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
442 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
443 to see if it has constant value in a flow sensitive manner. Set
444 LOCUS to location of the constant phi arg and return the value.
445 Return DEF directly if either PATH or idx is ZERO. */
447 static tree
448 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
449 basic_block bb, int idx, source_location *locus)
451 tree arg;
452 gphi *def_phi;
453 basic_block def_bb;
455 if (path == NULL || idx == 0)
456 return def;
458 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
459 if (!def_phi)
460 return def;
462 def_bb = gimple_bb (def_phi);
463 /* Don't propagate loop invariants into deeper loops. */
464 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
465 return def;
467 /* Backtrack jump threading path from IDX to see if def has constant
468 value. */
469 for (int j = idx - 1; j >= 0; j--)
471 edge e = (*path)[j]->e;
472 if (e->dest == def_bb)
474 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
475 if (is_gimple_min_invariant (arg))
477 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
478 return arg;
480 break;
484 return def;
487 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
488 Try to backtrack jump threading PATH from node IDX to see if the arg
489 has constant value, copy constant value instead of argument itself
490 if yes. */
492 static void
493 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
494 vec<jump_thread_edge *> *path, int idx)
496 gphi_iterator gsi;
497 int src_indx = src_e->dest_idx;
499 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
501 gphi *phi = gsi.phi ();
502 tree def = gimple_phi_arg_def (phi, src_indx);
503 source_location locus = gimple_phi_arg_location (phi, src_indx);
505 if (TREE_CODE (def) == SSA_NAME
506 && !virtual_operand_p (gimple_phi_result (phi)))
507 def = get_value_locus_in_path (def, path, bb, idx, &locus);
509 add_phi_arg (phi, def, tgt_e, locus);
513 /* We have recently made a copy of ORIG_BB, including its outgoing
514 edges. The copy is NEW_BB. Every PHI node in every direct successor of
515 ORIG_BB has a new argument associated with edge from NEW_BB to the
516 successor. Initialize the PHI argument so that it is equal to the PHI
517 argument associated with the edge from ORIG_BB to the successor.
518 PATH and IDX are used to check if the new PHI argument has constant
519 value in a flow sensitive manner. */
521 static void
522 update_destination_phis (basic_block orig_bb, basic_block new_bb,
523 vec<jump_thread_edge *> *path, int idx)
525 edge_iterator ei;
526 edge e;
528 FOR_EACH_EDGE (e, ei, orig_bb->succs)
530 edge e2 = find_edge (new_bb, e->dest);
531 copy_phi_args (e->dest, e, e2, path, idx);
535 /* Given a duplicate block and its single destination (both stored
536 in RD). Create an edge between the duplicate and its single
537 destination.
539 Add an additional argument to any PHI nodes at the single
540 destination. IDX is the start node in jump threading path
541 we start to check to see if the new PHI argument has constant
542 value along the jump threading path. */
544 static void
545 create_edge_and_update_destination_phis (struct redirection_data *rd,
546 basic_block bb, int idx)
548 edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
550 rescan_loop_exit (e, true, false);
552 /* We used to copy the thread path here. That was added in 2007
553 and dutifully updated through the representation changes in 2013.
555 In 2013 we added code to thread from an interior node through
556 the backedge to another interior node. That runs after the code
557 to thread through loop headers from outside the loop.
559 The latter may delete edges in the CFG, including those
560 which appeared in the jump threading path we copied here. Thus
561 we'd end up using a dangling pointer.
563 After reviewing the 2007/2011 code, I can't see how anything
564 depended on copying the AUX field and clearly copying the jump
565 threading path is problematical due to embedded edge pointers.
566 It has been removed. */
567 e->aux = NULL;
569 /* If there are any PHI nodes at the destination of the outgoing edge
570 from the duplicate block, then we will need to add a new argument
571 to them. The argument should have the same value as the argument
572 associated with the outgoing edge stored in RD. */
573 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
576 /* Look through PATH beginning at START and return TRUE if there are
577 any additional blocks that need to be duplicated. Otherwise,
578 return FALSE. */
579 static bool
580 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
581 unsigned int start)
583 for (unsigned int i = start + 1; i < path->length (); i++)
585 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
586 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
587 return true;
589 return false;
593 /* Compute the amount of profile count/frequency coming into the jump threading
594 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
595 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
596 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
597 identify blocks duplicated for jump threading, which have duplicated
598 edges that need to be ignored in the analysis. Return true if path contains
599 a joiner, false otherwise.
601 In the non-joiner case, this is straightforward - all the counts/frequency
602 flowing into the jump threading path should flow through the duplicated
603 block and out of the duplicated path.
605 In the joiner case, it is very tricky. Some of the counts flowing into
606 the original path go offpath at the joiner. The problem is that while
607 we know how much total count goes off-path in the original control flow,
608 we don't know how many of the counts corresponding to just the jump
609 threading path go offpath at the joiner.
611 For example, assume we have the following control flow and identified
612 jump threading paths:
614 A B C
615 \ | /
616 Ea \ |Eb / Ec
617 \ | /
618 v v v
619 J <-- Joiner
621 Eoff/ \Eon
624 Soff Son <--- Normal
626 Ed/ \ Ee
631 Jump threading paths: A -> J -> Son -> D (path 1)
632 C -> J -> Son -> E (path 2)
634 Note that the control flow could be more complicated:
635 - Each jump threading path may have more than one incoming edge. I.e. A and
636 Ea could represent multiple incoming blocks/edges that are included in
637 path 1.
638 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
639 before or after the "normal" copy block). These are not duplicated onto
640 the jump threading path, as they are single-successor.
641 - Any of the blocks along the path may have other incoming edges that
642 are not part of any jump threading path, but add profile counts along
643 the path.
645 In the above example, after all jump threading is complete, we will
646 end up with the following control flow:
648 A B C
649 | | |
650 Ea| |Eb |Ec
651 | | |
652 v v v
653 Ja J Jc
654 / \ / \Eon' / \
655 Eona/ \ ---/---\-------- \Eonc
656 / \ / / \ \
657 v v v v v
658 Sona Soff Son Sonc
659 \ /\ /
660 \___________ / \ _____/
661 \ / \/
662 vv v
665 The main issue to notice here is that when we are processing path 1
666 (A->J->Son->D) we need to figure out the outgoing edge weights to
667 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
668 sum of the incoming weights to D remain Ed. The problem with simply
669 assuming that Ja (and Jc when processing path 2) has the same outgoing
670 probabilities to its successors as the original block J, is that after
671 all paths are processed and other edges/counts removed (e.g. none
672 of Ec will reach D after processing path 2), we may end up with not
673 enough count flowing along duplicated edge Sona->D.
675 Therefore, in the case of a joiner, we keep track of all counts
676 coming in along the current path, as well as from predecessors not
677 on any jump threading path (Eb in the above example). While we
678 first assume that the duplicated Eona for Ja->Sona has the same
679 probability as the original, we later compensate for other jump
680 threading paths that may eliminate edges. We do that by keep track
681 of all counts coming into the original path that are not in a jump
682 thread (Eb in the above example, but as noted earlier, there could
683 be other predecessors incoming to the path at various points, such
684 as at Son). Call this cumulative non-path count coming into the path
685 before D as Enonpath. We then ensure that the count from Sona->D is as at
686 least as big as (Ed - Enonpath), but no bigger than the minimum
687 weight along the jump threading path. The probabilities of both the
688 original and duplicated joiner block J and Ja will be adjusted
689 accordingly after the updates. */
691 static bool
692 compute_path_counts (struct redirection_data *rd,
693 ssa_local_info_t *local_info,
694 profile_count *path_in_count_ptr,
695 profile_count *path_out_count_ptr,
696 int *path_in_freq_ptr)
698 edge e = rd->incoming_edges->e;
699 vec<jump_thread_edge *> *path = THREAD_PATH (e);
700 edge elast = path->last ()->e;
701 profile_count nonpath_count = profile_count::zero ();
702 bool has_joiner = false;
703 profile_count path_in_count = profile_count::zero ();
704 int path_in_freq = 0;
706 /* Start by accumulating incoming edge counts to the path's first bb
707 into a couple buckets:
708 path_in_count: total count of incoming edges that flow into the
709 current path.
710 nonpath_count: total count of incoming edges that are not
711 flowing along *any* path. These are the counts
712 that will still flow along the original path after
713 all path duplication is done by potentially multiple
714 calls to this routine.
715 (any other incoming edge counts are for a different jump threading
716 path that will be handled by a later call to this routine.)
717 To make this easier, start by recording all incoming edges that flow into
718 the current path in a bitmap. We could add up the path's incoming edge
719 counts here, but we still need to walk all the first bb's incoming edges
720 below to add up the counts of the other edges not included in this jump
721 threading path. */
722 struct el *next, *el;
723 auto_bitmap in_edge_srcs;
724 for (el = rd->incoming_edges; el; el = next)
726 next = el->next;
727 bitmap_set_bit (in_edge_srcs, el->e->src->index);
729 edge ein;
730 edge_iterator ei;
731 FOR_EACH_EDGE (ein, ei, e->dest->preds)
733 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
734 /* Simply check the incoming edge src against the set captured above. */
735 if (ein_path
736 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
738 /* It is necessary but not sufficient that the last path edges
739 are identical. There may be different paths that share the
740 same last path edge in the case where the last edge has a nocopy
741 source block. */
742 gcc_assert (ein_path->last ()->e == elast);
743 path_in_count += ein->count ();
744 path_in_freq += EDGE_FREQUENCY (ein);
746 else if (!ein_path)
748 /* Keep track of the incoming edges that are not on any jump-threading
749 path. These counts will still flow out of original path after all
750 jump threading is complete. */
751 nonpath_count += ein->count ();
755 /* This is needed due to insane incoming frequencies. */
756 if (path_in_freq > BB_FREQ_MAX)
757 path_in_freq = BB_FREQ_MAX;
759 /* Now compute the fraction of the total count coming into the first
760 path bb that is from the current threading path. */
761 profile_count total_count = e->dest->count;
762 /* Handle incoming profile insanities. */
763 if (total_count < path_in_count)
764 path_in_count = total_count;
765 profile_probability onpath_scale = path_in_count.probability_in (total_count);
767 /* Walk the entire path to do some more computation in order to estimate
768 how much of the path_in_count will flow out of the duplicated threading
769 path. In the non-joiner case this is straightforward (it should be
770 the same as path_in_count, although we will handle incoming profile
771 insanities by setting it equal to the minimum count along the path).
773 In the joiner case, we need to estimate how much of the path_in_count
774 will stay on the threading path after the joiner's conditional branch.
775 We don't really know for sure how much of the counts
776 associated with this path go to each successor of the joiner, but we'll
777 estimate based on the fraction of the total count coming into the path
778 bb was from the threading paths (computed above in onpath_scale).
779 Afterwards, we will need to do some fixup to account for other threading
780 paths and possible profile insanities.
782 In order to estimate the joiner case's counts we also need to update
783 nonpath_count with any additional counts coming into the path. Other
784 blocks along the path may have additional predecessors from outside
785 the path. */
786 profile_count path_out_count = path_in_count;
787 profile_count min_path_count = path_in_count;
788 for (unsigned int i = 1; i < path->length (); i++)
790 edge epath = (*path)[i]->e;
791 profile_count cur_count = epath->count ();
792 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
794 has_joiner = true;
795 cur_count = cur_count.apply_probability (onpath_scale);
797 /* In the joiner case we need to update nonpath_count for any edges
798 coming into the path that will contribute to the count flowing
799 into the path successor. */
800 if (has_joiner && epath != elast)
802 /* Look for other incoming edges after joiner. */
803 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
805 if (ein != epath
806 /* Ignore in edges from blocks we have duplicated for a
807 threading path, which have duplicated edge counts until
808 they are redirected by an invocation of this routine. */
809 && !bitmap_bit_p (local_info->duplicate_blocks,
810 ein->src->index))
811 nonpath_count += ein->count ();
814 if (cur_count < path_out_count)
815 path_out_count = cur_count;
816 if (epath->count () < min_path_count)
817 min_path_count = epath->count ();
820 /* We computed path_out_count above assuming that this path targeted
821 the joiner's on-path successor with the same likelihood as it
822 reached the joiner. However, other thread paths through the joiner
823 may take a different path through the normal copy source block
824 (i.e. they have a different elast), meaning that they do not
825 contribute any counts to this path's elast. As a result, it may
826 turn out that this path must have more count flowing to the on-path
827 successor of the joiner. Essentially, all of this path's elast
828 count must be contributed by this path and any nonpath counts
829 (since any path through the joiner with a different elast will not
830 include a copy of this elast in its duplicated path).
831 So ensure that this path's path_out_count is at least the
832 difference between elast->count () and nonpath_count. Otherwise the edge
833 counts after threading will not be sane. */
834 if (local_info->need_profile_correction
835 && has_joiner && path_out_count < elast->count () - nonpath_count)
837 path_out_count = elast->count () - nonpath_count;
838 /* But neither can we go above the minimum count along the path
839 we are duplicating. This can be an issue due to profile
840 insanities coming in to this pass. */
841 if (path_out_count > min_path_count)
842 path_out_count = min_path_count;
845 *path_in_count_ptr = path_in_count;
846 *path_out_count_ptr = path_out_count;
847 *path_in_freq_ptr = path_in_freq;
848 return has_joiner;
852 /* Update the counts and frequencies for both an original path
853 edge EPATH and its duplicate EDUP. The duplicate source block
854 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
855 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
856 static void
857 update_profile (edge epath, edge edup, profile_count path_in_count,
858 profile_count path_out_count, int path_in_freq)
860 if (!(path_in_count > 0))
861 return;
863 /* First update the duplicated block's count / frequency. */
864 if (edup)
866 basic_block dup_block = edup->src;
868 /* Edup's count is reduced by path_out_count. We need to redistribute
869 probabilities to the remaining edges. */
871 edge esucc;
872 edge_iterator ei;
873 profile_probability edup_prob
874 = path_out_count.probability_in (path_in_count);
876 /* Either scale up or down the remaining edges.
877 probabilities are always in range <0,1> and thus we can't do
878 both by same loop. */
879 if (edup->probability > edup_prob)
881 profile_probability rev_scale
882 = (profile_probability::always () - edup->probability)
883 / (profile_probability::always () - edup_prob);
884 FOR_EACH_EDGE (esucc, ei, dup_block->succs)
885 if (esucc != edup)
886 esucc->probability /= rev_scale;
888 else if (edup->probability < edup_prob)
890 profile_probability scale
891 = (profile_probability::always () - edup_prob)
892 / (profile_probability::always () - edup->probability);
893 FOR_EACH_EDGE (esucc, ei, dup_block->succs)
894 if (esucc != edup)
895 esucc->probability *= scale;
897 edup->probability = edup_prob;
899 /* FIXME once freqs_to_counts is dropped re-enable this check. */
900 gcc_assert (!dup_block->count.initialized_p () || 1);
901 gcc_assert (dup_block->frequency == 0);
902 dup_block->count = path_in_count;
903 dup_block->frequency = path_in_freq;
906 profile_count final_count = epath->count () - path_out_count;
908 /* Now update the original block's count and frequency in the
909 opposite manner - remove the counts/freq that will flow
910 into the duplicated block. Handle underflow due to precision/
911 rounding issues. */
912 epath->src->count -= path_in_count;
913 epath->src->frequency -= path_in_freq;
914 if (epath->src->frequency < 0)
915 epath->src->frequency = 0;
917 /* Next update this path edge's original and duplicated counts. We know
918 that the duplicated path will have path_out_count flowing
919 out of it (in the joiner case this is the count along the duplicated path
920 out of the duplicated joiner). This count can then be removed from the
921 original path edge. */
922 if (epath->src->count > 0)
924 edge esucc;
925 edge_iterator ei;
926 profile_probability epath_prob = final_count.probability_in (epath->src->count);
928 if (epath->probability > epath_prob)
930 profile_probability rev_scale
931 = (profile_probability::always () - epath->probability)
932 / (profile_probability::always () - epath_prob);
933 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
934 if (esucc != epath)
935 esucc->probability /= rev_scale;
937 else if (epath->probability < epath_prob)
939 profile_probability scale
940 = (profile_probability::always () - epath_prob)
941 / (profile_probability::always () - epath->probability);
942 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
943 if (esucc != epath)
944 esucc->probability *= scale;
946 epath->probability = epath_prob;
951 /* Check if the paths through RD all have estimated frequencies but zero
952 profile counts. This is more accurate than checking the entry block
953 for a zero profile count, since profile insanities sometimes creep in. */
955 static bool
956 estimated_freqs_path (struct redirection_data *rd)
958 edge e = rd->incoming_edges->e;
959 vec<jump_thread_edge *> *path = THREAD_PATH (e);
960 edge ein;
961 edge_iterator ei;
962 bool non_zero_freq = false;
963 FOR_EACH_EDGE (ein, ei, e->dest->preds)
965 if (ein->count () > 0)
966 return false;
967 non_zero_freq |= ein->src->frequency != 0;
970 for (unsigned int i = 1; i < path->length (); i++)
972 edge epath = (*path)[i]->e;
973 if (epath->src->count > 0)
974 return false;
975 non_zero_freq |= epath->src->frequency != 0;
976 edge esucc;
977 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
979 if (esucc->count () > 0)
980 return false;
981 non_zero_freq |= esucc->src->frequency != 0;
984 return non_zero_freq;
988 /* Invoked for routines that have guessed frequencies and no profile
989 counts to record the block and edge frequencies for paths through RD
990 in the profile count fields of those blocks and edges. This is because
991 ssa_fix_duplicate_block_edges incrementally updates the block and
992 edge counts as edges are redirected, and it is difficult to do that
993 for edge frequencies which are computed on the fly from the source
994 block frequency and probability. When a block frequency is updated
995 its outgoing edge frequencies are affected and become difficult to
996 adjust. */
998 static void
999 freqs_to_counts_path (struct redirection_data *rd)
1001 edge e = rd->incoming_edges->e;
1002 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1003 edge ein;
1004 edge_iterator ei;
1006 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1007 ein->src->count = profile_count::from_gcov_type
1008 (ein->src->frequency * REG_BR_PROB_BASE);
1009 for (unsigned int i = 1; i < path->length (); i++)
1011 edge epath = (*path)[i]->e;
1012 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1013 errors applying the edge probability when the frequencies are very
1014 small. */
1015 epath->src->count =
1016 profile_count::from_gcov_type
1017 (epath->src->frequency * REG_BR_PROB_BASE);
1022 /* For routines that have guessed frequencies and no profile counts, where we
1023 used freqs_to_counts_path to record block and edge frequencies for paths
1024 through RD, we clear the counts after completing all updates for RD.
1025 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1026 but the block frequencies and edge probabilities were updated as well,
1027 so we can simply clear the count fields. */
1029 static void
1030 clear_counts_path (struct redirection_data *rd)
1032 edge e = rd->incoming_edges->e;
1033 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1034 profile_count val = profile_count::uninitialized ();
1035 if (profile_status_for_fn (cfun) == PROFILE_READ)
1036 val = profile_count::zero ();
1038 edge ein;
1039 edge_iterator ei;
1041 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1042 ein->src->count = val;
1044 /* First clear counts along original path. */
1045 for (unsigned int i = 1; i < path->length (); i++)
1047 edge epath = (*path)[i]->e;
1048 epath->src->count = val;
1050 /* Also need to clear the counts along duplicated path. */
1051 for (unsigned int i = 0; i < 2; i++)
1053 basic_block dup = rd->dup_blocks[i];
1054 if (!dup)
1055 continue;
1056 dup->count = val;
1060 /* Wire up the outgoing edges from the duplicate blocks and
1061 update any PHIs as needed. Also update the profile counts
1062 on the original and duplicate blocks and edges. */
1063 void
1064 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1065 ssa_local_info_t *local_info)
1067 bool multi_incomings = (rd->incoming_edges->next != NULL);
1068 edge e = rd->incoming_edges->e;
1069 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1070 edge elast = path->last ()->e;
1071 profile_count path_in_count = profile_count::zero ();
1072 profile_count path_out_count = profile_count::zero ();
1073 int path_in_freq = 0;
1075 /* This routine updates profile counts, frequencies, and probabilities
1076 incrementally. Since it is difficult to do the incremental updates
1077 using frequencies/probabilities alone, for routines without profile
1078 data we first take a snapshot of the existing block and edge frequencies
1079 by copying them into the empty profile count fields. These counts are
1080 then used to do the incremental updates, and cleared at the end of this
1081 routine. If the function is marked as having a profile, we still check
1082 to see if the paths through RD are using estimated frequencies because
1083 the routine had zero profile counts. */
1084 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1085 || estimated_freqs_path (rd));
1086 if (do_freqs_to_counts)
1087 freqs_to_counts_path (rd);
1089 /* First determine how much profile count to move from original
1090 path to the duplicate path. This is tricky in the presence of
1091 a joiner (see comments for compute_path_counts), where some portion
1092 of the path's counts will flow off-path from the joiner. In the
1093 non-joiner case the path_in_count and path_out_count should be the
1094 same. */
1095 bool has_joiner = compute_path_counts (rd, local_info,
1096 &path_in_count, &path_out_count,
1097 &path_in_freq);
1099 int cur_path_freq = path_in_freq;
1100 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1102 edge epath = (*path)[i]->e;
1104 /* If we were threading through an joiner block, then we want
1105 to keep its control statement and redirect an outgoing edge.
1106 Else we want to remove the control statement & edges, then create
1107 a new outgoing edge. In both cases we may need to update PHIs. */
1108 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1110 edge victim;
1111 edge e2;
1113 gcc_assert (has_joiner);
1115 /* This updates the PHIs at the destination of the duplicate
1116 block. Pass 0 instead of i if we are threading a path which
1117 has multiple incoming edges. */
1118 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1119 path, multi_incomings ? 0 : i);
1121 /* Find the edge from the duplicate block to the block we're
1122 threading through. That's the edge we want to redirect. */
1123 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1125 /* If there are no remaining blocks on the path to duplicate,
1126 then redirect VICTIM to the final destination of the jump
1127 threading path. */
1128 if (!any_remaining_duplicated_blocks (path, i))
1130 e2 = redirect_edge_and_branch (victim, elast->dest);
1131 /* If we redirected the edge, then we need to copy PHI arguments
1132 at the target. If the edge already existed (e2 != victim
1133 case), then the PHIs in the target already have the correct
1134 arguments. */
1135 if (e2 == victim)
1136 copy_phi_args (e2->dest, elast, e2,
1137 path, multi_incomings ? 0 : i);
1139 else
1141 /* Redirect VICTIM to the next duplicated block in the path. */
1142 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1144 /* We need to update the PHIs in the next duplicated block. We
1145 want the new PHI args to have the same value as they had
1146 in the source of the next duplicate block.
1148 Thus, we need to know which edge we traversed into the
1149 source of the duplicate. Furthermore, we may have
1150 traversed many edges to reach the source of the duplicate.
1152 Walk through the path starting at element I until we
1153 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1154 the edge from the prior element. */
1155 for (unsigned int j = i + 1; j < path->length (); j++)
1157 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1159 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1160 break;
1165 /* Update the counts and frequency of both the original block
1166 and path edge, and the duplicates. The path duplicate's
1167 incoming count and frequency are the totals for all edges
1168 incoming to this jump threading path computed earlier.
1169 And we know that the duplicated path will have path_out_count
1170 flowing out of it (i.e. along the duplicated path out of the
1171 duplicated joiner). */
1172 update_profile (epath, e2, path_in_count, path_out_count,
1173 path_in_freq);
1175 /* Record the frequency flowing to the downstream duplicated
1176 path blocks. */
1177 cur_path_freq = EDGE_FREQUENCY (e2);
1179 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1181 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1182 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1183 multi_incomings ? 0 : i);
1184 if (count == 1)
1185 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1187 /* Update the counts and frequency of both the original block
1188 and path edge, and the duplicates. Since we are now after
1189 any joiner that may have existed on the path, the count
1190 flowing along the duplicated threaded path is path_out_count.
1191 If we didn't have a joiner, then cur_path_freq was the sum
1192 of the total frequencies along all incoming edges to the
1193 thread path (path_in_freq). If we had a joiner, it would have
1194 been updated at the end of that handling to the edge frequency
1195 along the duplicated joiner path edge. */
1196 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1197 path_out_count, path_out_count, cur_path_freq);
1199 else
1201 /* No copy case. In this case we don't have an equivalent block
1202 on the duplicated thread path to update, but we do need
1203 to remove the portion of the counts/freqs that were moved
1204 to the duplicated path from the counts/freqs flowing through
1205 this block on the original path. Since all the no-copy edges
1206 are after any joiner, the removed count is the same as
1207 path_out_count.
1209 If we didn't have a joiner, then cur_path_freq was the sum
1210 of the total frequencies along all incoming edges to the
1211 thread path (path_in_freq). If we had a joiner, it would have
1212 been updated at the end of that handling to the edge frequency
1213 along the duplicated joiner path edge. */
1214 update_profile (epath, NULL, path_out_count, path_out_count,
1215 cur_path_freq);
1218 /* Increment the index into the duplicated path when we processed
1219 a duplicated block. */
1220 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1221 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1223 count++;
1227 /* Done with all profile and frequency updates, clear counts if they
1228 were copied. */
1229 if (do_freqs_to_counts)
1230 clear_counts_path (rd);
1233 /* Hash table traversal callback routine to create duplicate blocks. */
1236 ssa_create_duplicates (struct redirection_data **slot,
1237 ssa_local_info_t *local_info)
1239 struct redirection_data *rd = *slot;
1241 /* The second duplicated block in a jump threading path is specific
1242 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1244 Each time we're called, we have to look through the path and see
1245 if a second block needs to be duplicated.
1247 Note the search starts with the third edge on the path. The first
1248 edge is the incoming edge, the second edge always has its source
1249 duplicated. Thus we start our search with the third edge. */
1250 vec<jump_thread_edge *> *path = rd->path;
1251 for (unsigned int i = 2; i < path->length (); i++)
1253 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1254 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1256 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1257 &local_info->duplicate_blocks);
1258 break;
1262 /* Create a template block if we have not done so already. Otherwise
1263 use the template to create a new block. */
1264 if (local_info->template_block == NULL)
1266 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1267 &local_info->duplicate_blocks);
1268 local_info->template_block = rd->dup_blocks[0];
1270 /* We do not create any outgoing edges for the template. We will
1271 take care of that in a later traversal. That way we do not
1272 create edges that are going to just be deleted. */
1274 else
1276 create_block_for_threading (local_info->template_block, rd, 0,
1277 &local_info->duplicate_blocks);
1279 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1280 block. */
1281 ssa_fix_duplicate_block_edges (rd, local_info);
1284 /* Keep walking the hash table. */
1285 return 1;
1288 /* We did not create any outgoing edges for the template block during
1289 block creation. This hash table traversal callback creates the
1290 outgoing edge for the template block. */
1292 inline int
1293 ssa_fixup_template_block (struct redirection_data **slot,
1294 ssa_local_info_t *local_info)
1296 struct redirection_data *rd = *slot;
1298 /* If this is the template block halt the traversal after updating
1299 it appropriately.
1301 If we were threading through an joiner block, then we want
1302 to keep its control statement and redirect an outgoing edge.
1303 Else we want to remove the control statement & edges, then create
1304 a new outgoing edge. In both cases we may need to update PHIs. */
1305 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1307 ssa_fix_duplicate_block_edges (rd, local_info);
1308 return 0;
1311 return 1;
1314 /* Hash table traversal callback to redirect each incoming edge
1315 associated with this hash table element to its new destination. */
1318 ssa_redirect_edges (struct redirection_data **slot,
1319 ssa_local_info_t *local_info)
1321 struct redirection_data *rd = *slot;
1322 struct el *next, *el;
1324 /* Walk over all the incoming edges associated with this hash table
1325 entry. */
1326 for (el = rd->incoming_edges; el; el = next)
1328 edge e = el->e;
1329 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1331 /* Go ahead and free this element from the list. Doing this now
1332 avoids the need for another list walk when we destroy the hash
1333 table. */
1334 next = el->next;
1335 free (el);
1337 thread_stats.num_threaded_edges++;
1339 if (rd->dup_blocks[0])
1341 edge e2;
1343 if (dump_file && (dump_flags & TDF_DETAILS))
1344 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1345 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1347 /* Redirect the incoming edge (possibly to the joiner block) to the
1348 appropriate duplicate block. */
1349 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1350 gcc_assert (e == e2);
1351 flush_pending_stmts (e2);
1354 /* Go ahead and clear E->aux. It's not needed anymore and failure
1355 to clear it will cause all kinds of unpleasant problems later. */
1356 delete_jump_thread_path (path);
1357 e->aux = NULL;
1361 /* Indicate that we actually threaded one or more jumps. */
1362 if (rd->incoming_edges)
1363 local_info->jumps_threaded = true;
1365 return 1;
1368 /* Return true if this block has no executable statements other than
1369 a simple ctrl flow instruction. When the number of outgoing edges
1370 is one, this is equivalent to a "forwarder" block. */
1372 static bool
1373 redirection_block_p (basic_block bb)
1375 gimple_stmt_iterator gsi;
1377 /* Advance to the first executable statement. */
1378 gsi = gsi_start_bb (bb);
1379 while (!gsi_end_p (gsi)
1380 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1381 || is_gimple_debug (gsi_stmt (gsi))
1382 || gimple_nop_p (gsi_stmt (gsi))
1383 || gimple_clobber_p (gsi_stmt (gsi))))
1384 gsi_next (&gsi);
1386 /* Check if this is an empty block. */
1387 if (gsi_end_p (gsi))
1388 return true;
1390 /* Test that we've reached the terminating control statement. */
1391 return gsi_stmt (gsi)
1392 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1393 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1394 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1397 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1398 is reached via one or more specific incoming edges, we know which
1399 outgoing edge from BB will be traversed.
1401 We want to redirect those incoming edges to the target of the
1402 appropriate outgoing edge. Doing so avoids a conditional branch
1403 and may expose new optimization opportunities. Note that we have
1404 to update dominator tree and SSA graph after such changes.
1406 The key to keeping the SSA graph update manageable is to duplicate
1407 the side effects occurring in BB so that those side effects still
1408 occur on the paths which bypass BB after redirecting edges.
1410 We accomplish this by creating duplicates of BB and arranging for
1411 the duplicates to unconditionally pass control to one specific
1412 successor of BB. We then revector the incoming edges into BB to
1413 the appropriate duplicate of BB.
1415 If NOLOOP_ONLY is true, we only perform the threading as long as it
1416 does not affect the structure of the loops in a nontrivial way.
1418 If JOINERS is true, then thread through joiner blocks as well. */
1420 static bool
1421 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1423 /* E is an incoming edge into BB that we may or may not want to
1424 redirect to a duplicate of BB. */
1425 edge e, e2;
1426 edge_iterator ei;
1427 ssa_local_info_t local_info;
1429 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1430 local_info.need_profile_correction = false;
1432 /* To avoid scanning a linear array for the element we need we instead
1433 use a hash table. For normal code there should be no noticeable
1434 difference. However, if we have a block with a large number of
1435 incoming and outgoing edges such linear searches can get expensive. */
1436 redirection_data
1437 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1439 /* Record each unique threaded destination into a hash table for
1440 efficient lookups. */
1441 edge last = NULL;
1442 FOR_EACH_EDGE (e, ei, bb->preds)
1444 if (e->aux == NULL)
1445 continue;
1447 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1449 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1450 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1451 continue;
1453 e2 = path->last ()->e;
1454 if (!e2 || noloop_only)
1456 /* If NOLOOP_ONLY is true, we only allow threading through the
1457 header of a loop to exit edges. */
1459 /* One case occurs when there was loop header buried in a jump
1460 threading path that crosses loop boundaries. We do not try
1461 and thread this elsewhere, so just cancel the jump threading
1462 request by clearing the AUX field now. */
1463 if (bb->loop_father != e2->src->loop_father
1464 && !loop_exit_edge_p (e2->src->loop_father, e2))
1466 /* Since this case is not handled by our special code
1467 to thread through a loop header, we must explicitly
1468 cancel the threading request here. */
1469 delete_jump_thread_path (path);
1470 e->aux = NULL;
1471 continue;
1474 /* Another case occurs when trying to thread through our
1475 own loop header, possibly from inside the loop. We will
1476 thread these later. */
1477 unsigned int i;
1478 for (i = 1; i < path->length (); i++)
1480 if ((*path)[i]->e->src == bb->loop_father->header
1481 && (!loop_exit_edge_p (bb->loop_father, e2)
1482 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1483 break;
1486 if (i != path->length ())
1487 continue;
1490 /* Insert the outgoing edge into the hash table if it is not
1491 already in the hash table. */
1492 lookup_redirection_data (e, INSERT);
1494 /* When we have thread paths through a common joiner with different
1495 final destinations, then we may need corrections to deal with
1496 profile insanities. See the big comment before compute_path_counts. */
1497 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1499 if (!last)
1500 last = e2;
1501 else if (e2 != last)
1502 local_info.need_profile_correction = true;
1506 /* We do not update dominance info. */
1507 free_dominance_info (CDI_DOMINATORS);
1509 /* We know we only thread through the loop header to loop exits.
1510 Let the basic block duplication hook know we are not creating
1511 a multiple entry loop. */
1512 if (noloop_only
1513 && bb == bb->loop_father->header)
1514 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1516 /* Now create duplicates of BB.
1518 Note that for a block with a high outgoing degree we can waste
1519 a lot of time and memory creating and destroying useless edges.
1521 So we first duplicate BB and remove the control structure at the
1522 tail of the duplicate as well as all outgoing edges from the
1523 duplicate. We then use that duplicate block as a template for
1524 the rest of the duplicates. */
1525 local_info.template_block = NULL;
1526 local_info.bb = bb;
1527 local_info.jumps_threaded = false;
1528 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1529 (&local_info);
1531 /* The template does not have an outgoing edge. Create that outgoing
1532 edge and update PHI nodes as the edge's target as necessary.
1534 We do this after creating all the duplicates to avoid creating
1535 unnecessary edges. */
1536 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1537 (&local_info);
1539 /* The hash table traversals above created the duplicate blocks (and the
1540 statements within the duplicate blocks). This loop creates PHI nodes for
1541 the duplicated blocks and redirects the incoming edges into BB to reach
1542 the duplicates of BB. */
1543 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1544 (&local_info);
1546 /* Done with this block. Clear REDIRECTION_DATA. */
1547 delete redirection_data;
1548 redirection_data = NULL;
1550 if (noloop_only
1551 && bb == bb->loop_father->header)
1552 set_loop_copy (bb->loop_father, NULL);
1554 BITMAP_FREE (local_info.duplicate_blocks);
1555 local_info.duplicate_blocks = NULL;
1557 /* Indicate to our caller whether or not any jumps were threaded. */
1558 return local_info.jumps_threaded;
1561 /* Wrapper for thread_block_1 so that we can first handle jump
1562 thread paths which do not involve copying joiner blocks, then
1563 handle jump thread paths which have joiner blocks.
1565 By doing things this way we can be as aggressive as possible and
1566 not worry that copying a joiner block will create a jump threading
1567 opportunity. */
1569 static bool
1570 thread_block (basic_block bb, bool noloop_only)
1572 bool retval;
1573 retval = thread_block_1 (bb, noloop_only, false);
1574 retval |= thread_block_1 (bb, noloop_only, true);
1575 return retval;
1578 /* Callback for dfs_enumerate_from. Returns true if BB is different
1579 from STOP and DBDS_CE_STOP. */
1581 static basic_block dbds_ce_stop;
1582 static bool
1583 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1585 return (bb != (const_basic_block) stop
1586 && bb != dbds_ce_stop);
1589 /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1590 returns the state. */
1592 enum bb_dom_status
1593 determine_bb_domination_status (struct loop *loop, basic_block bb)
1595 basic_block *bblocks;
1596 unsigned nblocks, i;
1597 bool bb_reachable = false;
1598 edge_iterator ei;
1599 edge e;
1601 /* This function assumes BB is a successor of LOOP->header.
1602 If that is not the case return DOMST_NONDOMINATING which
1603 is always safe. */
1605 bool ok = false;
1607 FOR_EACH_EDGE (e, ei, bb->preds)
1609 if (e->src == loop->header)
1611 ok = true;
1612 break;
1616 if (!ok)
1617 return DOMST_NONDOMINATING;
1620 if (bb == loop->latch)
1621 return DOMST_DOMINATING;
1623 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1624 from it. */
1626 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1627 dbds_ce_stop = loop->header;
1628 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1629 bblocks, loop->num_nodes, bb);
1630 for (i = 0; i < nblocks; i++)
1631 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1633 if (e->src == loop->header)
1635 free (bblocks);
1636 return DOMST_NONDOMINATING;
1638 if (e->src == bb)
1639 bb_reachable = true;
1642 free (bblocks);
1643 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1646 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1647 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1648 to the inside of the loop. */
1650 static bool
1651 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1653 basic_block header = loop->header;
1654 edge e, tgt_edge, latch = loop_latch_edge (loop);
1655 edge_iterator ei;
1656 basic_block tgt_bb, atgt_bb;
1657 enum bb_dom_status domst;
1659 /* We have already threaded through headers to exits, so all the threading
1660 requests now are to the inside of the loop. We need to avoid creating
1661 irreducible regions (i.e., loops with more than one entry block), and
1662 also loop with several latch edges, or new subloops of the loop (although
1663 there are cases where it might be appropriate, it is difficult to decide,
1664 and doing it wrongly may confuse other optimizers).
1666 We could handle more general cases here. However, the intention is to
1667 preserve some information about the loop, which is impossible if its
1668 structure changes significantly, in a way that is not well understood.
1669 Thus we only handle few important special cases, in which also updating
1670 of the loop-carried information should be feasible:
1672 1) Propagation of latch edge to a block that dominates the latch block
1673 of a loop. This aims to handle the following idiom:
1675 first = 1;
1676 while (1)
1678 if (first)
1679 initialize;
1680 first = 0;
1681 body;
1684 After threading the latch edge, this becomes
1686 first = 1;
1687 if (first)
1688 initialize;
1689 while (1)
1691 first = 0;
1692 body;
1695 The original header of the loop is moved out of it, and we may thread
1696 the remaining edges through it without further constraints.
1698 2) All entry edges are propagated to a single basic block that dominates
1699 the latch block of the loop. This aims to handle the following idiom
1700 (normally created for "for" loops):
1702 i = 0;
1703 while (1)
1705 if (i >= 100)
1706 break;
1707 body;
1708 i++;
1711 This becomes
1713 i = 0;
1714 while (1)
1716 body;
1717 i++;
1718 if (i >= 100)
1719 break;
1723 /* Threading through the header won't improve the code if the header has just
1724 one successor. */
1725 if (single_succ_p (header))
1726 goto fail;
1728 if (!may_peel_loop_headers && !redirection_block_p (loop->header))
1729 goto fail;
1730 else
1732 tgt_bb = NULL;
1733 tgt_edge = NULL;
1734 FOR_EACH_EDGE (e, ei, header->preds)
1736 if (!e->aux)
1738 if (e == latch)
1739 continue;
1741 /* If latch is not threaded, and there is a header
1742 edge that is not threaded, we would create loop
1743 with multiple entries. */
1744 goto fail;
1747 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1749 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1750 goto fail;
1751 tgt_edge = (*path)[1]->e;
1752 atgt_bb = tgt_edge->dest;
1753 if (!tgt_bb)
1754 tgt_bb = atgt_bb;
1755 /* Two targets of threading would make us create loop
1756 with multiple entries. */
1757 else if (tgt_bb != atgt_bb)
1758 goto fail;
1761 if (!tgt_bb)
1763 /* There are no threading requests. */
1764 return false;
1767 /* Redirecting to empty loop latch is useless. */
1768 if (tgt_bb == loop->latch
1769 && empty_block_p (loop->latch))
1770 goto fail;
1773 /* The target block must dominate the loop latch, otherwise we would be
1774 creating a subloop. */
1775 domst = determine_bb_domination_status (loop, tgt_bb);
1776 if (domst == DOMST_NONDOMINATING)
1777 goto fail;
1778 if (domst == DOMST_LOOP_BROKEN)
1780 /* If the loop ceased to exist, mark it as such, and thread through its
1781 original header. */
1782 mark_loop_for_removal (loop);
1783 return thread_block (header, false);
1786 if (tgt_bb->loop_father->header == tgt_bb)
1788 /* If the target of the threading is a header of a subloop, we need
1789 to create a preheader for it, so that the headers of the two loops
1790 do not merge. */
1791 if (EDGE_COUNT (tgt_bb->preds) > 2)
1793 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1794 gcc_assert (tgt_bb != NULL);
1796 else
1797 tgt_bb = split_edge (tgt_edge);
1800 basic_block new_preheader;
1802 /* Now consider the case entry edges are redirected to the new entry
1803 block. Remember one entry edge, so that we can find the new
1804 preheader (its destination after threading). */
1805 FOR_EACH_EDGE (e, ei, header->preds)
1807 if (e->aux)
1808 break;
1811 /* The duplicate of the header is the new preheader of the loop. Ensure
1812 that it is placed correctly in the loop hierarchy. */
1813 set_loop_copy (loop, loop_outer (loop));
1815 thread_block (header, false);
1816 set_loop_copy (loop, NULL);
1817 new_preheader = e->dest;
1819 /* Create the new latch block. This is always necessary, as the latch
1820 must have only a single successor, but the original header had at
1821 least two successors. */
1822 loop->latch = NULL;
1823 mfb_kj_edge = single_succ_edge (new_preheader);
1824 loop->header = mfb_kj_edge->dest;
1825 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1826 loop->header = latch->dest;
1827 loop->latch = latch->src;
1828 return true;
1830 fail:
1831 /* We failed to thread anything. Cancel the requests. */
1832 FOR_EACH_EDGE (e, ei, header->preds)
1834 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1836 if (path)
1838 delete_jump_thread_path (path);
1839 e->aux = NULL;
1842 return false;
1845 /* E1 and E2 are edges into the same basic block. Return TRUE if the
1846 PHI arguments associated with those edges are equal or there are no
1847 PHI arguments, otherwise return FALSE. */
1849 static bool
1850 phi_args_equal_on_edges (edge e1, edge e2)
1852 gphi_iterator gsi;
1853 int indx1 = e1->dest_idx;
1854 int indx2 = e2->dest_idx;
1856 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1858 gphi *phi = gsi.phi ();
1860 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1861 gimple_phi_arg_def (phi, indx2), 0))
1862 return false;
1864 return true;
1867 /* Walk through the registered jump threads and convert them into a
1868 form convenient for this pass.
1870 Any block which has incoming edges threaded to outgoing edges
1871 will have its entry in THREADED_BLOCK set.
1873 Any threaded edge will have its new outgoing edge stored in the
1874 original edge's AUX field.
1876 This form avoids the need to walk all the edges in the CFG to
1877 discover blocks which need processing and avoids unnecessary
1878 hash table lookups to map from threaded edge to new target. */
1880 static void
1881 mark_threaded_blocks (bitmap threaded_blocks)
1883 unsigned int i;
1884 bitmap_iterator bi;
1885 auto_bitmap tmp;
1886 basic_block bb;
1887 edge e;
1888 edge_iterator ei;
1890 /* It is possible to have jump threads in which one is a subpath
1891 of the other. ie, (A, B), (B, C), (C, D) where B is a joiner
1892 block and (B, C), (C, D) where no joiner block exists.
1894 When this occurs ignore the jump thread request with the joiner
1895 block. It's totally subsumed by the simpler jump thread request.
1897 This results in less block copying, simpler CFGs. More importantly,
1898 when we duplicate the joiner block, B, in this case we will create
1899 a new threading opportunity that we wouldn't be able to optimize
1900 until the next jump threading iteration.
1902 So first convert the jump thread requests which do not require a
1903 joiner block. */
1904 for (i = 0; i < paths.length (); i++)
1906 vec<jump_thread_edge *> *path = paths[i];
1908 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1910 edge e = (*path)[0]->e;
1911 e->aux = (void *)path;
1912 bitmap_set_bit (tmp, e->dest->index);
1916 /* Now iterate again, converting cases where we want to thread
1917 through a joiner block, but only if no other edge on the path
1918 already has a jump thread attached to it. We do this in two passes,
1919 to avoid situations where the order in the paths vec can hide overlapping
1920 threads (the path is recorded on the incoming edge, so we would miss
1921 cases where the second path starts at a downstream edge on the same
1922 path). First record all joiner paths, deleting any in the unexpected
1923 case where there is already a path for that incoming edge. */
1924 for (i = 0; i < paths.length ();)
1926 vec<jump_thread_edge *> *path = paths[i];
1928 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1930 /* Attach the path to the starting edge if none is yet recorded. */
1931 if ((*path)[0]->e->aux == NULL)
1933 (*path)[0]->e->aux = path;
1934 i++;
1936 else
1938 paths.unordered_remove (i);
1939 if (dump_file && (dump_flags & TDF_DETAILS))
1940 dump_jump_thread_path (dump_file, *path, false);
1941 delete_jump_thread_path (path);
1944 else
1946 i++;
1950 /* Second, look for paths that have any other jump thread attached to
1951 them, and either finish converting them or cancel them. */
1952 for (i = 0; i < paths.length ();)
1954 vec<jump_thread_edge *> *path = paths[i];
1955 edge e = (*path)[0]->e;
1957 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
1959 unsigned int j;
1960 for (j = 1; j < path->length (); j++)
1961 if ((*path)[j]->e->aux != NULL)
1962 break;
1964 /* If we iterated through the entire path without exiting the loop,
1965 then we are good to go, record it. */
1966 if (j == path->length ())
1968 bitmap_set_bit (tmp, e->dest->index);
1969 i++;
1971 else
1973 e->aux = NULL;
1974 paths.unordered_remove (i);
1975 if (dump_file && (dump_flags & TDF_DETAILS))
1976 dump_jump_thread_path (dump_file, *path, false);
1977 delete_jump_thread_path (path);
1980 else
1982 i++;
1986 /* If optimizing for size, only thread through block if we don't have
1987 to duplicate it or it's an otherwise empty redirection block. */
1988 if (optimize_function_for_size_p (cfun))
1990 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
1992 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1993 if (EDGE_COUNT (bb->preds) > 1
1994 && !redirection_block_p (bb))
1996 FOR_EACH_EDGE (e, ei, bb->preds)
1998 if (e->aux)
2000 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2001 delete_jump_thread_path (path);
2002 e->aux = NULL;
2006 else
2007 bitmap_set_bit (threaded_blocks, i);
2010 else
2011 bitmap_copy (threaded_blocks, tmp);
2013 /* If we have a joiner block (J) which has two successors S1 and S2 and
2014 we are threading though S1 and the final destination of the thread
2015 is S2, then we must verify that any PHI nodes in S2 have the same
2016 PHI arguments for the edge J->S2 and J->S1->...->S2.
2018 We used to detect this prior to registering the jump thread, but
2019 that prohibits propagation of edge equivalences into non-dominated
2020 PHI nodes as the equivalency test might occur before propagation.
2022 This must also occur after we truncate any jump threading paths
2023 as this scenario may only show up after truncation.
2025 This works for now, but will need improvement as part of the FSA
2026 optimization.
2028 Note since we've moved the thread request data to the edges,
2029 we have to iterate on those rather than the threaded_edges vector. */
2030 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2032 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2033 FOR_EACH_EDGE (e, ei, bb->preds)
2035 if (e->aux)
2037 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2038 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2040 if (have_joiner)
2042 basic_block joiner = e->dest;
2043 edge final_edge = path->last ()->e;
2044 basic_block final_dest = final_edge->dest;
2045 edge e2 = find_edge (joiner, final_dest);
2047 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2049 delete_jump_thread_path (path);
2050 e->aux = NULL;
2057 /* Look for jump threading paths which cross multiple loop headers.
2059 The code to thread through loop headers will change the CFG in ways
2060 that invalidate the cached loop iteration information. So we must
2061 detect that case and wipe the cached information. */
2062 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2064 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2065 FOR_EACH_EDGE (e, ei, bb->preds)
2067 if (e->aux)
2069 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2071 for (unsigned int i = 0, crossed_headers = 0;
2072 i < path->length ();
2073 i++)
2075 basic_block dest = (*path)[i]->e->dest;
2076 basic_block src = (*path)[i]->e->src;
2077 /* If we enter a loop. */
2078 if (flow_loop_nested_p (src->loop_father, dest->loop_father))
2079 ++crossed_headers;
2080 /* If we step from a block outside an irreducible region
2081 to a block inside an irreducible region, then we have
2082 crossed into a loop. */
2083 else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
2084 && (dest->flags & BB_IRREDUCIBLE_LOOP))
2085 ++crossed_headers;
2086 if (crossed_headers > 1)
2088 vect_free_loop_info_assumptions
2089 ((*path)[path->length () - 1]->e->dest->loop_father);
2090 break;
2099 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2100 case of SEME Single Entry Multiple Exits region in which all nodes in the
2101 REGION have exactly one incoming edge. The only exception is the first block
2102 that may not have been connected to the rest of the cfg yet. */
2104 DEBUG_FUNCTION void
2105 verify_jump_thread (basic_block *region, unsigned n_region)
2107 for (unsigned i = 0; i < n_region; i++)
2108 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2111 /* Return true when BB is one of the first N items in BBS. */
2113 static inline bool
2114 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2116 for (int i = 0; i < n; i++)
2117 if (bb == bbs[i])
2118 return true;
2120 return false;
2123 /* Duplicates a jump-thread path of N_REGION basic blocks.
2124 The ENTRY edge is redirected to the duplicate of the region.
2126 Remove the last conditional statement in the last basic block in the REGION,
2127 and create a single fallthru edge pointing to the same destination as the
2128 EXIT edge.
2130 Returns false if it is unable to copy the region, true otherwise. */
2132 static bool
2133 duplicate_thread_path (edge entry, edge exit, basic_block *region,
2134 unsigned n_region)
2136 unsigned i;
2137 struct loop *loop = entry->dest->loop_father;
2138 edge exit_copy;
2139 edge redirected;
2140 int curr_freq;
2141 profile_count curr_count;
2143 if (!can_copy_bbs_p (region, n_region))
2144 return false;
2146 /* Some sanity checking. Note that we do not check for all possible
2147 missuses of the functions. I.e. if you ask to copy something weird,
2148 it will work, but the state of structures probably will not be
2149 correct. */
2150 for (i = 0; i < n_region; i++)
2152 /* We do not handle subloops, i.e. all the blocks must belong to the
2153 same loop. */
2154 if (region[i]->loop_father != loop)
2155 return false;
2158 initialize_original_copy_tables ();
2160 set_loop_copy (loop, loop);
2162 basic_block *region_copy = XNEWVEC (basic_block, n_region);
2163 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2164 split_edge_bb_loc (entry), false);
2166 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2167 following code ensures that all the edges exiting the jump-thread path are
2168 redirected back to the original code: these edges are exceptions
2169 invalidating the property that is propagated by executing all the blocks of
2170 the jump-thread path in order. */
2172 curr_count = entry->count ();
2173 curr_freq = EDGE_FREQUENCY (entry);
2175 for (i = 0; i < n_region; i++)
2177 edge e;
2178 edge_iterator ei;
2179 basic_block bb = region_copy[i];
2181 /* Watch inconsistent profile. */
2182 if (curr_count > region[i]->count)
2183 curr_count = region[i]->count;
2184 if (curr_freq > region[i]->frequency)
2185 curr_freq = region[i]->frequency;
2186 /* Scale current BB. */
2187 if (region[i]->count > 0 && curr_count.initialized_p ())
2189 /* In the middle of the path we only scale the frequencies.
2190 In last BB we need to update probabilities of outgoing edges
2191 because we know which one is taken at the threaded path. */
2192 if (i + 1 != n_region)
2193 scale_bbs_frequencies_profile_count (region + i, 1,
2194 region[i]->count - curr_count,
2195 region[i]->count);
2196 else
2197 update_bb_profile_for_threading (region[i],
2198 curr_freq, curr_count,
2199 exit);
2200 scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
2201 region_copy[i]->count);
2203 else if (region[i]->frequency)
2205 if (i + 1 != n_region)
2206 scale_bbs_frequencies_int (region + i, 1,
2207 region[i]->frequency - curr_freq,
2208 region[i]->frequency);
2209 else
2210 update_bb_profile_for_threading (region[i],
2211 curr_freq, curr_count,
2212 exit);
2213 scale_bbs_frequencies_int (region_copy + i, 1, curr_freq,
2214 region_copy[i]->frequency);
2217 if (single_succ_p (bb))
2219 /* Make sure the successor is the next node in the path. */
2220 gcc_assert (i + 1 == n_region
2221 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2222 if (i + 1 != n_region)
2224 curr_freq = EDGE_FREQUENCY (single_succ_edge (bb));
2225 curr_count = single_succ_edge (bb)->count ();
2227 continue;
2230 /* Special case the last block on the path: make sure that it does not
2231 jump back on the copied path, including back to itself. */
2232 if (i + 1 == n_region)
2234 FOR_EACH_EDGE (e, ei, bb->succs)
2235 if (bb_in_bbs (e->dest, region_copy, n_region))
2237 basic_block orig = get_bb_original (e->dest);
2238 if (orig)
2239 redirect_edge_and_branch_force (e, orig);
2241 continue;
2244 /* Redirect all other edges jumping to non-adjacent blocks back to the
2245 original code. */
2246 FOR_EACH_EDGE (e, ei, bb->succs)
2247 if (region_copy[i + 1] != e->dest)
2249 basic_block orig = get_bb_original (e->dest);
2250 if (orig)
2251 redirect_edge_and_branch_force (e, orig);
2253 else
2255 curr_freq = EDGE_FREQUENCY (e);
2256 curr_count = e->count ();
2261 if (flag_checking)
2262 verify_jump_thread (region_copy, n_region);
2264 /* Remove the last branch in the jump thread path. */
2265 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2267 /* And fixup the flags on the single remaining edge. */
2268 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2269 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2270 fix_e->flags |= EDGE_FALLTHRU;
2272 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2274 if (e)
2276 rescan_loop_exit (e, true, false);
2277 e->probability = profile_probability::always ();
2280 /* Redirect the entry and add the phi node arguments. */
2281 if (entry->dest == loop->header)
2282 mark_loop_for_removal (loop);
2283 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2284 gcc_assert (redirected != NULL);
2285 flush_pending_stmts (entry);
2287 /* Add the other PHI node arguments. */
2288 add_phi_args_after_copy (region_copy, n_region, NULL);
2290 free (region_copy);
2292 free_original_copy_tables ();
2293 return true;
2296 /* Return true when PATH is a valid jump-thread path. */
2298 static bool
2299 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2301 unsigned len = path->length ();
2303 /* Check that the path is connected. */
2304 for (unsigned int j = 0; j < len - 1; j++)
2306 edge e = (*path)[j]->e;
2307 if (e->dest != (*path)[j+1]->e->src)
2308 return false;
2310 return true;
2313 /* Remove any queued jump threads that include edge E.
2315 We don't actually remove them here, just record the edges into ax
2316 hash table. That way we can do the search once per iteration of
2317 DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2319 void
2320 remove_jump_threads_including (edge_def *e)
2322 if (!paths.exists ())
2323 return;
2325 if (!removed_edges)
2326 removed_edges = new hash_table<struct removed_edges> (17);
2328 edge *slot = removed_edges->find_slot (e, INSERT);
2329 *slot = e;
2332 /* Walk through all blocks and thread incoming edges to the appropriate
2333 outgoing edge for each edge pair recorded in THREADED_EDGES.
2335 It is the caller's responsibility to fix the dominance information
2336 and rewrite duplicated SSA_NAMEs back into SSA form.
2338 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2339 loop headers if it does not simplify the loop.
2341 Returns true if one or more edges were threaded, false otherwise. */
2343 bool
2344 thread_through_all_blocks (bool may_peel_loop_headers)
2346 bool retval = false;
2347 unsigned int i;
2348 bitmap_iterator bi;
2349 struct loop *loop;
2350 auto_bitmap threaded_blocks;
2352 if (!paths.exists ())
2354 retval = false;
2355 goto out;
2358 memset (&thread_stats, 0, sizeof (thread_stats));
2360 /* Remove any paths that referenced removed edges. */
2361 if (removed_edges)
2362 for (i = 0; i < paths.length (); )
2364 unsigned int j;
2365 vec<jump_thread_edge *> *path = paths[i];
2367 for (j = 0; j < path->length (); j++)
2369 edge e = (*path)[j]->e;
2370 if (removed_edges->find_slot (e, NO_INSERT))
2371 break;
2374 if (j != path->length ())
2376 delete_jump_thread_path (path);
2377 paths.unordered_remove (i);
2378 continue;
2380 i++;
2383 /* Jump-thread all FSM threads before other jump-threads. */
2384 for (i = 0; i < paths.length ();)
2386 vec<jump_thread_edge *> *path = paths[i];
2387 edge entry = (*path)[0]->e;
2389 /* Only code-generate FSM jump-threads in this loop. */
2390 if ((*path)[0]->type != EDGE_FSM_THREAD)
2392 i++;
2393 continue;
2396 /* Do not jump-thread twice from the same block. */
2397 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2398 /* We may not want to realize this jump thread path
2399 for various reasons. So check it first. */
2400 || !valid_jump_thread_path (path))
2402 /* Remove invalid FSM jump-thread paths. */
2403 delete_jump_thread_path (path);
2404 paths.unordered_remove (i);
2405 continue;
2408 unsigned len = path->length ();
2409 edge exit = (*path)[len - 1]->e;
2410 basic_block *region = XNEWVEC (basic_block, len - 1);
2412 for (unsigned int j = 0; j < len - 1; j++)
2413 region[j] = (*path)[j]->e->dest;
2415 if (duplicate_thread_path (entry, exit, region, len - 1))
2417 /* We do not update dominance info. */
2418 free_dominance_info (CDI_DOMINATORS);
2419 bitmap_set_bit (threaded_blocks, entry->src->index);
2420 retval = true;
2421 thread_stats.num_threaded_edges++;
2424 delete_jump_thread_path (path);
2425 paths.unordered_remove (i);
2426 free (region);
2429 /* Remove from PATHS all the jump-threads starting with an edge already
2430 jump-threaded. */
2431 for (i = 0; i < paths.length ();)
2433 vec<jump_thread_edge *> *path = paths[i];
2434 edge entry = (*path)[0]->e;
2436 /* Do not jump-thread twice from the same block. */
2437 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2439 delete_jump_thread_path (path);
2440 paths.unordered_remove (i);
2442 else
2443 i++;
2446 bitmap_clear (threaded_blocks);
2448 mark_threaded_blocks (threaded_blocks);
2450 initialize_original_copy_tables ();
2452 /* First perform the threading requests that do not affect
2453 loop structure. */
2454 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2456 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2458 if (EDGE_COUNT (bb->preds) > 0)
2459 retval |= thread_block (bb, true);
2462 /* Then perform the threading through loop headers. We start with the
2463 innermost loop, so that the changes in cfg we perform won't affect
2464 further threading. */
2465 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2467 if (!loop->header
2468 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2469 continue;
2471 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2474 /* All jump threading paths should have been resolved at this
2475 point. Verify that is the case. */
2476 basic_block bb;
2477 FOR_EACH_BB_FN (bb, cfun)
2479 edge_iterator ei;
2480 edge e;
2481 FOR_EACH_EDGE (e, ei, bb->preds)
2482 gcc_assert (e->aux == NULL);
2485 statistics_counter_event (cfun, "Jumps threaded",
2486 thread_stats.num_threaded_edges);
2488 free_original_copy_tables ();
2490 paths.release ();
2492 if (retval)
2493 loops_state_set (LOOPS_NEED_FIXUP);
2495 out:
2496 delete removed_edges;
2497 removed_edges = NULL;
2498 return retval;
2501 /* Delete the jump threading path PATH. We have to explicitly delete
2502 each entry in the vector, then the container. */
2504 void
2505 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2507 for (unsigned int i = 0; i < path->length (); i++)
2508 delete (*path)[i];
2509 path->release();
2510 delete path;
2513 /* Register a jump threading opportunity. We queue up all the jump
2514 threading opportunities discovered by a pass and update the CFG
2515 and SSA form all at once.
2517 E is the edge we can thread, E2 is the new target edge, i.e., we
2518 are effectively recording that E->dest can be changed to E2->dest
2519 after fixing the SSA graph. */
2521 void
2522 register_jump_thread (vec<jump_thread_edge *> *path)
2524 if (!dbg_cnt (registered_jump_thread))
2526 delete_jump_thread_path (path);
2527 return;
2530 /* First make sure there are no NULL outgoing edges on the jump threading
2531 path. That can happen for jumping to a constant address. */
2532 for (unsigned int i = 0; i < path->length (); i++)
2534 if ((*path)[i]->e == NULL)
2536 if (dump_file && (dump_flags & TDF_DETAILS))
2538 fprintf (dump_file,
2539 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2540 dump_jump_thread_path (dump_file, *path, false);
2543 delete_jump_thread_path (path);
2544 return;
2547 /* Only the FSM threader is allowed to thread across
2548 backedges in the CFG. */
2549 if (flag_checking
2550 && (*path)[0]->type != EDGE_FSM_THREAD)
2551 gcc_assert (((*path)[i]->e->flags & EDGE_DFS_BACK) == 0);
2554 if (dump_file && (dump_flags & TDF_DETAILS))
2555 dump_jump_thread_path (dump_file, *path, true);
2557 if (!paths.exists ())
2558 paths.create (5);
2560 paths.safe_push (path);