gcc/tree-ssa-threadupdate.c
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "backend.h"
25 #include "cfghooks.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "hard-reg-set.h"
29 #include "ssa.h"
30 #include "options.h"
31 #include "fold-const.h"
32 #include "flags.h"
33 #include "cfganal.h"
34 #include "internal-fn.h"
35 #include "gimple-iterator.h"
36 #include "tree-ssa.h"
37 #include "tree-ssa-threadupdate.h"
38 #include "dumpfile.h"
39 #include "cfgloop.h"
40 #include "dbgcnt.h"
41 #include "tree-cfg.h"
42 #include "tree-pass.h"
44 /* Given a block B, update the CFG and SSA graph to reflect redirecting
45 one or more in-edges to B to instead reach the destination of an
46 out-edge from B while preserving any side effects in B.
48 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
49 side effects of executing B.
51 1. Make a copy of B (including its outgoing edges and statements). Call
52 the copy B'. Note B' has no incoming edges or PHIs at this time.
54 2. Remove the control statement at the end of B' and all outgoing edges
55 except B'->C.
57 3. Add a new argument to each PHI in C with the same value as the existing
58 argument associated with edge B->C. Associate the new PHI arguments
59 with the edge B'->C.
61 4. For each PHI in B, find or create a PHI in B' with an identical
62 PHI_RESULT. Add an argument to the PHI in B' which has the same
63 value as the PHI in B associated with the edge A->B. Associate
64 the new argument in the PHI in B' with the edge A->B.
66 5. Change the edge A->B to A->B'.
68 5a. This automatically deletes any PHI arguments associated with the
69 edge A->B in B.
71 5b. This automatically associates each new argument added in step 4
72 with the edge A->B'.
74 6. Repeat for other incoming edges into B.
76 7. Put the duplicated resources in B and all the B' blocks into SSA form.
78 Note that block duplication can be minimized by first collecting the
79 set of unique destination blocks that the incoming edges should
80 be threaded to.
82 We reduce the number of edges and statements we create by not copying all
83 the outgoing edges and the control statement in step #1. We instead create
84 a template block without the outgoing edges and duplicate the template.
86 Another case this code handles is threading through a "joiner" block. In
87 this case, we do not know the destination of the joiner block, but one
88 of the outgoing edges from the joiner block leads to a threadable path. This
89 case largely works as outlined above, except the duplicate of the joiner
90 block still contains a full set of outgoing edges and its control statement.
91 We just redirect one of its outgoing edges to our jump threading path. */
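/* As a purely illustrative sketch (the block names follow the A/B/C
   example above and are not taken from any particular test case):
   before threading, A unconditionally reaches B, and B conditionally
   branches to C or D:

       A -> B -> C
            B -> D

   After registering the jump thread A->B->C and updating the CFG, A
   reaches a copy B' of B whose control statement and extra outgoing
   edges have been removed, while the original B survives for its
   remaining predecessors:

       A -> B' -> C
            B  -> C
            B  -> D

   B' still contains B's side effects, so the work done along the
   threaded path is unchanged; only the conditional branch in B is
   bypassed.  */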
94 /* Steps #5 and #6 of the above algorithm are best implemented by walking
95 all the incoming edges which thread to the same destination edge at
96 the same time. That avoids lots of table lookups to get information
97 for the destination edge.
99 To realize that implementation we create a list of incoming edges
100 which thread to the same outgoing edge. Thus to implement steps
101 #5 and #6 we traverse our hash table of outgoing edge information.
102 For each entry we walk the list of incoming edges which thread to
103 the current outgoing edge. */
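/* Schematically, the traversal described above looks like the following
   simplified sketch (not the exact code; see ssa_redirect_edges below
   for the real version):

     for each redirection_data *rd in the hash table:
       for (struct el *el = rd->incoming_edges; el; el = el->next)
         redirect_edge_and_branch (el->e, rd->dup_blocks[0]);

   so the information for each outgoing edge is looked up once, no
   matter how many incoming edges thread to it.  */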
105 struct el
107 edge e;
108 struct el *next;
111 /* Main data structure recording information regarding B's duplicate
112 blocks. */
114 /* We need to efficiently record the unique thread destinations of this
115 block and specific information associated with those destinations. We
116 may have many incoming edges threaded to the same outgoing edge. This
117 can be naturally implemented with a hash table. */
119 struct redirection_data : free_ptr_hash<redirection_data>
121 /* We support wiring up two block duplicates in a jump threading path.
123 One is a normal block copy where we remove the control statement
124 and wire up its single remaining outgoing edge to the thread path.
126 The other is a joiner block where we leave the control statement
127 in place, but wire one of the outgoing edges to a thread path.
129 In theory we could have multiple block duplicates in a jump
130 threading path, but I haven't tried that.
132 The duplicate blocks appear in this array in the same order in
133 which they appear in the jump thread path. */
134 basic_block dup_blocks[2];
136 /* The jump threading path. */
137 vec<jump_thread_edge *> *path;
139 /* A list of incoming edges which we want to thread to the
140 same path. */
141 struct el *incoming_edges;
143 /* hash_table support. */
144 static inline hashval_t hash (const redirection_data *);
145 static inline int equal (const redirection_data *, const redirection_data *);
148 /* Dump a jump threading path, including annotations about each
149 edge in the path. */
151 static void
152 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
153 bool registering)
155 fprintf (dump_file,
156 " %s%s jump thread: (%d, %d) incoming edge; ",
157 (registering ? "Registering" : "Cancelling"),
158 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
159 path[0]->e->src->index, path[0]->e->dest->index);
161 for (unsigned int i = 1; i < path.length (); i++)
163 /* We can get paths with a NULL edge when the final destination
164 of a jump thread turns out to be a constant address. We dump
165 those paths when debugging, so we have to be prepared for that
166 possibility here. */
167 if (path[i]->e == NULL)
168 continue;
170 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
171 fprintf (dump_file, " (%d, %d) joiner; ",
172 path[i]->e->src->index, path[i]->e->dest->index);
173 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
174 fprintf (dump_file, " (%d, %d) normal;",
175 path[i]->e->src->index, path[i]->e->dest->index);
176 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
177 fprintf (dump_file, " (%d, %d) nocopy;",
178 path[i]->e->src->index, path[i]->e->dest->index);
179 if (path[0]->type == EDGE_FSM_THREAD)
180 fprintf (dump_file, " (%d, %d) ",
181 path[i]->e->src->index, path[i]->e->dest->index);
183 fputc ('\n', dump_file);
186 /* Simple hashing function. For any given incoming edge E, we're going
187 to be most concerned with the final destination of its jump thread
188 path. So hash on the block index of the final edge in the path. */
190 inline hashval_t
191 redirection_data::hash (const redirection_data *p)
193 vec<jump_thread_edge *> *path = p->path;
194 return path->last ()->e->dest->index;
197 /* Given two hash table entries, return true if they have the same
198 jump threading path. */
199 inline int
200 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
202 vec<jump_thread_edge *> *path1 = p1->path;
203 vec<jump_thread_edge *> *path2 = p2->path;
205 if (path1->length () != path2->length ())
206 return false;
208 for (unsigned int i = 1; i < path1->length (); i++)
210 if ((*path1)[i]->type != (*path2)[i]->type
211 || (*path1)[i]->e != (*path2)[i]->e)
212 return false;
215 return true;
218 /* Data structure of information to pass to hash table traversal routines. */
219 struct ssa_local_info_t
221 /* The current block we are working on. */
222 basic_block bb;
224 /* We only create a template block for the first duplicated block in a
225 jump threading path as we may need many duplicates of that block.
227 The second duplicate block in a path is specific to that path. Creating
228 and sharing a template for that block is considerably more difficult. */
229 basic_block template_block;
231 /* TRUE if we thread one or more jumps, FALSE otherwise. */
232 bool jumps_threaded;
234 /* Blocks duplicated for the thread. */
235 bitmap duplicate_blocks;
238 /* Passes which use the jump threading code register jump threading
239 opportunities as they are discovered. We keep the registered
240 jump threading opportunities in this vector as edge pairs
241 (original_edge, target_edge). */
242 static vec<vec<jump_thread_edge *> *> paths;
244 /* When we start updating the CFG for threading, data necessary for jump
245 threading is attached to the AUX field for the incoming edge. Use these
246 macros to access the underlying structure attached to the AUX field. */
247 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
249 /* Jump threading statistics. */
251 struct thread_stats_d
253 unsigned long num_threaded_edges;
256 struct thread_stats_d thread_stats;
259 /* Remove the last statement in block BB if it is a control statement.
260 Also remove all outgoing edges except the edge which reaches DEST_BB.
261 If DEST_BB is NULL, then remove all outgoing edges. */
263 static void
264 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
266 gimple_stmt_iterator gsi;
267 edge e;
268 edge_iterator ei;
270 gsi = gsi_last_bb (bb);
272 /* If the duplicate ends with a control statement, then remove it.
274 Note that if we are duplicating the template block rather than the
275 original basic block, then the duplicate might not have any real
276 statements in it. */
277 if (!gsi_end_p (gsi)
278 && gsi_stmt (gsi)
279 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
280 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
281 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
282 gsi_remove (&gsi, true);
284 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
286 if (e->dest != dest_bb)
287 remove_edge (e);
288 else
289 ei_next (&ei);
293 /* Create a duplicate of BB. Record the duplicate block in an array
294 indexed by COUNT stored in RD. */
296 static void
297 create_block_for_threading (basic_block bb,
298 struct redirection_data *rd,
299 unsigned int count,
300 bitmap *duplicate_blocks)
302 edge_iterator ei;
303 edge e;
305 /* We can use the generic block duplication code and simply remove
306 the stuff we do not need. */
307 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
309 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
310 e->aux = NULL;
312 /* Zero out the profile, since the block is unreachable for now. */
313 rd->dup_blocks[count]->frequency = 0;
314 rd->dup_blocks[count]->count = 0;
315 if (duplicate_blocks)
316 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
319 /* Main data structure to hold information for duplicates of BB. */
321 static hash_table<redirection_data> *redirection_data;
323 /* Given an outgoing edge E, look up and return its entry in our hash table.
325 If INSERT is true, then we insert the entry into the hash table if
326 it is not already present. INCOMING_EDGE is added to the list of incoming
327 edges associated with E in the hash table. */
329 static struct redirection_data *
330 lookup_redirection_data (edge e, enum insert_option insert)
332 struct redirection_data **slot;
333 struct redirection_data *elt;
334 vec<jump_thread_edge *> *path = THREAD_PATH (e);
336 /* Build a hash table element so we can see if E is already
337 in the table. */
338 elt = XNEW (struct redirection_data);
339 elt->path = path;
340 elt->dup_blocks[0] = NULL;
341 elt->dup_blocks[1] = NULL;
342 elt->incoming_edges = NULL;
344 slot = redirection_data->find_slot (elt, insert);
346 /* This will only happen if INSERT is false and the entry is not
347 in the hash table. */
348 if (slot == NULL)
350 free (elt);
351 return NULL;
354 /* This will only happen if E was not in the hash table and
355 INSERT is true. */
356 if (*slot == NULL)
358 *slot = elt;
359 elt->incoming_edges = XNEW (struct el);
360 elt->incoming_edges->e = e;
361 elt->incoming_edges->next = NULL;
362 return elt;
364 /* E was in the hash table. */
365 else
367 /* Free ELT as we do not need it anymore, we will extract the
368 relevant entry from the hash table itself. */
369 free (elt);
371 /* Get the entry stored in the hash table. */
372 elt = *slot;
374 /* If insertion was requested, then we need to add INCOMING_EDGE
375 to the list of incoming edges associated with E. */
376 if (insert)
378 struct el *el = XNEW (struct el);
379 el->next = elt->incoming_edges;
380 el->e = e;
381 elt->incoming_edges = el;
384 return elt;
388 /* Similar to copy_phi_args, except that the PHI arg already exists; it
389 just does not have a value associated with it. */
391 static void
392 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
394 int src_idx = src_e->dest_idx;
395 int tgt_idx = tgt_e->dest_idx;
397 /* Iterate over each PHI in e->dest. */
398 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
399 gsi2 = gsi_start_phis (tgt_e->dest);
400 !gsi_end_p (gsi);
401 gsi_next (&gsi), gsi_next (&gsi2))
403 gphi *src_phi = gsi.phi ();
404 gphi *dest_phi = gsi2.phi ();
405 tree val = gimple_phi_arg_def (src_phi, src_idx);
406 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
408 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
409 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
413 /* Given ssa_name DEF, backtrack along jump threading PATH from node IDX
414 to see if it has a constant value in a flow-sensitive manner. Set
415 LOCUS to the location of the constant PHI arg and return the value.
416 Return DEF directly if PATH is NULL or IDX is zero. */
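/* For example (the SSA names and block numbers here are hypothetical):
   if DEF is x_1 defined by x_1 = PHI <7(bb2), y_3(bb4)> in block bb5,
   and the portion of PATH before IDX enters bb5 via the edge from bb2,
   then the constant 7 and that PHI argument's location are returned
   instead of x_1.  */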
418 static tree
419 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
420 basic_block bb, int idx, source_location *locus)
422 tree arg;
423 gphi *def_phi;
424 basic_block def_bb;
426 if (path == NULL || idx == 0)
427 return def;
429 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
430 if (!def_phi)
431 return def;
433 def_bb = gimple_bb (def_phi);
434 /* Don't propagate loop invariants into deeper loops. */
435 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
436 return def;
438 /* Backtrack jump threading path from IDX to see if def has constant
439 value. */
440 for (int j = idx - 1; j >= 0; j--)
442 edge e = (*path)[j]->e;
443 if (e->dest == def_bb)
445 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
446 if (is_gimple_min_invariant (arg))
448 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
449 return arg;
451 break;
455 return def;
458 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
459 Try to backtrack along jump threading PATH from node IDX to see if the
460 arg has a constant value; if so, copy the constant value instead of the
461 argument itself. */
463 static void
464 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
465 vec<jump_thread_edge *> *path, int idx)
467 gphi_iterator gsi;
468 int src_indx = src_e->dest_idx;
470 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
472 gphi *phi = gsi.phi ();
473 tree def = gimple_phi_arg_def (phi, src_indx);
474 source_location locus = gimple_phi_arg_location (phi, src_indx);
476 if (TREE_CODE (def) == SSA_NAME
477 && !virtual_operand_p (gimple_phi_result (phi)))
478 def = get_value_locus_in_path (def, path, bb, idx, &locus);
480 add_phi_arg (phi, def, tgt_e, locus);
484 /* We have recently made a copy of ORIG_BB, including its outgoing
485 edges. The copy is NEW_BB. Every PHI node in every direct successor of
486 ORIG_BB has a new argument associated with the edge from NEW_BB to the
487 successor. Initialize the PHI argument so that it is equal to the PHI
488 argument associated with the edge from ORIG_BB to the successor.
489 PATH and IDX are used to check if the new PHI argument has a constant
490 value in a flow-sensitive manner. */
492 static void
493 update_destination_phis (basic_block orig_bb, basic_block new_bb,
494 vec<jump_thread_edge *> *path, int idx)
496 edge_iterator ei;
497 edge e;
499 FOR_EACH_EDGE (e, ei, orig_bb->succs)
501 edge e2 = find_edge (new_bb, e->dest);
502 copy_phi_args (e->dest, e, e2, path, idx);
506 /* Given a duplicate block and its single destination (both stored
507 in RD), create an edge between the duplicate and its single
508 destination.
510 Add an additional argument to any PHI nodes at the single
511 destination. IDX is the node in the jump threading path at
512 which we start checking whether the new PHI argument has a
513 constant value along the jump threading path. */
515 static void
516 create_edge_and_update_destination_phis (struct redirection_data *rd,
517 basic_block bb, int idx)
519 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
521 rescan_loop_exit (e, true, false);
522 e->probability = REG_BR_PROB_BASE;
523 e->count = bb->count;
525 /* We used to copy the thread path here. That was added in 2007
526 and dutifully updated through the representation changes in 2013.
528 In 2013 we added code to thread from an interior node through
529 the backedge to another interior node. That runs after the code
530 to thread through loop headers from outside the loop.
532 The latter may delete edges in the CFG, including those
533 which appeared in the jump threading path we copied here. Thus
534 we'd end up using a dangling pointer.
536 After reviewing the 2007/2011 code, I can't see how anything
537 depended on copying the AUX field and clearly copying the jump
538 threading path is problematical due to embedded edge pointers.
539 It has been removed. */
540 e->aux = NULL;
542 /* If there are any PHI nodes at the destination of the outgoing edge
543 from the duplicate block, then we will need to add a new argument
544 to them. The argument should have the same value as the argument
545 associated with the outgoing edge stored in RD. */
546 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
549 /* Look through PATH beginning at START and return TRUE if there are
550 any additional blocks that need to be duplicated. Otherwise,
551 return FALSE. */
552 static bool
553 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
554 unsigned int start)
556 for (unsigned int i = start + 1; i < path->length (); i++)
558 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
559 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
560 return true;
562 return false;
566 /* Compute the amount of profile count/frequency coming into the jump threading
567 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
568 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
569 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
570 identify blocks duplicated for jump threading, which have duplicated
571 edges that need to be ignored in the analysis. Return true if path contains
572 a joiner, false otherwise.
574 In the non-joiner case, this is straightforward - all the counts/frequency
575 flowing into the jump threading path should flow through the duplicated
576 block and out of the duplicated path.
578 In the joiner case, it is very tricky. Some of the counts flowing into
579 the original path go offpath at the joiner. The problem is that while
580 we know how much total count goes off-path in the original control flow,
581 we don't know how many of the counts corresponding to just the jump
582 threading path go offpath at the joiner.
584 For example, assume we have the following control flow and identified
585 jump threading paths:
             A       B       C
              \      |      /
             Ea\     |Eb   /Ec
                \    |    /
                 v   v   v
                     J       <-- Joiner
                    / \
               Eoff/   \Eon
                  v     v
               Soff     Son  <--- Normal
                       /  \
                    Ed/    \Ee
                     v      v
                     D      E
604 Jump threading paths: A -> J -> Son -> D (path 1)
605 C -> J -> Son -> E (path 2)
607 Note that the control flow could be more complicated:
608 - Each jump threading path may have more than one incoming edge. I.e. A and
609 Ea could represent multiple incoming blocks/edges that are included in
610 path 1.
611 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
612 before or after the "normal" copy block). These are not duplicated onto
613 the jump threading path, as they are single-successor.
614 - Any of the blocks along the path may have other incoming edges that
615 are not part of any jump threading path, but add profile counts along
616 the path.
618 In the above example, after all jump threading is complete, we will
619 end up with the following control flow:
           A          B            C
           |          |            |
         Ea|          |Eb          |Ec
           |          |            |
           v          v            v
          Ja          J           Jc
          / \        / \Eon'      / \
     Eona/   \   ---/---\--------    \Eonc
        /     \ /  /     \            \
       v       v  v       v            v
     Sona     Soff       Son          Sonc
        \                 /\          /
         \___________    /  \    ____/
                     \  /    \  /
                      vv      v
                      D       E
638 The main issue to notice here is that when we are processing path 1
639 (A->J->Son->D) we need to figure out the outgoing edge weights to
640 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
641 sum of the incoming weights to D remains Ed. The problem with simply
642 assuming that Ja (and Jc when processing path 2) has the same outgoing
643 probabilities to its successors as the original block J is that after
644 all paths are processed and other edges/counts removed (e.g. none
645 of Ec will reach D after processing path 2), we may end up with not
646 enough count flowing along the duplicated edge Sona->D.
648 Therefore, in the case of a joiner, we keep track of all counts
649 coming in along the current path, as well as from predecessors not
650 on any jump threading path (Eb in the above example). While we
651 first assume that the duplicated Eona for Ja->Sona has the same
652 probability as the original, we later compensate for other jump
653 threading paths that may eliminate edges. We do that by keeping track
654 of all counts coming into the original path that are not in a jump
655 thread (Eb in the above example, but as noted earlier, there could
656 be other predecessors incoming to the path at various points, such
657 as at Son). Call this cumulative non-path count coming into the path
658 before D Enonpath. We then ensure that the count from Sona->D is at
659 least as big as (Ed - Enonpath), but no bigger than the minimum
660 weight along the jump threading path. The probabilities of both the
661 original and duplicated joiner blocks J and Ja will be adjusted
662 accordingly after the updates. */
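/* A purely illustrative numeric walk-through (the numbers are invented,
   not taken from a real profile): suppose the path's first block has a
   total incoming count of 100, of which 60 arrives along edges on this
   threading path, so path_in_count is 60 and onpath_scale is roughly
   60%.  If the joiner's on-path out-edge originally carried a count of
   80, we initially estimate 0.6 * 80 = 48 of it as belonging to this
   path.  If elast carries a count of 70 and non-path predecessors
   contribute 10, path_out_count is then raised to at least
   70 - 10 = 60, but never above the minimum count seen along the path
   (60 here), giving a final path_out_count of 60.  */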
664 static bool
665 compute_path_counts (struct redirection_data *rd,
666 ssa_local_info_t *local_info,
667 gcov_type *path_in_count_ptr,
668 gcov_type *path_out_count_ptr,
669 int *path_in_freq_ptr)
671 edge e = rd->incoming_edges->e;
672 vec<jump_thread_edge *> *path = THREAD_PATH (e);
673 edge elast = path->last ()->e;
674 gcov_type nonpath_count = 0;
675 bool has_joiner = false;
676 gcov_type path_in_count = 0;
677 int path_in_freq = 0;
679 /* Start by accumulating incoming edge counts to the path's first bb
680 into a couple of buckets:
681 path_in_count: total count of incoming edges that flow into the
682 current path.
683 nonpath_count: total count of incoming edges that are not
684 flowing along *any* path. These are the counts
685 that will still flow along the original path after
686 all path duplication is done by potentially multiple
687 calls to this routine.
688 (any other incoming edge counts are for a different jump threading
689 path that will be handled by a later call to this routine.)
690 To make this easier, start by recording all incoming edges that flow into
691 the current path in a bitmap. We could add up the path's incoming edge
692 counts here, but we still need to walk all the first bb's incoming edges
693 below to add up the counts of the other edges not included in this jump
694 threading path. */
695 struct el *next, *el;
696 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
697 for (el = rd->incoming_edges; el; el = next)
699 next = el->next;
700 bitmap_set_bit (in_edge_srcs, el->e->src->index);
702 edge ein;
703 edge_iterator ei;
704 FOR_EACH_EDGE (ein, ei, e->dest->preds)
706 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
707 /* Simply check the incoming edge src against the set captured above. */
708 if (ein_path
709 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
711 /* It is necessary but not sufficient that the last path edges
712 are identical. There may be different paths that share the
713 same last path edge in the case where the last edge has a nocopy
714 source block. */
715 gcc_assert (ein_path->last ()->e == elast);
716 path_in_count += ein->count;
717 path_in_freq += EDGE_FREQUENCY (ein);
719 else if (!ein_path)
721 /* Keep track of the incoming edges that are not on any jump-threading
722 path. These counts will still flow out of the original path after all
723 jump threading is complete. */
724 nonpath_count += ein->count;
728 /* This is needed due to insane incoming frequencies. */
729 if (path_in_freq > BB_FREQ_MAX)
730 path_in_freq = BB_FREQ_MAX;
732 BITMAP_FREE (in_edge_srcs);
734 /* Now compute the fraction of the total count coming into the first
735 path bb that is from the current threading path. */
736 gcov_type total_count = e->dest->count;
737 /* Handle incoming profile insanities. */
738 if (total_count < path_in_count)
739 path_in_count = total_count;
740 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
742 /* Walk the entire path to do some more computation in order to estimate
743 how much of the path_in_count will flow out of the duplicated threading
744 path. In the non-joiner case this is straightforward (it should be
745 the same as path_in_count, although we will handle incoming profile
746 insanities by setting it equal to the minimum count along the path).
748 In the joiner case, we need to estimate how much of the path_in_count
749 will stay on the threading path after the joiner's conditional branch.
750 We don't really know for sure how much of the counts
751 associated with this path go to each successor of the joiner, but we'll
752 estimate based on the fraction of the total count coming into the path
753 bb that was from the threading paths (computed above in onpath_scale).
754 Afterwards, we will need to do some fixup to account for other threading
755 paths and possible profile insanities.
757 In order to estimate the joiner case's counts we also need to update
758 nonpath_count with any additional counts coming into the path. Other
759 blocks along the path may have additional predecessors from outside
760 the path. */
761 gcov_type path_out_count = path_in_count;
762 gcov_type min_path_count = path_in_count;
763 for (unsigned int i = 1; i < path->length (); i++)
765 edge epath = (*path)[i]->e;
766 gcov_type cur_count = epath->count;
767 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
769 has_joiner = true;
770 cur_count = apply_probability (cur_count, onpath_scale);
772 /* In the joiner case we need to update nonpath_count for any edges
773 coming into the path that will contribute to the count flowing
774 into the path successor. */
775 if (has_joiner && epath != elast)
777 /* Look for other incoming edges after joiner. */
778 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
780 if (ein != epath
781 /* Ignore in edges from blocks we have duplicated for a
782 threading path, which have duplicated edge counts until
783 they are redirected by an invocation of this routine. */
784 && !bitmap_bit_p (local_info->duplicate_blocks,
785 ein->src->index))
786 nonpath_count += ein->count;
789 if (cur_count < path_out_count)
790 path_out_count = cur_count;
791 if (epath->count < min_path_count)
792 min_path_count = epath->count;
795 /* We computed path_out_count above assuming that this path targeted
796 the joiner's on-path successor with the same likelihood as it
797 reached the joiner. However, other thread paths through the joiner
798 may take a different path through the normal copy source block
799 (i.e. they have a different elast), meaning that they do not
800 contribute any counts to this path's elast. As a result, it may
801 turn out that this path must have more count flowing to the on-path
802 successor of the joiner. Essentially, all of this path's elast
803 count must be contributed by this path and any nonpath counts
804 (since any path through the joiner with a different elast will not
805 include a copy of this elast in its duplicated path).
806 So ensure that this path's path_out_count is at least the
807 difference between elast->count and nonpath_count. Otherwise the edge
808 counts after threading will not be sane. */
809 if (has_joiner && path_out_count < elast->count - nonpath_count)
811 path_out_count = elast->count - nonpath_count;
812 /* But neither can we go above the minimum count along the path
813 we are duplicating. This can be an issue due to profile
814 insanities coming in to this pass. */
815 if (path_out_count > min_path_count)
816 path_out_count = min_path_count;
819 *path_in_count_ptr = path_in_count;
820 *path_out_count_ptr = path_out_count;
821 *path_in_freq_ptr = path_in_freq;
822 return has_joiner;
826 /* Update the counts and frequencies for both an original path
827 edge EPATH and its duplicate EDUP. The duplicate source block
828 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
829 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
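/* For instance (hypothetical numbers): with PATH_IN_COUNT = 60 and
   PATH_OUT_COUNT = 60, the duplicate source block's count becomes 60
   while the original block's count is reduced by 60 (clamped at zero
   on underflow), and EDUP's count becomes 60 while that same amount is
   subtracted from EPATH's count.  */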
830 static void
831 update_profile (edge epath, edge edup, gcov_type path_in_count,
832 gcov_type path_out_count, int path_in_freq)
835 /* First update the duplicated block's count / frequency. */
836 if (edup)
838 basic_block dup_block = edup->src;
839 gcc_assert (dup_block->count == 0);
840 gcc_assert (dup_block->frequency == 0);
841 dup_block->count = path_in_count;
842 dup_block->frequency = path_in_freq;
845 /* Now update the original block's count and frequency in the
846 opposite manner - remove the counts/freq that will flow
847 into the duplicated block. Handle underflow due to precision/
848 rounding issues. */
849 epath->src->count -= path_in_count;
850 if (epath->src->count < 0)
851 epath->src->count = 0;
852 epath->src->frequency -= path_in_freq;
853 if (epath->src->frequency < 0)
854 epath->src->frequency = 0;
856 /* Next update this path edge's original and duplicated counts. We know
857 that the duplicated path will have path_out_count flowing
858 out of it (in the joiner case this is the count along the duplicated path
859 out of the duplicated joiner). This count can then be removed from the
860 original path edge. */
861 if (edup)
862 edup->count = path_out_count;
863 epath->count -= path_out_count;
864 gcc_assert (epath->count >= 0);
868 /* The duplicate and original joiner blocks may end up with different
869 probabilities (different from both the original and from each other).
870 Recompute the probabilities here once we have updated the edge
871 counts and frequencies. */
873 static void
874 recompute_probabilities (basic_block bb)
876 edge esucc;
877 edge_iterator ei;
878 FOR_EACH_EDGE (esucc, ei, bb->succs)
880 if (!bb->count)
881 continue;
883 /* Prevent overflow computation due to insane profiles. */
884 if (esucc->count < bb->count)
885 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
886 bb->count);
887 else
888 /* This can happen with missing/guessed probabilities, since we
889 may determine that more is flowing along the duplicated
890 path than the joiner succ probabilities allowed.
891 Counts and freqs will be insane after jump threading;
892 at least make sure the probability is sane or we will
893 get a flow verification error.
894 Not much we can do to make counts/freqs sane without
895 redoing the profile estimation. */
896 esucc->probability = REG_BR_PROB_BASE;
901 /* Update the counts of the original and duplicated edges from a joiner
902 that go off path, given that we have already determined that the
903 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
904 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
905 edge from joiner is EPATH. */
907 static void
908 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
909 gcov_type path_in_count,
910 gcov_type path_out_count)
912 /* Compute the count that currently flows off path from the joiner.
913 In other words, the total count of joiner's out edges other than
914 epath. Compute this by walking the successors instead of
915 subtracting epath's count from the joiner bb count, since there
916 are sometimes slight insanities where the total out edge count is
917 larger than the bb count (possibly due to rounding/truncation
918 errors). */
919 gcov_type total_orig_off_path_count = 0;
920 edge enonpath;
921 edge_iterator ei;
922 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
924 if (enonpath == epath)
925 continue;
926 total_orig_off_path_count += enonpath->count;
929 /* For the path that we are duplicating, the amount that will flow
930 off path from the duplicated joiner is the delta between the
931 path's cumulative in count and the portion of that count we
932 estimated above as flowing from the joiner along the duplicated
933 path. */
934 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
936 /* Now do the actual updates of the off-path edges. */
937 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
939 /* Look for edges going off of the threading path. */
940 if (enonpath == epath)
941 continue;
943 /* Find the corresponding edge out of the duplicated joiner. */
944 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
945 gcc_assert (enonpathdup);
947 /* We can't use the original probability of the joiner's out
948 edges, since the probabilities of the original branch
949 and the duplicated branches may vary after all threading is
950 complete. But apportion the duplicated joiner's off-path
951 total edge count computed earlier (total_dup_off_path_count)
952 among the duplicated off-path edges based on their original
952 ratio to the full off-path count (total_orig_off_path_count). */
955 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
956 total_orig_off_path_count);
957 /* Give the duplicated offpath edge a portion of the duplicated
958 total. */
959 enonpathdup->count = apply_scale (scale,
960 total_dup_off_path_count);
961 /* Now update the original offpath edge count, handling underflow
962 due to rounding errors. */
963 enonpath->count -= enonpathdup->count;
964 if (enonpath->count < 0)
965 enonpath->count = 0;
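/* A small made-up example: if the joiner originally sent a count of 40
   off-path, split 30/10 between two off-path edges, and the duplicated
   joiner keeps path_in_count - path_out_count = 20 off-path, then the
   duplicated off-path edges receive roughly 15 and 5 (preserving the
   original 3:1 ratio), and those amounts are subtracted from the
   corresponding original off-path edges.  */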
970 /* Check if the paths through RD all have estimated frequencies but zero
971 profile counts. This is more accurate than checking the entry block
972 for a zero profile count, since profile insanities sometimes creep in. */
974 static bool
975 estimated_freqs_path (struct redirection_data *rd)
977 edge e = rd->incoming_edges->e;
978 vec<jump_thread_edge *> *path = THREAD_PATH (e);
979 edge ein;
980 edge_iterator ei;
981 bool non_zero_freq = false;
982 FOR_EACH_EDGE (ein, ei, e->dest->preds)
984 if (ein->count)
985 return false;
986 non_zero_freq |= ein->src->frequency != 0;
989 for (unsigned int i = 1; i < path->length (); i++)
991 edge epath = (*path)[i]->e;
992 if (epath->src->count)
993 return false;
994 non_zero_freq |= epath->src->frequency != 0;
995 edge esucc;
996 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
998 if (esucc->count)
999 return false;
1000 non_zero_freq |= esucc->src->frequency != 0;
1003 return non_zero_freq;
1007 /* Invoked for routines that have guessed frequencies and no profile
1008 counts to record the block and edge frequencies for paths through RD
1009 in the profile count fields of those blocks and edges. This is because
1010 ssa_fix_duplicate_block_edges incrementally updates the block and
1011 edge counts as edges are redirected, and it is difficult to do that
1012 for edge frequencies which are computed on the fly from the source
1013 block frequency and probability. When a block frequency is updated
1014 its outgoing edge frequencies are affected and become difficult to
1015 adjust. */
1017 static void
1018 freqs_to_counts_path (struct redirection_data *rd)
1020 edge e = rd->incoming_edges->e;
1021 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1022 edge ein;
1023 edge_iterator ei;
1024 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1026 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1027 errors applying the probability when the frequencies are very
1028 small. */
1029 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1030 ein->probability);
1033 for (unsigned int i = 1; i < path->length (); i++)
1035 edge epath = (*path)[i]->e;
1036 edge esucc;
1037 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1038 errors applying the edge probability when the frequencies are very
1039 small. */
1040 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1041 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1042 esucc->count = apply_probability (esucc->src->count,
1043 esucc->probability);
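/* For example (invented numbers): a source block with frequency 1000
   gets a temporary count of 1000 * REG_BR_PROB_BASE, and an outgoing
   edge with probability 30% then receives roughly 0.3 of that, so the
   relative weights survive the integer arithmetic with far less
   rounding error than scaling the raw frequencies directly.  */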
1048 /* For routines that have guessed frequencies and no profile counts, where we
1049 used freqs_to_counts_path to record block and edge frequencies for paths
1050 through RD, we clear the counts after completing all updates for RD.
1051 The updates in ssa_fix_duplicate_block_edges are based on the count fields,
1052 but the block frequencies and edge probabilities were updated as well,
1053 so we can simply clear the count fields. */
1055 static void
1056 clear_counts_path (struct redirection_data *rd)
1058 edge e = rd->incoming_edges->e;
1059 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1060 edge ein, esucc;
1061 edge_iterator ei;
1062 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1063 ein->count = 0;
1065 /* First clear counts along original path. */
1066 for (unsigned int i = 1; i < path->length (); i++)
1068 edge epath = (*path)[i]->e;
1069 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1070 esucc->count = 0;
1071 epath->src->count = 0;
1073 /* Also need to clear the counts along duplicated path. */
1074 for (unsigned int i = 0; i < 2; i++)
1076 basic_block dup = rd->dup_blocks[i];
1077 if (!dup)
1078 continue;
1079 FOR_EACH_EDGE (esucc, ei, dup->succs)
1080 esucc->count = 0;
1081 dup->count = 0;
1085 /* Wire up the outgoing edges from the duplicate blocks and
1086 update any PHIs as needed. Also update the profile counts
1087 on the original and duplicate blocks and edges. */
1088 void
1089 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1090 ssa_local_info_t *local_info)
1092 bool multi_incomings = (rd->incoming_edges->next != NULL);
1093 edge e = rd->incoming_edges->e;
1094 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1095 edge elast = path->last ()->e;
1096 gcov_type path_in_count = 0;
1097 gcov_type path_out_count = 0;
1098 int path_in_freq = 0;
1100 /* This routine updates profile counts, frequencies, and probabilities
1101 incrementally. Since it is difficult to do the incremental updates
1102 using frequencies/probabilities alone, for routines without profile
1103 data we first take a snapshot of the existing block and edge frequencies
1104 by copying them into the empty profile count fields. These counts are
1105 then used to do the incremental updates, and cleared at the end of this
1106 routine. If the function is marked as having a profile, we still check
1107 to see if the paths through RD are using estimated frequencies because
1108 the routine had zero profile counts. */
1109 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1110 || estimated_freqs_path (rd));
1111 if (do_freqs_to_counts)
1112 freqs_to_counts_path (rd);
1114 /* First determine how much profile count to move from original
1115 path to the duplicate path. This is tricky in the presence of
1116 a joiner (see comments for compute_path_counts), where some portion
1117 of the path's counts will flow off-path from the joiner. In the
1118 non-joiner case the path_in_count and path_out_count should be the
1119 same. */
1120 bool has_joiner = compute_path_counts (rd, local_info,
1121 &path_in_count, &path_out_count,
1122 &path_in_freq);
1124 int cur_path_freq = path_in_freq;
1125 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1127 edge epath = (*path)[i]->e;
1129 /* If we were threading through a joiner block, then we want
1130 to keep its control statement and redirect an outgoing edge.
1131 Else we want to remove the control statement & edges, then create
1132 a new outgoing edge. In both cases we may need to update PHIs. */
1133 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1135 edge victim;
1136 edge e2;
1138 gcc_assert (has_joiner);
1140 /* This updates the PHIs at the destination of the duplicate
1141 block. Pass 0 instead of i if we are threading a path which
1142 has multiple incoming edges. */
1143 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1144 path, multi_incomings ? 0 : i);
1146 /* Find the edge from the duplicate block to the block we're
1147 threading through. That's the edge we want to redirect. */
1148 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1150 /* If there are no remaining blocks on the path to duplicate,
1151 then redirect VICTIM to the final destination of the jump
1152 threading path. */
1153 if (!any_remaining_duplicated_blocks (path, i))
1155 e2 = redirect_edge_and_branch (victim, elast->dest);
1156 /* If we redirected the edge, then we need to copy PHI arguments
1157 at the target. If the edge already existed (e2 != victim
1158 case), then the PHIs in the target already have the correct
1159 arguments. */
1160 if (e2 == victim)
1161 copy_phi_args (e2->dest, elast, e2,
1162 path, multi_incomings ? 0 : i);
1164 else
1166 /* Redirect VICTIM to the next duplicated block in the path. */
1167 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1169 /* We need to update the PHIs in the next duplicated block. We
1170 want the new PHI args to have the same value as they had
1171 in the source of the next duplicate block.
1173 Thus, we need to know which edge we traversed into the
1174 source of the duplicate. Furthermore, we may have
1175 traversed many edges to reach the source of the duplicate.
1177 Walk through the path starting at element I until we
1178 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1179 the edge from the prior element. */
1180 for (unsigned int j = i + 1; j < path->length (); j++)
1182 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1184 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1185 break;
1190 /* Update the counts and frequency of both the original block
1191 and path edge, and the duplicates. The path duplicate's
1192 incoming count and frequency are the totals for all edges
1193 incoming to this jump threading path computed earlier.
1194 And we know that the duplicated path will have path_out_count
1195 flowing out of it (i.e. along the duplicated path out of the
1196 duplicated joiner). */
1197 update_profile (epath, e2, path_in_count, path_out_count,
1198 path_in_freq);
1200 /* Next we need to update the counts of the original and duplicated
1201 edges from the joiner that go off path. */
1202 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1203 path_out_count);
1205 /* Finally, we need to set the probabilities on the duplicated
1206 edges out of the duplicated joiner (e2->src). The probabilities
1207 along the original path will all be updated below after we finish
1208 processing the whole path. */
1209 recompute_probabilities (e2->src);
1211 /* Record the frequency flowing to the downstream duplicated
1212 path blocks. */
1213 cur_path_freq = EDGE_FREQUENCY (e2);
1215 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1217 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1218 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1219 multi_incomings ? 0 : i);
1220 if (count == 1)
1221 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1223 /* Update the counts and frequency of both the original block
1224 and path edge, and the duplicates. Since we are now after
1225 any joiner that may have existed on the path, the count
1226 flowing along the duplicated threaded path is path_out_count.
1227 If we didn't have a joiner, then cur_path_freq was the sum
1228 of the total frequencies along all incoming edges to the
1229 thread path (path_in_freq). If we had a joiner, it would have
1230 been updated at the end of that handling to the edge frequency
1231 along the duplicated joiner path edge. */
1232 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1233 path_out_count, path_out_count,
1234 cur_path_freq);
1236 else
1238 /* No copy case. In this case we don't have an equivalent block
1239 on the duplicated thread path to update, but we do need
1240 to remove the portion of the counts/freqs that were moved
1241 to the duplicated path from the counts/freqs flowing through
1242 this block on the original path. Since all the no-copy edges
1243 are after any joiner, the removed count is the same as
1244 path_out_count.
1246 If we didn't have a joiner, then cur_path_freq was the sum
1247 of the total frequencies along all incoming edges to the
1248 thread path (path_in_freq). If we had a joiner, it would have
1249 been updated at the end of that handling to the edge frequency
1250 along the duplicated joiner path edge. */
1251 update_profile (epath, NULL, path_out_count, path_out_count,
1252 cur_path_freq);
1255 /* Increment the index into the duplicated path when we processed
1256 a duplicated block. */
1257 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1258 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1260 count++;
1264 /* Now walk orig blocks and update their probabilities, since the
1265 counts and freqs should be updated properly by the above loop. */
1266 for (unsigned int i = 1; i < path->length (); i++)
1268 edge epath = (*path)[i]->e;
1269 recompute_probabilities (epath->src);
1272 /* Done with all profile and frequency updates, clear counts if they
1273 were copied. */
1274 if (do_freqs_to_counts)
1275 clear_counts_path (rd);
1278 /* Hash table traversal callback routine to create duplicate blocks. */
1280 int
1281 ssa_create_duplicates (struct redirection_data **slot,
1282 ssa_local_info_t *local_info)
1284 struct redirection_data *rd = *slot;
1286 /* The second duplicated block in a jump threading path is specific
1287 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1289 Each time we're called, we have to look through the path and see
1290 if a second block needs to be duplicated.
1292 Note the search starts with the third edge on the path. The first
1293 edge is the incoming edge, the second edge always has its source
1294 duplicated. Thus we start our search with the third edge. */
1295 vec<jump_thread_edge *> *path = rd->path;
1296 for (unsigned int i = 2; i < path->length (); i++)
1298 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1299 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1301 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1302 &local_info->duplicate_blocks);
1303 break;
1307 /* Create a template block if we have not done so already. Otherwise
1308 use the template to create a new block. */
1309 if (local_info->template_block == NULL)
1311 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1312 &local_info->duplicate_blocks);
1313 local_info->template_block = rd->dup_blocks[0];
1315 /* We do not create any outgoing edges for the template. We will
1316 take care of that in a later traversal. That way we do not
1317 create edges that are going to just be deleted. */
1319 else
1321 create_block_for_threading (local_info->template_block, rd, 0,
1322 &local_info->duplicate_blocks);
1324 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1325 block. */
1326 ssa_fix_duplicate_block_edges (rd, local_info);
1329 /* Keep walking the hash table. */
1330 return 1;
1333 /* We did not create any outgoing edges for the template block during
1334 block creation. This hash table traversal callback creates the
1335 outgoing edge for the template block. */
1337 inline int
1338 ssa_fixup_template_block (struct redirection_data **slot,
1339 ssa_local_info_t *local_info)
1341 struct redirection_data *rd = *slot;
1343 /* If this is the template block, halt the traversal after updating
1344 it appropriately.
1346 If we were threading through a joiner block, then we want
1347 to keep its control statement and redirect an outgoing edge.
1348 Else we want to remove the control statement & edges, then create
1349 a new outgoing edge. In both cases we may need to update PHIs. */
1350 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1352 ssa_fix_duplicate_block_edges (rd, local_info);
1353 return 0;
1356 return 1;
1359 /* Hash table traversal callback to redirect each incoming edge
1360 associated with this hash table element to its new destination. */
1362 int
1363 ssa_redirect_edges (struct redirection_data **slot,
1364 ssa_local_info_t *local_info)
1366 struct redirection_data *rd = *slot;
1367 struct el *next, *el;
1369 /* Walk over all the incoming edges associated with this hash table
1370 entry. */
1371 for (el = rd->incoming_edges; el; el = next)
1373 edge e = el->e;
1374 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1376 /* Go ahead and free this element from the list. Doing this now
1377 avoids the need for another list walk when we destroy the hash
1378 table. */
1379 next = el->next;
1380 free (el);
1382 thread_stats.num_threaded_edges++;
1384 if (rd->dup_blocks[0])
1386 edge e2;
1388 if (dump_file && (dump_flags & TDF_DETAILS))
1389 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1390 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1392 /* If we redirect a loop latch edge cancel its loop. */
1393 if (e->src == e->src->loop_father->latch)
1394 mark_loop_for_removal (e->src->loop_father);
1396 /* Redirect the incoming edge (possibly to the joiner block) to the
1397 appropriate duplicate block. */
1398 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1399 gcc_assert (e == e2);
1400 flush_pending_stmts (e2);
1403 /* Go ahead and clear E->aux. It's not needed anymore and failure
1404 to clear it will cause all kinds of unpleasant problems later. */
1405 delete_jump_thread_path (path);
1406 e->aux = NULL;
1410 /* Indicate that we actually threaded one or more jumps. */
1411 if (rd->incoming_edges)
1412 local_info->jumps_threaded = true;
1414 return 1;
1417 /* Return true if this block has no executable statements other than
1418 a simple ctrl flow instruction. When the number of outgoing edges
1419 is one, this is equivalent to a "forwarder" block. */
1421 static bool
1422 redirection_block_p (basic_block bb)
1424 gimple_stmt_iterator gsi;
1426 /* Advance to the first executable statement. */
1427 gsi = gsi_start_bb (bb);
1428 while (!gsi_end_p (gsi)
1429 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1430 || is_gimple_debug (gsi_stmt (gsi))
1431 || gimple_nop_p (gsi_stmt (gsi))
1432 || gimple_clobber_p (gsi_stmt (gsi))))
1433 gsi_next (&gsi);
1435 /* Check if this is an empty block. */
1436 if (gsi_end_p (gsi))
1437 return true;
1439 /* Test that we've reached the terminating control statement. */
1440 return gsi_stmt (gsi)
1441 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1442 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1443 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1446 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1447 is reached via one or more specific incoming edges, we know which
1448 outgoing edge from BB will be traversed.
1450 We want to redirect those incoming edges to the target of the
1451 appropriate outgoing edge. Doing so avoids a conditional branch
1452 and may expose new optimization opportunities. Note that we have
1453 to update dominator tree and SSA graph after such changes.
1455 The key to keeping the SSA graph update manageable is to duplicate
1456 the side effects occurring in BB so that those side effects still
1457 occur on the paths which bypass BB after redirecting edges.
1459 We accomplish this by creating duplicates of BB and arranging for
1460 the duplicates to unconditionally pass control to one specific
1461 successor of BB. We then revector the incoming edges into BB to
1462 the appropriate duplicate of BB.
1464 If NOLOOP_ONLY is true, we only perform the threading as long as it
1465 does not affect the structure of the loops in a nontrivial way.
1467 If JOINERS is true, then thread through joiner blocks as well. */
1469 static bool
1470 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1472 /* E is an incoming edge into BB that we may or may not want to
1473 redirect to a duplicate of BB. */
1474 edge e, e2;
1475 edge_iterator ei;
1476 ssa_local_info_t local_info;
1478 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1480 /* To avoid scanning a linear array for the element we need we instead
1481 use a hash table. For normal code there should be no noticeable
1482 difference. However, if we have a block with a large number of
1483 incoming and outgoing edges such linear searches can get expensive. */
1484 redirection_data
1485 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1487 /* Record each unique threaded destination into a hash table for
1488 efficient lookups. */
1489 FOR_EACH_EDGE (e, ei, bb->preds)
1491 if (e->aux == NULL)
1492 continue;
1494 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1496 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1497 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1498 continue;
1500 e2 = path->last ()->e;
1501 if (!e2 || noloop_only)
1503 /* If NOLOOP_ONLY is true, we only allow threading through the
1504 header of a loop to exit edges. */
1506 /* One case occurs when there was a loop header buried in a jump
1507 threading path that crosses loop boundaries. We do not try
1508 and thread this elsewhere, so just cancel the jump threading
1509 request by clearing the AUX field now. */
1510 if ((bb->loop_father != e2->src->loop_father
1511 && !loop_exit_edge_p (e2->src->loop_father, e2))
1512 || (e2->src->loop_father != e2->dest->loop_father
1513 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1515 /* Since this case is not handled by our special code
1516 to thread through a loop header, we must explicitly
1517 cancel the threading request here. */
1518 delete_jump_thread_path (path);
1519 e->aux = NULL;
1520 continue;
1523 /* Another case occurs when trying to thread through our
1524 own loop header, possibly from inside the loop. We will
1525 thread these later. */
1526 unsigned int i;
1527 for (i = 1; i < path->length (); i++)
1529 if ((*path)[i]->e->src == bb->loop_father->header
1530 && (!loop_exit_edge_p (bb->loop_father, e2)
1531 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1532 break;
1535 if (i != path->length ())
1536 continue;
1539 /* Insert the outgoing edge into the hash table if it is not
1540 already in the hash table. */
1541 lookup_redirection_data (e, INSERT);
1544 /* We do not update dominance info. */
1545 free_dominance_info (CDI_DOMINATORS);
1547 /* We know we only thread through the loop header to loop exits.
1548 Let the basic block duplication hook know we are not creating
1549 a multiple entry loop. */
1550 if (noloop_only
1551 && bb == bb->loop_father->header)
1552 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1554 /* Now create duplicates of BB.
1556 Note that for a block with a high outgoing degree we can waste
1557 a lot of time and memory creating and destroying useless edges.
1559 So we first duplicate BB and remove the control structure at the
1560 tail of the duplicate as well as all outgoing edges from the
1561 duplicate. We then use that duplicate block as a template for
1562 the rest of the duplicates. */
1563 local_info.template_block = NULL;
1564 local_info.bb = bb;
1565 local_info.jumps_threaded = false;
1566 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1567 (&local_info);
1569 /* The template does not have an outgoing edge. Create that outgoing
1570 edge and update PHI nodes at the edge's target as necessary.
1572 We do this after creating all the duplicates to avoid creating
1573 unnecessary edges. */
1574 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1575 (&local_info);
1577 /* The hash table traversals above created the duplicate blocks (and the
1578 statements within the duplicate blocks). This loop creates PHI nodes for
1579 the duplicated blocks and redirects the incoming edges into BB to reach
1580 the duplicates of BB. */
1581 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1582 (&local_info);
1584 /* Done with this block. Clear REDIRECTION_DATA. */
1585 delete redirection_data;
1586 redirection_data = NULL;
1588 if (noloop_only
1589 && bb == bb->loop_father->header)
1590 set_loop_copy (bb->loop_father, NULL);
1592 BITMAP_FREE (local_info.duplicate_blocks);
1593 local_info.duplicate_blocks = NULL;
1595 /* Indicate to our caller whether or not any jumps were threaded. */
1596 return local_info.jumps_threaded;
1599 /* Wrapper for thread_block_1 so that we can first handle jump
1600 thread paths which do not involve copying joiner blocks, then
1601 handle jump thread paths which have joiner blocks.
1603 By doing things this way we can be as aggressive as possible and
1604 not worry that copying a joiner block will create a jump threading
1605 opportunity. */
1607 static bool
1608 thread_block (basic_block bb, bool noloop_only)
1610 bool retval;
1611 retval = thread_block_1 (bb, noloop_only, false);
1612 retval |= thread_block_1 (bb, noloop_only, true);
1613 return retval;
1617 /* Threads edge E through E->dest to the next edge on E's jump thread path.
1618 Returns the copy of E->dest created during threading, or E->dest if it was
1619 not necessary to copy it (E is its single predecessor). */
1621 static basic_block
1622 thread_single_edge (edge e)
1624 basic_block bb = e->dest;
1625 struct redirection_data rd;
1626 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1627 edge eto = (*path)[1]->e;
1629 delete_jump_thread_path (path);
1630 e->aux = NULL;
1632 thread_stats.num_threaded_edges++;
1634 if (single_pred_p (bb))
1636 /* If BB has just a single predecessor, we need only remove the
1637 control statement at its end and the successor edges other than ETO. */
1638 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1640 /* And fixup the flags on the single remaining edge. */
1641 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1642 eto->flags |= EDGE_FALLTHRU;
1644 return bb;
1647 /* Otherwise, we need to create a copy. */
1648 if (e->dest == eto->src)
1649 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1651 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1652 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1653 npath->safe_push (x);
1655 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1656 npath->safe_push (x);
1657 rd.path = npath;
1659 create_block_for_threading (bb, &rd, 0, NULL);
1660 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1661 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1663 if (dump_file && (dump_flags & TDF_DETAILS))
1664 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1665 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1667 rd.dup_blocks[0]->count = e->count;
1668 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1669 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1670 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1671 flush_pending_stmts (e);
1673 delete_jump_thread_path (npath);
1674 return rd.dup_blocks[0];
1677 /* Callback for dfs_enumerate_from. Returns true if BB is different
1678 from STOP and DBDS_CE_STOP. */
1680 static basic_block dbds_ce_stop;
1681 static bool
1682 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1684 return (bb != (const_basic_block) stop
1685 && bb != dbds_ce_stop);
1688 /* Evaluates the dominance relationship between BB and the latch of LOOP,
1689 and returns the state. */
1691 enum bb_dom_status
1693 /* BB does not dominate latch of the LOOP. */
1694 DOMST_NONDOMINATING,
1695 /* The LOOP is broken (there is no path from the header to its latch). */
1696 DOMST_LOOP_BROKEN,
1697 /* BB dominates the latch of the LOOP. */
1698 DOMST_DOMINATING
1701 static enum bb_dom_status
1702 determine_bb_domination_status (struct loop *loop, basic_block bb)
1704 basic_block *bblocks;
1705 unsigned nblocks, i;
1706 bool bb_reachable = false;
1707 edge_iterator ei;
1708 edge e;
1710 /* This function assumes BB is a successor of LOOP->header.
1711 If that is not the case, return DOMST_NONDOMINATING, which
1712 is always safe. */
1714 bool ok = false;
1716 FOR_EACH_EDGE (e, ei, bb->preds)
1718 if (e->src == loop->header)
1720 ok = true;
1721 break;
1725 if (!ok)
1726 return DOMST_NONDOMINATING;
1729 if (bb == loop->latch)
1730 return DOMST_DOMINATING;
1732 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1733 from it. */
1735 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1736 dbds_ce_stop = loop->header;
1737 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1738 bblocks, loop->num_nodes, bb);
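/* The reverse DFS above collects the blocks from which the latch is
   reachable without expanding past BB or the loop header.  The loop
   below inspects the predecessors of those blocks: an edge from the
   header means the latch can be reached while bypassing BB
   (DOMST_NONDOMINATING); otherwise, an edge from BB means BB lies on
   every header-to-latch path (DOMST_DOMINATING); if neither is seen,
   there is no path from the header to the latch at all
   (DOMST_LOOP_BROKEN).  */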
1739 for (i = 0; i < nblocks; i++)
1740 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1742 if (e->src == loop->header)
1744 free (bblocks);
1745 return DOMST_NONDOMINATING;
1747 if (e->src == bb)
1748 bb_reachable = true;
1751 free (bblocks);
1752 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1755 /* Return true if BB is part of the new pre-header that is created
1756 when threading the latch to DATA. */
1758 static bool
1759 def_split_header_continue_p (const_basic_block bb, const void *data)
1761 const_basic_block new_header = (const_basic_block) data;
1762 const struct loop *l;
1764 if (bb == new_header
1765 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1766 return false;
1767 for (l = bb->loop_father; l; l = loop_outer (l))
1768 if (l == new_header->loop_father)
1769 return true;
1770 return false;
1773 /* Thread jumps through the header of LOOP. Returns true if the CFG changes.
1774 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1775 to the inside of the loop. */
1777 static bool
1778 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1780 basic_block header = loop->header;
1781 edge e, tgt_edge, latch = loop_latch_edge (loop);
1782 edge_iterator ei;
1783 basic_block tgt_bb, atgt_bb;
1784 enum bb_dom_status domst;
1786 /* We have already threaded through headers to exits, so all the threading
1787 requests now are to the inside of the loop. We need to avoid creating
1788 irreducible regions (i.e., loops with more than one entry block), and
1789 also loops with several latch edges, or new subloops of the loop (although
1790 there are cases where it might be appropriate, it is difficult to decide,
1791 and doing it wrongly may confuse other optimizers).
1793 We could handle more general cases here. However, the intention is to
1794 preserve some information about the loop, which is impossible if its
1795 structure changes significantly, in a way that is not well understood.
1796 Thus we only handle a few important special cases, in which updating
1797 the loop-carried information should also be feasible:
1799 1) Propagation of latch edge to a block that dominates the latch block
1800 of a loop. This aims to handle the following idiom:
1802 first = 1;
1803 while (1)
1805 if (first)
1806 initialize;
1807 first = 0;
1808 body;
1811 After threading the latch edge, this becomes
1813 first = 1;
1814 if (first)
1815 initialize;
1816 while (1)
1818 first = 0;
1819 body;
1822 The original header of the loop is moved out of it, and we may thread
1823 the remaining edges through it without further constraints.
1825 2) All entry edges are propagated to a single basic block that dominates
1826 the latch block of the loop. This aims to handle the following idiom
1827 (normally created for "for" loops):
1829 i = 0;
1830 while (1)
1832 if (i >= 100)
1833 break;
1834 body;
1835 i++;
1838 This becomes
1840 i = 0;
1841 while (1)
1843 body;
1844 i++;
1845 if (i >= 100)
1846 break;
1850 /* Threading through the header won't improve the code if the header has just
1851 one successor. */
1852 if (single_succ_p (header))
1853 goto fail;
1855 /* If we threaded the latch using a joiner block, we cancel the
1856 threading opportunity out of an abundance of caution. However,
1857 still allow threading from outside to inside the loop. */
1858 if (latch->aux)
1860 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1861 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1863 delete_jump_thread_path (path);
1864 latch->aux = NULL;
1868 if (latch->aux)
1870 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1871 tgt_edge = (*path)[1]->e;
1872 tgt_bb = tgt_edge->dest;
1874 else if (!may_peel_loop_headers
1875 && !redirection_block_p (loop->header))
1876 goto fail;
1877 else
1879 tgt_bb = NULL;
1880 tgt_edge = NULL;
1881 FOR_EACH_EDGE (e, ei, header->preds)
1883 if (!e->aux)
1885 if (e == latch)
1886 continue;
1888 /* If the latch is not threaded, and there is a header
1889 edge that is not threaded, we would create a loop
1890 with multiple entries. */
1891 goto fail;
1894 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1896 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1897 goto fail;
1898 tgt_edge = (*path)[1]->e;
1899 atgt_bb = tgt_edge->dest;
1900 if (!tgt_bb)
1901 tgt_bb = atgt_bb;
1902 /* Two targets of threading would make us create a loop
1903 with multiple entries. */
1904 else if (tgt_bb != atgt_bb)
1905 goto fail;
1908 if (!tgt_bb)
1910 /* There are no threading requests. */
1911 return false;
1914 /* Redirecting to an empty loop latch is useless. */
1915 if (tgt_bb == loop->latch
1916 && empty_block_p (loop->latch))
1917 goto fail;
1920 /* The target block must dominate the loop latch, otherwise we would be
1921 creating a subloop. */
1922 domst = determine_bb_domination_status (loop, tgt_bb);
1923 if (domst == DOMST_NONDOMINATING)
1924 goto fail;
1925 if (domst == DOMST_LOOP_BROKEN)
1927 /* If the loop ceased to exist, mark it as such, and thread through its
1928 original header. */
1929 mark_loop_for_removal (loop);
1930 return thread_block (header, false);
1933 if (tgt_bb->loop_father->header == tgt_bb)
1935 /* If the target of the threading is a header of a subloop, we need
1936 to create a preheader for it, so that the headers of the two loops
1937 do not merge. */
1938 if (EDGE_COUNT (tgt_bb->preds) > 2)
1940 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1941 gcc_assert (tgt_bb != NULL);
1943 else
1944 tgt_bb = split_edge (tgt_edge);
1947 if (latch->aux)
1949 basic_block *bblocks;
1950 unsigned nblocks, i;
1952 /* First handle the case where the latch edge is redirected. We are copying
1953 the loop header but not creating a multiple entry loop. Make the
1954 cfg manipulation code aware of that fact. */
1955 set_loop_copy (loop, loop);
1956 loop->latch = thread_single_edge (latch);
1957 set_loop_copy (loop, NULL);
1958 gcc_assert (single_succ (loop->latch) == tgt_bb);
1959 loop->header = tgt_bb;
1961 /* Remove the new pre-header blocks from our loop. */
1962 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1963 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1964 bblocks, loop->num_nodes, tgt_bb);
1965 for (i = 0; i < nblocks; i++)
1966 if (bblocks[i]->loop_father == loop)
1968 remove_bb_from_loops (bblocks[i]);
1969 add_bb_to_loop (bblocks[i], loop_outer (loop));
1971 free (bblocks);
1973 /* If the new header has multiple latches, mark it so. */
1974 FOR_EACH_EDGE (e, ei, loop->header->preds)
1975 if (e->src->loop_father == loop
1976 && e->src != loop->latch)
1978 loop->latch = NULL;
1979 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1982 /* Cancel remaining threading requests that would make the
1983 loop a multiple entry loop. */
1984 FOR_EACH_EDGE (e, ei, header->preds)
1986 edge e2;
1988 if (e->aux == NULL)
1989 continue;
1991 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1992 e2 = path->last ()->e;
1994 if (e->src->loop_father != e2->dest->loop_father
1995 && e2->dest != loop->header)
1997 delete_jump_thread_path (path);
1998 e->aux = NULL;
2002 /* Thread the remaining edges through the former header. */
2003 thread_block (header, false);
2005 else
2007 basic_block new_preheader;
2009 /* Now consider the case where entry edges are redirected to the new entry
2010 block. Remember one entry edge, so that we can find the new
2011 preheader (its destination after threading). */
2012 FOR_EACH_EDGE (e, ei, header->preds)
2014 if (e->aux)
2015 break;
2018 /* The duplicate of the header is the new preheader of the loop. Ensure
2019 that it is placed correctly in the loop hierarchy. */
2020 set_loop_copy (loop, loop_outer (loop));
2022 thread_block (header, false);
2023 set_loop_copy (loop, NULL);
2024 new_preheader = e->dest;
2026 /* Create the new latch block. This is always necessary, as the latch
2027 must have only a single successor, but the original header had at
2028 least two successors. */
2029 loop->latch = NULL;
2030 mfb_kj_edge = single_succ_edge (new_preheader);
2031 loop->header = mfb_kj_edge->dest;
2032 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2033 loop->header = latch->dest;
2034 loop->latch = latch->src;
2037 return true;
2039 fail:
2040 /* We failed to thread anything. Cancel the requests. */
2041 FOR_EACH_EDGE (e, ei, header->preds)
2043 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2045 if (path)
2047 delete_jump_thread_path (path);
2048 e->aux = NULL;
2051 return false;
2054 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2055 PHI arguments associated with those edges are equal or there are no
2056 PHI arguments, otherwise return FALSE. */
2058 static bool
2059 phi_args_equal_on_edges (edge e1, edge e2)
2061 gphi_iterator gsi;
2062 int indx1 = e1->dest_idx;
2063 int indx2 = e2->dest_idx;
2065 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2067 gphi *phi = gsi.phi ();
2069 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2070 gimple_phi_arg_def (phi, indx2), 0))
2071 return false;
2073 return true;
2076 /* Walk through the registered jump threads and convert them into a
2077 form convenient for this pass.
2079 Any block which has incoming edges threaded to outgoing edges
2080 will have its entry in THREADED_BLOCKS set.
2082 Any threaded edge will have its new outgoing edge stored in the
2083 original edge's AUX field.
2085 This form avoids the need to walk all the edges in the CFG to
2086 discover blocks which need processing and avoids unnecessary
2087 hash table lookups to map from threaded edge to new target. */
2089 static void
2090 mark_threaded_blocks (bitmap threaded_blocks)
2092 unsigned int i;
2093 bitmap_iterator bi;
2094 bitmap tmp = BITMAP_ALLOC (NULL);
2095 basic_block bb;
2096 edge e;
2097 edge_iterator ei;
2099 /* It is possible to have jump threads in which one is a subpath
2100 of the other. I.e., (A, B), (B, C), (C, D) where B is a joiner
2101 block and (B, C), (C, D) where no joiner block exists.
2103 When this occurs ignore the jump thread request with the joiner
2104 block. It's totally subsumed by the simpler jump thread request.
2106 This results in less block copying and simpler CFGs. More importantly,
2107 if we were to duplicate the joiner block B in this case, we would create
2108 a new threading opportunity that we wouldn't be able to optimize
2109 until the next jump threading iteration.
2111 So first convert the jump thread requests which do not require a
2112 joiner block. */
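/* To make the example above concrete: the simpler request (B, C), (C, D)
   is attached to edge B->C here, and the joiner request starting at A->B
   is cancelled further below because one of its edges (B->C) already
   carries a thread.  */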
2113 for (i = 0; i < paths.length (); i++)
2115 vec<jump_thread_edge *> *path = paths[i];
2117 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2119 edge e = (*path)[0]->e;
2120 e->aux = (void *)path;
2121 bitmap_set_bit (tmp, e->dest->index);
2125 /* Now iterate again, converting cases where we want to thread
2126 through a joiner block, but only if no other edge on the path
2127 already has a jump thread attached to it. We do this in two passes,
2128 to avoid situations where the order in the paths vec can hide overlapping
2129 threads (the path is recorded on the incoming edge, so we would miss
2130 cases where the second path starts at a downstream edge on the same
2131 path). First record all joiner paths, deleting any in the unexpected
2132 case where there is already a path for that incoming edge. */
2133 for (i = 0; i < paths.length ();)
2135 vec<jump_thread_edge *> *path = paths[i];
2137 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2139 /* Attach the path to the starting edge if none is yet recorded. */
2140 if ((*path)[0]->e->aux == NULL)
2142 (*path)[0]->e->aux = path;
2143 i++;
2145 else
2147 paths.unordered_remove (i);
2148 if (dump_file && (dump_flags & TDF_DETAILS))
2149 dump_jump_thread_path (dump_file, *path, false);
2150 delete_jump_thread_path (path);
2153 else
2155 i++;
2159 /* Second, look for paths that have any other jump thread attached to
2160 them, and either finish converting them or cancel them. */
2161 for (i = 0; i < paths.length ();)
2163 vec<jump_thread_edge *> *path = paths[i];
2164 edge e = (*path)[0]->e;
2166 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2168 unsigned int j;
2169 for (j = 1; j < path->length (); j++)
2170 if ((*path)[j]->e->aux != NULL)
2171 break;
2173 /* If we iterated through the entire path without exiting the loop,
2174 then we are good to go, record it. */
2175 if (j == path->length ())
2177 bitmap_set_bit (tmp, e->dest->index);
2178 i++;
2180 else
2182 e->aux = NULL;
2183 paths.unordered_remove (i);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 dump_jump_thread_path (dump_file, *path, false);
2186 delete_jump_thread_path (path);
2189 else
2191 i++;
2195 /* If optimizing for size, only thread through a block if we don't have
2196 to duplicate it or it is an otherwise empty redirection block. */
2197 if (optimize_function_for_size_p (cfun))
2199 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2201 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2202 if (EDGE_COUNT (bb->preds) > 1
2203 && !redirection_block_p (bb))
2205 FOR_EACH_EDGE (e, ei, bb->preds)
2207 if (e->aux)
2209 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2210 delete_jump_thread_path (path);
2211 e->aux = NULL;
2215 else
2216 bitmap_set_bit (threaded_blocks, i);
2219 else
2220 bitmap_copy (threaded_blocks, tmp);
2222 /* Look for jump threading paths which cross multiple loop headers.
2224 The code to thread through loop headers will change the CFG in ways
2225 that break assumptions made by the loop optimization code.
2227 We don't want to blindly cancel the requests. We can instead do better
2228 by trimming off the end of the jump thread path. */
2229 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2231 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2232 FOR_EACH_EDGE (e, ei, bb->preds)
2234 if (e->aux)
2236 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2238 for (unsigned int i = 0, crossed_headers = 0;
2239 i < path->length ();
2240 i++)
2242 basic_block dest = (*path)[i]->e->dest;
2243 crossed_headers += (dest == dest->loop_father->header);
2244 if (crossed_headers > 1)
2246 /* Trim from entry I onwards. */
2247 for (unsigned int j = i; j < path->length (); j++)
2248 delete (*path)[j];
2249 path->truncate (i);
2251 /* Now that we've truncated the path, make sure
2252 what's left is still valid. We need at least
2253 two edges on the path and the last edge cannot
2254 be a joiner. This should never happen, but let's
2255 be safe. */
2256 if (path->length () < 2
2257 || (path->last ()->type
2258 == EDGE_COPY_SRC_JOINER_BLOCK))
2260 delete_jump_thread_path (path);
2261 e->aux = NULL;
2263 break;
2270 /* If we have a joiner block (J) which has two successors S1 and S2 and
2271 we are threading through S1 and the final destination of the thread
2272 is S2, then we must verify that any PHI nodes in S2 have the same
2273 PHI arguments for the edge J->S2 and J->S1->...->S2.
2275 We used to detect this prior to registering the jump thread, but
2276 that prohibits propagation of edge equivalences into non-dominated
2277 PHI nodes as the equivalency test might occur before propagation.
2279 This must also occur after we truncate any jump threading paths
2280 as this scenario may only show up after truncation.
2282 This works for now, but will need improvement as part of the FSA
2283 optimization.
2285 Note since we've moved the thread request data to the edges,
2286 we have to iterate on those rather than the threaded_edges vector. */
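/* Illustrative sketch (hypothetical names, for the simplest case where
   the path is just J->S1->S2): if S2 contains

     x_3 = PHI <0 (J), 0 (S1)>

   the arguments on J->S2 and S1->S2 agree, so threading J->S1->S2 cannot
   change the value x_3 receives and the request may be kept.  If the
   arguments differed, the check below would cancel the request.  */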
2287 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2289 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2290 FOR_EACH_EDGE (e, ei, bb->preds)
2292 if (e->aux)
2294 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2295 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2297 if (have_joiner)
2299 basic_block joiner = e->dest;
2300 edge final_edge = path->last ()->e;
2301 basic_block final_dest = final_edge->dest;
2302 edge e2 = find_edge (joiner, final_dest);
2304 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2306 delete_jump_thread_path (path);
2307 e->aux = NULL;
2314 BITMAP_FREE (tmp);
2318 /* Return TRUE if BB ends with a switch statement or a computed goto.
2319 Otherwise return FALSE. */
2320 static bool
2321 bb_ends_with_multiway_branch (basic_block bb)
2323 gimple *stmt = last_stmt (bb);
2324 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2325 return true;
2326 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2327 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2328 return true;
2329 return false;
2332 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2333 case of a SEME (Single Entry Multiple Exits) region in which all nodes in
2334 the REGION have exactly one incoming edge. The only exception is the first
2335 block, which may not have been connected to the rest of the CFG yet. */
2337 DEBUG_FUNCTION void
2338 verify_jump_thread (basic_block *region, unsigned n_region)
2340 for (unsigned i = 0; i < n_region; i++)
2341 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2344 /* Return true when BB is one of the first N items in BBS. */
2346 static inline bool
2347 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2349 for (int i = 0; i < n; i++)
2350 if (bb == bbs[i])
2351 return true;
2353 return false;
2356 /* Duplicates a jump-thread path of N_REGION basic blocks.
2357 The ENTRY edge is redirected to the duplicate of the region.
2359 Remove the last conditional statement in the last basic block in the REGION,
2360 and create a single fallthru edge pointing to the same destination as the
2361 EXIT edge.
2363 The new basic blocks are stored to REGION_COPY in the same order as they had
2364 in REGION, provided that REGION_COPY is not NULL.
2366 Returns false if it is unable to copy the region, true otherwise. */
2368 static bool
2369 duplicate_thread_path (edge entry, edge exit,
2370 basic_block *region, unsigned n_region,
2371 basic_block *region_copy)
2373 unsigned i;
2374 bool free_region_copy = false;
2375 struct loop *loop = entry->dest->loop_father;
2376 edge exit_copy;
2377 edge redirected;
2378 int total_freq = 0, entry_freq = 0;
2379 gcov_type total_count = 0, entry_count = 0;
2381 if (!can_copy_bbs_p (region, n_region))
2382 return false;
2384 /* Some sanity checking. Note that we do not check for all possible
2385 misuses of the function. I.e., if you ask to copy something weird,
2386 it will work, but the state of structures probably will not be
2387 correct. */
2388 for (i = 0; i < n_region; i++)
2390 /* We do not handle subloops, i.e. all the blocks must belong to the
2391 same loop. */
2392 if (region[i]->loop_father != loop)
2393 return false;
2396 initialize_original_copy_tables ();
2398 set_loop_copy (loop, loop);
2400 if (!region_copy)
2402 region_copy = XNEWVEC (basic_block, n_region);
2403 free_region_copy = true;
2406 if (entry->dest->count)
2408 total_count = entry->dest->count;
2409 entry_count = entry->count;
2410 /* Fix up corner cases, to avoid division by zero or creation of negative
2411 frequencies. */
2412 if (entry_count > total_count)
2413 entry_count = total_count;
2415 else
2417 total_freq = entry->dest->frequency;
2418 entry_freq = EDGE_FREQUENCY (entry);
2419 /* Fix up corner cases, to avoid division by zero or creation of negative
2420 frequencies. */
2421 if (total_freq == 0)
2422 total_freq = 1;
2423 else if (entry_freq > total_freq)
2424 entry_freq = total_freq;
2427 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2428 split_edge_bb_loc (entry), false);
2430 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2431 following code ensures that all the edges exiting the jump-thread path are
2432 redirected back to the original code: these edges are exceptions
2433 invalidating the property that is propagated by executing all the blocks of
2434 the jump-thread path in order. */
2436 for (i = 0; i < n_region; i++)
2438 edge e;
2439 edge_iterator ei;
2440 basic_block bb = region_copy[i];
2442 if (single_succ_p (bb))
2444 /* Make sure the successor is the next node in the path. */
2445 gcc_assert (i + 1 == n_region
2446 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2447 continue;
2450 /* Special case the last block on the path: make sure that it does not
2451 jump back on the copied path. */
2452 if (i + 1 == n_region)
2454 FOR_EACH_EDGE (e, ei, bb->succs)
2455 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2457 basic_block orig = get_bb_original (e->dest);
2458 if (orig)
2459 redirect_edge_and_branch_force (e, orig);
2461 continue;
2464 /* Redirect all other edges jumping to non-adjacent blocks back to the
2465 original code. */
2466 FOR_EACH_EDGE (e, ei, bb->succs)
2467 if (region_copy[i + 1] != e->dest)
2469 basic_block orig = get_bb_original (e->dest);
2470 if (orig)
2471 redirect_edge_and_branch_force (e, orig);
2475 if (total_count)
2477 scale_bbs_frequencies_gcov_type (region, n_region,
2478 total_count - entry_count,
2479 total_count);
2480 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2481 total_count);
2483 else
2485 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2486 total_freq);
2487 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
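/* Worked example with illustrative numbers: if ENTRY carries 30 of its
   destination's 100 counts, the original REGION blocks are scaled by
   70/100 and the REGION_COPY blocks by 30/100, so together they still
   account for the original profile.  */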
2490 #ifdef ENABLE_CHECKING
2491 verify_jump_thread (region_copy, n_region);
2492 #endif
2494 /* Remove the last branch in the jump thread path. */
2495 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2497 /* And fixup the flags on the single remaining edge. */
2498 edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2499 fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2500 fix_e->flags |= EDGE_FALLTHRU;
2502 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2504 if (e) {
2505 rescan_loop_exit (e, true, false);
2506 e->probability = REG_BR_PROB_BASE;
2507 e->count = region_copy[n_region - 1]->count;
2510 /* Redirect the entry and add the phi node arguments. */
2511 if (entry->dest == loop->header)
2512 mark_loop_for_removal (loop);
2513 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2514 gcc_assert (redirected != NULL);
2515 flush_pending_stmts (entry);
2517 /* Add the other PHI node arguments. */
2518 add_phi_args_after_copy (region_copy, n_region, NULL);
2520 if (free_region_copy)
2521 free (region_copy);
2523 free_original_copy_tables ();
2524 return true;
2527 /* Return true when PATH is a valid jump-thread path. */
2529 static bool
2530 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2532 unsigned len = path->length ();
2534 /* Check that the path is connected. */
2535 for (unsigned int j = 0; j < len - 1; j++)
2536 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2537 return false;
2539 return true;
2542 /* Walk through all blocks and thread incoming edges to the appropriate
2543 outgoing edge for each recorded jump thread path.
2545 It is the caller's responsibility to fix the dominance information
2546 and rewrite duplicated SSA_NAMEs back into SSA form.
2548 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2549 loop headers if it does not simplify the loop.
2551 Returns true if one or more edges were threaded, false otherwise. */
2553 bool
2554 thread_through_all_blocks (bool may_peel_loop_headers)
2556 bool retval = false;
2557 unsigned int i;
2558 bitmap_iterator bi;
2559 bitmap threaded_blocks;
2560 struct loop *loop;
2562 if (!paths.exists ())
2563 return false;
2565 threaded_blocks = BITMAP_ALLOC (NULL);
2566 memset (&thread_stats, 0, sizeof (thread_stats));
2568 /* Jump-thread all FSM threads before other jump-threads. */
2569 for (i = 0; i < paths.length ();)
2571 vec<jump_thread_edge *> *path = paths[i];
2572 edge entry = (*path)[0]->e;
2574 /* Only code-generate FSM jump-threads in this loop. */
2575 if ((*path)[0]->type != EDGE_FSM_THREAD)
2577 i++;
2578 continue;
2581 /* Do not jump-thread twice from the same block. */
2582 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2583 /* Verify that the jump thread path is still valid: a
2584 previous jump-thread may have changed the CFG, and
2585 invalidated the current path. */
2586 || !valid_jump_thread_path (path))
2588 /* Remove invalid FSM jump-thread paths. */
2589 delete_jump_thread_path (path);
2590 paths.unordered_remove (i);
2591 continue;
2594 unsigned len = path->length ();
2595 edge exit = (*path)[len - 1]->e;
2596 basic_block *region = XNEWVEC (basic_block, len - 1);
2598 for (unsigned int j = 0; j < len - 1; j++)
2599 region[j] = (*path)[j]->e->dest;
2601 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2603 /* We do not update dominance info. */
2604 free_dominance_info (CDI_DOMINATORS);
2605 bitmap_set_bit (threaded_blocks, entry->src->index);
2606 retval = true;
2609 delete_jump_thread_path (path);
2610 paths.unordered_remove (i);
2613 /* Remove from PATHS all the jump-threads starting with an edge already
2614 jump-threaded. */
2615 for (i = 0; i < paths.length ();)
2617 vec<jump_thread_edge *> *path = paths[i];
2618 edge entry = (*path)[0]->e;
2620 /* Do not jump-thread twice from the same block. */
2621 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2623 delete_jump_thread_path (path);
2624 paths.unordered_remove (i);
2626 else
2627 i++;
2630 bitmap_clear (threaded_blocks);
2632 mark_threaded_blocks (threaded_blocks);
2634 initialize_original_copy_tables ();
2636 /* First perform the threading requests that do not affect
2637 loop structure. */
2638 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2640 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2642 if (EDGE_COUNT (bb->preds) > 0)
2643 retval |= thread_block (bb, true);
2646 /* Then perform the threading through loop headers. We start with the
2647 innermost loop, so that the changes in the CFG we perform won't affect
2648 further threading. */
2649 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2651 if (!loop->header
2652 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2653 continue;
2655 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2658 /* Any jump threading paths that are still attached to edges at this
2659 point must fall into one of two cases.
2661 First, we could have a jump threading path which went from outside
2662 a loop to inside a loop that was ignored because a prior jump thread
2663 across a backedge was realized (which indirectly causes the loop
2664 above to ignore the latter thread). We can detect these because the
2665 loop structures will be different and we do not currently try to
2666 optimize this case.
2668 Second, we could be threading across a backedge to a point within the
2669 same loop. This occurs for the FSA/FSM optimization and we would
2670 like to optimize it. However, we have to be very careful as this
2671 may completely scramble the loop structures, with the result being
2672 irreducible loops causing us to throw away our loop structure.
2674 As a compromise for the latter case, if the thread path ends in
2675 a block where the last statement is a multiway branch, then go
2676 ahead and thread it, else ignore it. */
2677 basic_block bb;
2678 edge e;
2679 FOR_EACH_BB_FN (bb, cfun)
2681 /* If we do end up threading here, we can remove elements from
2682 BB->preds. Thus we cannot use the FOR_EACH_EDGE iterator. */
2683 for (edge_iterator ei = ei_start (bb->preds);
2684 (e = ei_safe_edge (ei));)
2685 if (e->aux)
2687 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2689 /* Case 1, threading from outside to inside the loop
2690 after we'd already threaded through the header. */
2691 if ((*path)[0]->e->dest->loop_father
2692 != path->last ()->e->src->loop_father)
2694 delete_jump_thread_path (path);
2695 e->aux = NULL;
2696 ei_next (&ei);
2698 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2700 /* The code to thread through loop headers may have
2701 split a block with jump threads attached to it.
2703 We can identify this with a disjoint jump threading
2704 path. If found, just remove it. */
2705 for (unsigned int i = 0; i < path->length () - 1; i++)
2706 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2708 delete_jump_thread_path (path);
2709 e->aux = NULL;
2710 ei_next (&ei);
2711 break;
2714 /* Our path is still valid, thread it. */
2715 if (e->aux)
2717 if (thread_block ((*path)[0]->e->dest, false))
2718 e->aux = NULL;
2719 else
2721 delete_jump_thread_path (path);
2722 e->aux = NULL;
2723 ei_next (&ei);
2727 else
2729 delete_jump_thread_path (path);
2730 e->aux = NULL;
2731 ei_next (&ei);
2734 else
2735 ei_next (&ei);
2738 statistics_counter_event (cfun, "Jumps threaded",
2739 thread_stats.num_threaded_edges);
2741 free_original_copy_tables ();
2743 BITMAP_FREE (threaded_blocks);
2744 threaded_blocks = NULL;
2745 paths.release ();
2747 if (retval)
2748 loops_state_set (LOOPS_NEED_FIXUP);
2750 return retval;
2753 /* Delete the jump threading path PATH. We have to explicitly delete
2754 each entry in the vector, then the container. */
2756 void
2757 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2759 for (unsigned int i = 0; i < path->length (); i++)
2760 delete (*path)[i];
2761 path->release ();
2762 delete path;
2765 /* Register a jump threading opportunity. We queue up all the jump
2766 threading opportunities discovered by a pass and update the CFG
2767 and SSA form all at once.
2769 PATH describes the thread: its first edge is the edge we can thread,
2770 and its remaining edges record the blocks to duplicate and the final
2771 destination to be reached after fixing the CFG and SSA graph. */
2773 void
2774 register_jump_thread (vec<jump_thread_edge *> *path)
2776 if (!dbg_cnt (registered_jump_thread))
2778 delete_jump_thread_path (path);
2779 return;
2782 /* First make sure there are no NULL outgoing edges on the jump threading
2783 path. That can happen for jumping to a constant address. */
2784 for (unsigned int i = 0; i < path->length (); i++)
2785 if ((*path)[i]->e == NULL)
2787 if (dump_file && (dump_flags & TDF_DETAILS))
2789 fprintf (dump_file,
2790 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2791 dump_jump_thread_path (dump_file, *path, false);
2794 delete_jump_thread_path (path);
2795 return;
2798 if (dump_file && (dump_flags & TDF_DETAILS))
2799 dump_jump_thread_path (dump_file, *path, true);
2801 if (!paths.exists ())
2802 paths.create (5);
2804 paths.safe_push (path);
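/* Illustrative sketch of a caller (E and E2 are hypothetical edges,
   following the same pattern thread_single_edge uses above): a client
   pass might register a simple two-edge jump thread like so:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   Ownership of PATH passes to this file: the path is either queued in
   PATHS for thread_through_all_blocks or released immediately via
   delete_jump_thread_path.  */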