Fix bootstrap/PR63632
[official-gcc.git] / gcc / tree-ssa-threadupdate.c
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2014 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tree.h"
24 #include "flags.h"
25 #include "basic-block.h"
26 #include "hashtab.h"
27 #include "hash-set.h"
28 #include "vec.h"
29 #include "machmode.h"
30 #include "tm.h"
31 #include "hard-reg-set.h"
32 #include "input.h"
33 #include "function.h"
34 #include "hash-table.h"
35 #include "tree-ssa-alias.h"
36 #include "internal-fn.h"
37 #include "gimple-expr.h"
38 #include "is-a.h"
39 #include "gimple.h"
40 #include "gimple-iterator.h"
41 #include "gimple-ssa.h"
42 #include "tree-phinodes.h"
43 #include "tree-ssa.h"
44 #include "tree-ssa-threadupdate.h"
45 #include "ssa-iterators.h"
46 #include "dumpfile.h"
47 #include "cfgloop.h"
48 #include "dbgcnt.h"
49 #include "tree-cfg.h"
50 #include "tree-pass.h"
52 /* Given a block B, update the CFG and SSA graph to reflect redirecting
53 one or more in-edges to B to instead reach the destination of an
54 out-edge from B while preserving any side effects in B.
56 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
57 side effects of executing B.
59 1. Make a copy of B (including its outgoing edges and statements). Call
60 the copy B'. Note B' has no incoming edges or PHIs at this time.
62 2. Remove the control statement at the end of B' and all outgoing edges
63 except B'->C.
65 3. Add a new argument to each PHI in C with the same value as the existing
66 argument associated with edge B->C. Associate the new PHI arguments
67 with the edge B'->C.
69 4. For each PHI in B, find or create a PHI in B' with an identical
70 PHI_RESULT. Add an argument to the PHI in B' which has the same
71 value as the PHI in B associated with the edge A->B. Associate
72 the new argument in the PHI in B' with the edge A->B.
74 5. Change the edge A->B to A->B'.
76 5a. This automatically deletes any PHI arguments associated with the
77 edge A->B in B.
79 5b. This automatically associates each new argument added in step 4
80 with the edge A->B'.
82 6. Repeat for other incoming edges into B.
84 7. Put the duplicated resources in B and all the B' blocks into SSA form.
86 Note that block duplication can be minimized by first collecting the
87 set of unique destination blocks that the incoming edges should
88 be threaded to.
90 We reduce the number of edges and statements we create by not copying all
91 the outgoing edges and the control statement in step #1. We instead create
92 a template block without the outgoing edges and duplicate the template.
94 Another case this code handles is threading through a "joiner" block. In
95 this case, we do not know the destination of the joiner block, but one
96 of the outgoing edges from the joiner block leads to a threadable path. This
97 case largely works as outlined above, except the duplicate of the joiner
98 block still contains a full set of outgoing edges and its control statement.
99 We just redirect one of its outgoing edges to our jump threading path. */
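/* A small worked illustration of the steps above (purely hypothetical
   blocks, not taken from any particular testcase).  Suppose A->B and
   D->B are the incoming edges, B ends in "if (x_1 > 0)" with outgoing
   edges B->C (true) and B->E (false), and the threader has proven that
   the condition is always true along A->B.  Step 1 creates a copy B'
   of B; step 2 removes the copied conditional and the B'->E edge,
   leaving only B'->C; steps 3 and 4 fix up the PHI arguments in C and
   in B'; step 5 redirects A->B to A->B'.  Afterwards A reaches C
   through B' without evaluating the condition, D->B->{C,E} is
   unchanged, and step 7 rewrites the duplicated SSA names back into
   valid SSA form.  */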
102 /* Steps #5 and #6 of the above algorithm are best implemented by walking
103 all the incoming edges which thread to the same destination edge at
104 the same time. That avoids lots of table lookups to get information
105 for the destination edge.
107 To realize that implementation we create a list of incoming edges
108 which thread to the same outgoing edge. Thus to implement steps
109 #5 and #6 we traverse our hash table of outgoing edge information.
110 For each entry we walk the list of incoming edges which thread to
111 the current outgoing edge. */
113 struct el
115 edge e;
116 struct el *next;
119 /* Main data structure recording information regarding B's duplicate
120 blocks. */
122 /* We need to efficiently record the unique thread destinations of this
123 block and specific information associated with those destinations. We
124 may have many incoming edges threaded to the same outgoing edge. This
125 can be naturally implemented with a hash table. */
127 struct redirection_data : typed_free_remove<redirection_data>
129 /* We support wiring up two block duplicates in a jump threading path.
131 One is a normal block copy where we remove the control statement
132 and wire up its single remaining outgoing edge to the thread path.
134 The other is a joiner block where we leave the control statement
135 in place, but wire one of the outgoing edges to a thread path.
137 In theory we could have multiple block duplicates in a jump
138 threading path, but I haven't tried that.
140 The duplicate blocks appear in this array in the same order in
141 which they appear in the jump thread path. */
142 basic_block dup_blocks[2];
144 /* The jump threading path. */
145 vec<jump_thread_edge *> *path;
147 /* A list of incoming edges which we want to thread to the
148 same path. */
149 struct el *incoming_edges;
151 /* hash_table support. */
152 typedef redirection_data value_type;
153 typedef redirection_data compare_type;
154 static inline hashval_t hash (const value_type *);
155 static inline int equal (const value_type *, const compare_type *);
158 /* Dump a jump threading path, including annotations about each
159 edge in the path. */
161 static void
162 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
163 bool registering)
165 fprintf (dump_file,
166 " %s jump thread: (%d, %d) incoming edge; ",
167 (registering ? "Registering" : "Cancelling"),
168 path[0]->e->src->index, path[0]->e->dest->index);
170 for (unsigned int i = 1; i < path.length (); i++)
172 /* We can get paths with a NULL edge when the final destination
173 of a jump thread turns out to be a constant address. We dump
174 those paths when debugging, so we have to be prepared for that
175 possibility here. */
176 if (path[i]->e == NULL)
177 continue;
179 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
180 fprintf (dump_file, " (%d, %d) joiner; ",
181 path[i]->e->src->index, path[i]->e->dest->index);
182 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
183 fprintf (dump_file, " (%d, %d) normal;",
184 path[i]->e->src->index, path[i]->e->dest->index);
185 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
186 fprintf (dump_file, " (%d, %d) nocopy;",
187 path[i]->e->src->index, path[i]->e->dest->index);
189 fputc ('\n', dump_file);
192 /* Simple hashing function. For any given incoming edge E, we're going
193 to be most concerned with the final destination of its jump thread
194 path. So hash on the block index of the final edge in the path. */
196 inline hashval_t
197 redirection_data::hash (const value_type *p)
199 vec<jump_thread_edge *> *path = p->path;
200 return path->last ()->e->dest->index;
203 /* Given two hash table entries, return true if they have the same
204 jump threading path. */
205 inline int
206 redirection_data::equal (const value_type *p1, const compare_type *p2)
208 vec<jump_thread_edge *> *path1 = p1->path;
209 vec<jump_thread_edge *> *path2 = p2->path;
211 if (path1->length () != path2->length ())
212 return false;
214 for (unsigned int i = 1; i < path1->length (); i++)
216 if ((*path1)[i]->type != (*path2)[i]->type
217 || (*path1)[i]->e != (*path2)[i]->e)
218 return false;
221 return true;
224 /* Data structure of information to pass to hash table traversal routines. */
225 struct ssa_local_info_t
227 /* The current block we are working on. */
228 basic_block bb;
230 /* We only create a template block for the first duplicated block in a
231 jump threading path as we may need many duplicates of that block.
233 The second duplicate block in a path is specific to that path. Creating
234 and sharing a template for that block is considerably more difficult. */
235 basic_block template_block;
237 /* TRUE if we thread one or more jumps, FALSE otherwise. */
238 bool jumps_threaded;
240 /* Blocks duplicated for the thread. */
241 bitmap duplicate_blocks;
244 /* Passes which use the jump threading code register jump threading
245 opportunities as they are discovered. We keep the registered
246 jump threading opportunities in this vector as edge pairs
247 (original_edge, target_edge). */
248 static vec<vec<jump_thread_edge *> *> paths;
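/* For illustration only (a hedged sketch, not code from this file): a
   client such as the forward threader in tree-ssa-threadedge.c fills
   PATHS indirectly by building a path and handing it to
   register_jump_thread, roughly like

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (taken_edge,
                                            EDGE_NO_COPY_SRC_BLOCK));
     register_jump_thread (path);

   where E is the incoming edge and TAKEN_EDGE the outgoing edge proven
   to be taken.  The type on each non-initial entry selects whether its
   source block is copied (EDGE_COPY_SRC_BLOCK), copied while keeping
   its control statement (EDGE_COPY_SRC_JOINER_BLOCK), or left alone
   (EDGE_NO_COPY_SRC_BLOCK).  */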
250 /* When we start updating the CFG for threading, data necessary for jump
251 threading is attached to the AUX field for the incoming edge. Use these
252 macros to access the underlying structure attached to the AUX field. */
253 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
255 /* Jump threading statistics. */
257 struct thread_stats_d
259 unsigned long num_threaded_edges;
262 struct thread_stats_d thread_stats;
265 /* Remove the last statement in block BB if it is a control statement.
266 Also remove all outgoing edges except the edge which reaches DEST_BB.
267 If DEST_BB is NULL, then remove all outgoing edges. */
269 static void
270 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
272 gimple_stmt_iterator gsi;
273 edge e;
274 edge_iterator ei;
276 gsi = gsi_last_bb (bb);
278 /* If the duplicate ends with a control statement, then remove it.
280 Note that if we are duplicating the template block rather than the
281 original basic block, then the duplicate might not have any real
282 statements in it. */
283 if (!gsi_end_p (gsi)
284 && gsi_stmt (gsi)
285 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
286 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
287 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
288 gsi_remove (&gsi, true);
290 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
292 if (e->dest != dest_bb)
293 remove_edge (e);
294 else
295 ei_next (&ei);
299 /* Create a duplicate of BB. Record the duplicate block in an array
300 indexed by COUNT stored in RD. */
302 static void
303 create_block_for_threading (basic_block bb,
304 struct redirection_data *rd,
305 unsigned int count,
306 bitmap *duplicate_blocks)
308 edge_iterator ei;
309 edge e;
311 /* We can use the generic block duplication code and simply remove
312 the stuff we do not need. */
313 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
315 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
316 e->aux = NULL;
318 /* Zero out the profile, since the block is unreachable for now. */
319 rd->dup_blocks[count]->frequency = 0;
320 rd->dup_blocks[count]->count = 0;
321 if (duplicate_blocks)
322 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
325 /* Main data structure to hold information for duplicates of BB. */
327 static hash_table<redirection_data> *redirection_data;
329 /* Given an outgoing edge E, look up and return its entry in our hash table.
331 If INSERT is true, then we insert the entry into the hash table if
332 it is not already present. E is then added to the list of incoming
333 edges associated with the entry in the hash table. */
335 static struct redirection_data *
336 lookup_redirection_data (edge e, enum insert_option insert)
338 struct redirection_data **slot;
339 struct redirection_data *elt;
340 vec<jump_thread_edge *> *path = THREAD_PATH (e);
342 /* Build a hash table element so we can see if E is already
343 in the table. */
344 elt = XNEW (struct redirection_data);
345 elt->path = path;
346 elt->dup_blocks[0] = NULL;
347 elt->dup_blocks[1] = NULL;
348 elt->incoming_edges = NULL;
350 slot = redirection_data->find_slot (elt, insert);
352 /* This will only happen if INSERT is false and the entry is not
353 in the hash table. */
354 if (slot == NULL)
356 free (elt);
357 return NULL;
360 /* This will only happen if E was not in the hash table and
361 INSERT is true. */
362 if (*slot == NULL)
364 *slot = elt;
365 elt->incoming_edges = XNEW (struct el);
366 elt->incoming_edges->e = e;
367 elt->incoming_edges->next = NULL;
368 return elt;
370 /* E was in the hash table. */
371 else
373 /* Free ELT as we do not need it anymore, we will extract the
374 relevant entry from the hash table itself. */
375 free (elt);
377 /* Get the entry stored in the hash table. */
378 elt = *slot;
380 /* If insertion was requested, then we need to add INCOMING_EDGE
381 to the list of incoming edges associated with E. */
382 if (insert)
384 struct el *el = XNEW (struct el);
385 el->next = elt->incoming_edges;
386 el->e = e;
387 elt->incoming_edges = el;
390 return elt;
394 /* Similar to copy_phi_args, except that the PHI arg already exists; it just
395 does not have a value associated with it. */
397 static void
398 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
400 int src_idx = src_e->dest_idx;
401 int tgt_idx = tgt_e->dest_idx;
403 /* Iterate over each PHI in e->dest. */
404 for (gimple_stmt_iterator gsi = gsi_start_phis (src_e->dest),
405 gsi2 = gsi_start_phis (tgt_e->dest);
406 !gsi_end_p (gsi);
407 gsi_next (&gsi), gsi_next (&gsi2))
409 gimple src_phi = gsi_stmt (gsi);
410 gimple dest_phi = gsi_stmt (gsi2);
411 tree val = gimple_phi_arg_def (src_phi, src_idx);
412 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
414 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
415 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
419 /* Given ssa_name DEF, backtrack the jump threading PATH from node IDX
420 to see if it has a constant value in a flow-sensitive manner. Set
421 LOCUS to the location of the constant PHI arg and return the value.
422 Return DEF directly if either PATH or IDX is zero. */
424 static tree
425 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
426 basic_block bb, int idx, source_location *locus)
428 tree arg;
429 gimple def_phi;
430 basic_block def_bb;
432 if (path == NULL || idx == 0)
433 return def;
435 def_phi = SSA_NAME_DEF_STMT (def);
436 if (gimple_code (def_phi) != GIMPLE_PHI)
437 return def;
439 def_bb = gimple_bb (def_phi);
440 /* Don't propagate loop invariants into deeper loops. */
441 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
442 return def;
444 /* Backtrack jump threading path from IDX to see if def has constant
445 value. */
446 for (int j = idx - 1; j >= 0; j--)
448 edge e = (*path)[j]->e;
449 if (e->dest == def_bb)
451 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
452 if (is_gimple_min_invariant (arg))
454 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
455 return arg;
457 break;
461 return def;
464 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
465 Try to backtrack the jump threading PATH from node IDX to see if the arg
466 has a constant value; if so, copy the constant value instead of the
467 argument itself. */
469 static void
470 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
471 vec<jump_thread_edge *> *path, int idx)
473 gimple_stmt_iterator gsi;
474 int src_indx = src_e->dest_idx;
476 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
478 gimple phi = gsi_stmt (gsi);
479 tree def = gimple_phi_arg_def (phi, src_indx);
480 source_location locus = gimple_phi_arg_location (phi, src_indx);
482 if (TREE_CODE (def) == SSA_NAME
483 && !virtual_operand_p (gimple_phi_result (phi)))
484 def = get_value_locus_in_path (def, path, bb, idx, &locus);
486 add_phi_arg (phi, def, tgt_e, locus);
490 /* We have recently made a copy of ORIG_BB, including its outgoing
491 edges. The copy is NEW_BB. Every PHI node in every direct successor of
492 ORIG_BB has a new argument associated with edge from NEW_BB to the
493 successor. Initialize the PHI argument so that it is equal to the PHI
494 argument associated with the edge from ORIG_BB to the successor.
495 PATH and IDX are used to check if the new PHI argument has constant
496 value in a flow sensitive manner. */
498 static void
499 update_destination_phis (basic_block orig_bb, basic_block new_bb,
500 vec<jump_thread_edge *> *path, int idx)
502 edge_iterator ei;
503 edge e;
505 FOR_EACH_EDGE (e, ei, orig_bb->succs)
507 edge e2 = find_edge (new_bb, e->dest);
508 copy_phi_args (e->dest, e, e2, path, idx);
512 /* Given a duplicate block and its single destination (both stored
513 in RD), create an edge between the duplicate and its single
514 destination.
516 Add an additional argument to any PHI nodes at the single
517 destination. IDX is the node in the jump threading path at which
518 we start checking whether the new PHI argument has a constant
519 value along the jump threading path. */
521 static void
522 create_edge_and_update_destination_phis (struct redirection_data *rd,
523 basic_block bb, int idx)
525 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
527 rescan_loop_exit (e, true, false);
528 e->probability = REG_BR_PROB_BASE;
529 e->count = bb->count;
531 /* We used to copy the thread path here. That was added in 2007
532 and dutifully updated through the representation changes in 2013.
534 In 2013 we added code to thread from an interior node through
535 the backedge to another interior node. That runs after the code
536 to thread through loop headers from outside the loop.
538 The latter may delete edges in the CFG, including those
539 which appeared in the jump threading path we copied here. Thus
540 we'd end up using a dangling pointer.
542 After reviewing the 2007/2011 code, I can't see how anything
543 depended on copying the AUX field and clearly copying the jump
544 threading path is problematical due to embedded edge pointers.
545 It has been removed. */
546 e->aux = NULL;
548 /* If there are any PHI nodes at the destination of the outgoing edge
549 from the duplicate block, then we will need to add a new argument
550 to them. The argument should have the same value as the argument
551 associated with the outgoing edge stored in RD. */
552 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
555 /* Look through PATH beginning at START and return TRUE if there are
556 any additional blocks that need to be duplicated. Otherwise,
557 return FALSE. */
558 static bool
559 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
560 unsigned int start)
562 for (unsigned int i = start + 1; i < path->length (); i++)
564 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
565 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
566 return true;
568 return false;
572 /* Compute the amount of profile count/frequency coming into the jump threading
573 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
574 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
575 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
576 identify blocks duplicated for jump threading, which have duplicated
577 edges that need to be ignored in the analysis. Return true if path contains
578 a joiner, false otherwise.
580 In the non-joiner case, this is straightforward - all the counts/frequency
581 flowing into the jump threading path should flow through the duplicated
582 block and out of the duplicated path.
584 In the joiner case, it is very tricky. Some of the counts flowing into
585 the original path go offpath at the joiner. The problem is that while
586 we know how much total count goes off-path in the original control flow,
587 we don't know how many of the counts corresponding to just the jump
588 threading path go offpath at the joiner.
590 For example, assume we have the following control flow and identified
591 jump threading paths:
593          A     B     C
594           \    |    /
595         Ea \   |Eb / Ec
596             \  |  /
597              v v v
598                J       <-- Joiner
599               / \
600          Eoff/   \Eon
601             /     \
602            v       v
603          Soff     Son  <--- Normal
604                   /\
605                Ed/  \ Ee
606                  /    \
607                 v      v
608                 D      E
610 Jump threading paths: A -> J -> Son -> D (path 1)
611 C -> J -> Son -> E (path 2)
613 Note that the control flow could be more complicated:
614 - Each jump threading path may have more than one incoming edge. I.e. A and
615 Ea could represent multiple incoming blocks/edges that are included in
616 path 1.
617 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
618 before or after the "normal" copy block). These are not duplicated onto
619 the jump threading path, as they are single-successor.
620 - Any of the blocks along the path may have other incoming edges that
621 are not part of any jump threading path, but add profile counts along
622 the path.
624 In the above example, after all jump threading is complete, we will
625 end up with the following control flow:
627            A           B            C
628            |           |            |
629          Ea|           |Eb          |Ec
630            |           |            |
631            v           v            v
632           Ja           J           Jc
633          /  \         / \Eon'     /  \
634     Eona/    \    ---/---\--------    \Eonc
635        /      \ /   /     \            \
636       v        v   v       v            v
637     Sona       Soff       Son          Sonc
638         \                 /\          /
639          \___________    /  \   _____/
640                      \  /    \/
641                       vv      v
642                        D      E
644 The main issue to notice here is that when we are processing path 1
645 (A->J->Son->D) we need to figure out the outgoing edge weights to
646 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
647 sum of the incoming weights to D remain Ed. The problem with simply
648 assuming that Ja (and Jc when processing path 2) has the same outgoing
649 probabilities to its successors as the original block J, is that after
650 all paths are processed and other edges/counts removed (e.g. none
651 of Ec will reach D after processing path 2), we may end up with not
652 enough count flowing along duplicated edge Sona->D.
654 Therefore, in the case of a joiner, we keep track of all counts
655 coming in along the current path, as well as from predecessors not
656 on any jump threading path (Eb in the above example). While we
657 first assume that the duplicated Eona for Ja->Sona has the same
658 probability as the original, we later compensate for other jump
659 threading paths that may eliminate edges. We do that by keeping track
660 of all counts coming into the original path that are not in a jump
661 thread (Eb in the above example, but as noted earlier, there could
662 be other predecessors incoming to the path at various points, such
663 as at Son). Call this cumulative non-path count coming into the path
664 before D as Enonpath. We then ensure that the count from Sona->D is at
665 least as big as (Ed - Enonpath), but no bigger than the minimum
666 weight along the jump threading path. The probabilities of both the
667 original and duplicated joiner block J and Ja will be adjusted
668 accordingly after the updates. */
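/* A worked example with made-up numbers (illustrative only): in the
   CFG above, assume counts Ea = 40, Eb = 20, Ec = 40 (so J's count is
   100), Eon = 80, Eoff = 20, Ed = 50 and Ee = 30.  When path 1 is
   processed, path_in_count is 40, the non-path count entering the
   path is Eb = 20, and onpath_scale is 40/100.  The first estimate of
   the count leaving the duplicated path is therefore 0.4 * Eon = 32.
   The lower bound described above is Ed - Enonpath = 50 - 20 = 30 and
   the upper bound is the minimum count along the path (here the
   incoming 40), so 32 is kept as path_out_count: Ja receives count 40,
   the duplicated Ja->Sona edge gets 32, and the remaining 8 flows
   off-path through Ja->Soff.  */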
670 static bool
671 compute_path_counts (struct redirection_data *rd,
672 ssa_local_info_t *local_info,
673 gcov_type *path_in_count_ptr,
674 gcov_type *path_out_count_ptr,
675 int *path_in_freq_ptr)
677 edge e = rd->incoming_edges->e;
678 vec<jump_thread_edge *> *path = THREAD_PATH (e);
679 edge elast = path->last ()->e;
680 gcov_type nonpath_count = 0;
681 bool has_joiner = false;
682 gcov_type path_in_count = 0;
683 int path_in_freq = 0;
685 /* Start by accumulating incoming edge counts to the path's first bb
686 into a couple buckets:
687 path_in_count: total count of incoming edges that flow into the
688 current path.
689 nonpath_count: total count of incoming edges that are not
690 flowing along *any* path. These are the counts
691 that will still flow along the original path after
692 all path duplication is done by potentially multiple
693 calls to this routine.
694 (any other incoming edge counts are for a different jump threading
695 path that will be handled by a later call to this routine.)
696 To make this easier, start by recording all incoming edges that flow into
697 the current path in a bitmap. We could add up the path's incoming edge
698 counts here, but we still need to walk all the first bb's incoming edges
699 below to add up the counts of the other edges not included in this jump
700 threading path. */
701 struct el *next, *el;
702 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
703 for (el = rd->incoming_edges; el; el = next)
705 next = el->next;
706 bitmap_set_bit (in_edge_srcs, el->e->src->index);
708 edge ein;
709 edge_iterator ei;
710 FOR_EACH_EDGE (ein, ei, e->dest->preds)
712 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
713 /* Simply check the incoming edge src against the set captured above. */
714 if (ein_path
715 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
717 /* It is necessary but not sufficient that the last path edges
718 are identical. There may be different paths that share the
719 same last path edge in the case where the last edge has a nocopy
720 source block. */
721 gcc_assert (ein_path->last ()->e == elast);
722 path_in_count += ein->count;
723 path_in_freq += EDGE_FREQUENCY (ein);
725 else if (!ein_path)
727 /* Keep track of the incoming edges that are not on any jump-threading
728 path. These counts will still flow out of original path after all
729 jump threading is complete. */
730 nonpath_count += ein->count;
733 BITMAP_FREE (in_edge_srcs);
735 /* Now compute the fraction of the total count coming into the first
736 path bb that is from the current threading path. */
737 gcov_type total_count = e->dest->count;
738 /* Handle incoming profile insanities. */
739 if (total_count < path_in_count)
740 path_in_count = total_count;
741 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
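  /* Illustrative arithmetic (assuming REG_BR_PROB_BASE is 10000): with
     path_in_count 40 and total_count 100, onpath_scale becomes 4000,
     i.e. 40% of whatever count it is later applied to through
     apply_probability.  */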
743 /* Walk the entire path to do some more computation in order to estimate
744 how much of the path_in_count will flow out of the duplicated threading
745 path. In the non-joiner case this is straightforward (it should be
746 the same as path_in_count, although we will handle incoming profile
747 insanities by setting it equal to the minimum count along the path).
749 In the joiner case, we need to estimate how much of the path_in_count
750 will stay on the threading path after the joiner's conditional branch.
751 We don't really know for sure how much of the counts
752 associated with this path go to each successor of the joiner, but we'll
753 estimate based on the fraction of the total count coming into the path
754 bb that came from the threading paths (computed above in onpath_scale).
755 Afterwards, we will need to do some fixup to account for other threading
756 paths and possible profile insanities.
758 In order to estimate the joiner case's counts we also need to update
759 nonpath_count with any additional counts coming into the path. Other
760 blocks along the path may have additional predecessors from outside
761 the path. */
762 gcov_type path_out_count = path_in_count;
763 gcov_type min_path_count = path_in_count;
764 for (unsigned int i = 1; i < path->length (); i++)
766 edge epath = (*path)[i]->e;
767 gcov_type cur_count = epath->count;
768 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
770 has_joiner = true;
771 cur_count = apply_probability (cur_count, onpath_scale);
773 /* In the joiner case we need to update nonpath_count for any edges
774 coming into the path that will contribute to the count flowing
775 into the path successor. */
776 if (has_joiner && epath != elast)
778 /* Look for other incoming edges after joiner. */
779 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
781 if (ein != epath
782 /* Ignore in edges from blocks we have duplicated for a
783 threading path, which have duplicated edge counts until
784 they are redirected by an invocation of this routine. */
785 && !bitmap_bit_p (local_info->duplicate_blocks,
786 ein->src->index))
787 nonpath_count += ein->count;
790 if (cur_count < path_out_count)
791 path_out_count = cur_count;
792 if (epath->count < min_path_count)
793 min_path_count = epath->count;
796 /* We computed path_out_count above assuming that this path targeted
797 the joiner's on-path successor with the same likelihood as it
798 reached the joiner. However, other thread paths through the joiner
799 may take a different path through the normal copy source block
800 (i.e. they have a different elast), meaning that they do not
801 contribute any counts to this path's elast. As a result, it may
802 turn out that this path must have more count flowing to the on-path
803 successor of the joiner. Essentially, all of this path's elast
804 count must be contributed by this path and any nonpath counts
805 (since any path through the joiner with a different elast will not
806 include a copy of this elast in its duplicated path).
807 So ensure that this path's path_out_count is at least the
808 difference between elast->count and nonpath_count. Otherwise the edge
809 counts after threading will not be sane. */
810 if (has_joiner && path_out_count < elast->count - nonpath_count)
812 path_out_count = elast->count - nonpath_count;
813 /* But neither can we go above the minimum count along the path
814 we are duplicating. This can be an issue due to profile
815 insanities coming in to this pass. */
816 if (path_out_count > min_path_count)
817 path_out_count = min_path_count;
820 *path_in_count_ptr = path_in_count;
821 *path_out_count_ptr = path_out_count;
822 *path_in_freq_ptr = path_in_freq;
823 return has_joiner;
827 /* Update the counts and frequencies for both an original path
828 edge EPATH and its duplicate EDUP. The duplicate source block
829 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
830 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
831 static void
832 update_profile (edge epath, edge edup, gcov_type path_in_count,
833 gcov_type path_out_count, int path_in_freq)
836 /* First update the duplicated block's count / frequency. */
837 if (edup)
839 basic_block dup_block = edup->src;
840 gcc_assert (dup_block->count == 0);
841 gcc_assert (dup_block->frequency == 0);
842 dup_block->count = path_in_count;
843 dup_block->frequency = path_in_freq;
846 /* Now update the original block's count and frequency in the
847 opposite manner - remove the counts/freq that will flow
848 into the duplicated block. Handle underflow due to precision/
849 rounding issues. */
850 epath->src->count -= path_in_count;
851 if (epath->src->count < 0)
852 epath->src->count = 0;
853 epath->src->frequency -= path_in_freq;
854 if (epath->src->frequency < 0)
855 epath->src->frequency = 0;
857 /* Next update this path edge's original and duplicated counts. We know
858 that the duplicated path will have path_out_count flowing
859 out of it (in the joiner case this is the count along the duplicated path
860 out of the duplicated joiner). This count can then be removed from the
861 original path edge. */
862 if (edup)
863 edup->count = path_out_count;
864 epath->count -= path_out_count;
865 gcc_assert (epath->count >= 0);
869 /* The duplicate and original joiner blocks may end up with different
870 probabilities (different from both the original and from each other).
871 Recompute the probabilities here once we have updated the edge
872 counts and frequencies. */
874 static void
875 recompute_probabilities (basic_block bb)
877 edge esucc;
878 edge_iterator ei;
879 FOR_EACH_EDGE (esucc, ei, bb->succs)
881 if (!bb->count)
882 continue;
884 /* Prevent overflow computation due to insane profiles. */
885 if (esucc->count < bb->count)
886 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
887 bb->count);
888 else
889 /* Can happen with missing/guessed probabilities, since we
890 may determine that more is flowing along duplicated
891 path than joiner succ probabilities allowed.
892 Counts and freqs will be insane after jump threading;
893 at least make sure the probability is sane or we will
894 get a flow verification error.
895 Not much we can do to make counts/freqs sane without
896 redoing the profile estimation. */
897 esucc->probability = REG_BR_PROB_BASE;
902 /* Update the counts of the original and duplicated edges from a joiner
903 that go off path, given that we have already determined that the
904 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
905 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
906 edge from joiner is EPATH. */
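/* Continuing the illustrative numbers used earlier: with path_in_count
   40 and path_out_count 32 for the duplicated joiner Ja, the count
   that must leave Ja off-path is 40 - 32 = 8.  If (hypothetically) the
   original J had two off-path edges with counts 15 and 5, the
   duplicated off-path edges out of Ja would receive 8 * 15/20 = 6 and
   8 * 5/20 = 2, and the original off-path edges would be reduced to
   9 and 3 respectively.  */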
908 static void
909 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
910 gcov_type path_in_count,
911 gcov_type path_out_count)
913 /* Compute the count that currently flows off path from the joiner.
914 In other words, the total count of joiner's out edges other than
915 epath. Compute this by walking the successors instead of
916 subtracting epath's count from the joiner bb count, since there
917 are sometimes slight insanities where the total out edge count is
918 larger than the bb count (possibly due to rounding/truncation
919 errors). */
920 gcov_type total_orig_off_path_count = 0;
921 edge enonpath;
922 edge_iterator ei;
923 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
925 if (enonpath == epath)
926 continue;
927 total_orig_off_path_count += enonpath->count;
930 /* For the path that we are duplicating, the amount that will flow
931 off path from the duplicated joiner is the delta between the
932 path's cumulative in count and the portion of that count we
933 estimated above as flowing from the joiner along the duplicated
934 path. */
935 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
937 /* Now do the actual updates of the off-path edges. */
938 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
940 /* Look for edges going off of the threading path. */
941 if (enonpath == epath)
942 continue;
944 /* Find the corresponding edge out of the duplicated joiner. */
945 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
946 gcc_assert (enonpathdup);
948 /* We can't use the original probability of the joiner's out
949 edges, since the probabilities of the original branch
950 and the duplicated branches may vary after all threading is
951 complete. But apportion the duplicated joiner's off-path
952 total edge count computed earlier (total_dup_off_path_count)
953 among the duplicated off-path edges based on their original
954 ratio to the full off-path count (total_orig_off_path_count). */
956 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
957 total_orig_off_path_count);
958 /* Give the duplicated offpath edge a portion of the duplicated
959 total. */
960 enonpathdup->count = apply_scale (scale,
961 total_dup_off_path_count);
962 /* Now update the original offpath edge count, handling underflow
963 due to rounding errors. */
964 enonpath->count -= enonpathdup->count;
965 if (enonpath->count < 0)
966 enonpath->count = 0;
971 /* Check if the paths through RD all have estimated frequencies but zero
972 profile counts. This is more accurate than checking the entry block
973 for a zero profile count, since profile insanities sometimes creep in. */
975 static bool
976 estimated_freqs_path (struct redirection_data *rd)
978 edge e = rd->incoming_edges->e;
979 vec<jump_thread_edge *> *path = THREAD_PATH (e);
980 edge ein;
981 edge_iterator ei;
982 bool non_zero_freq = false;
983 FOR_EACH_EDGE (ein, ei, e->dest->preds)
985 if (ein->count)
986 return false;
987 non_zero_freq |= ein->src->frequency != 0;
990 for (unsigned int i = 1; i < path->length (); i++)
992 edge epath = (*path)[i]->e;
993 if (epath->src->count)
994 return false;
995 non_zero_freq |= epath->src->frequency != 0;
996 edge esucc;
997 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
999 if (esucc->count)
1000 return false;
1001 non_zero_freq |= esucc->src->frequency != 0;
1004 return non_zero_freq;
1008 /* Invoked for routines that have guessed frequencies and no profile
1009 counts to record the block and edge frequencies for paths through RD
1010 in the profile count fields of those blocks and edges. This is because
1011 ssa_fix_duplicate_block_edges incrementally updates the block and
1012 edge counts as edges are redirected, and it is difficult to do that
1013 for edge frequencies which are computed on the fly from the source
1014 block frequency and probability. When a block frequency is updated
1015 its outgoing edge frequencies are affected and become difficult to
1016 adjust. */
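/* Illustrative arithmetic for the scaling below (assuming
   REG_BR_PROB_BASE is 10000): a source block with frequency 900 and an
   outgoing edge of probability 3000 (30%) gets a temporary block count
   of 900 * 10000 = 9000000 and an edge count of
   9000000 * 3000 / 10000 = 2700000.  These counts only live until
   clear_counts_path clears them again.  */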
1018 static void
1019 freqs_to_counts_path (struct redirection_data *rd)
1021 edge e = rd->incoming_edges->e;
1022 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1023 edge ein;
1024 edge_iterator ei;
1025 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1027 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1028 errors applying the probability when the frequencies are very
1029 small. */
1030 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1031 ein->probability);
1034 for (unsigned int i = 1; i < path->length (); i++)
1036 edge epath = (*path)[i]->e;
1037 edge esucc;
1038 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1039 errors applying the edge probability when the frequencies are very
1040 small. */
1041 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1042 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1043 esucc->count = apply_probability (esucc->src->count,
1044 esucc->probability);
1049 /* For routines that have guessed frequencies and no profile counts, where we
1050 used freqs_to_counts_path to record block and edge frequencies for paths
1051 through RD, we clear the counts after completing all updates for RD.
1052 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1053 but the block frequencies and edge probabilities were updated as well,
1054 so we can simply clear the count fields. */
1056 static void
1057 clear_counts_path (struct redirection_data *rd)
1059 edge e = rd->incoming_edges->e;
1060 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1061 edge ein, esucc;
1062 edge_iterator ei;
1063 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1064 ein->count = 0;
1066 /* First clear counts along original path. */
1067 for (unsigned int i = 1; i < path->length (); i++)
1069 edge epath = (*path)[i]->e;
1070 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1071 esucc->count = 0;
1072 epath->src->count = 0;
1074 /* Also need to clear the counts along duplicated path. */
1075 for (unsigned int i = 0; i < 2; i++)
1077 basic_block dup = rd->dup_blocks[i];
1078 if (!dup)
1079 continue;
1080 FOR_EACH_EDGE (esucc, ei, dup->succs)
1081 esucc->count = 0;
1082 dup->count = 0;
1086 /* Wire up the outgoing edges from the duplicate blocks and
1087 update any PHIs as needed. Also update the profile counts
1088 on the original and duplicate blocks and edges. */
1089 void
1090 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1091 ssa_local_info_t *local_info)
1093 bool multi_incomings = (rd->incoming_edges->next != NULL);
1094 edge e = rd->incoming_edges->e;
1095 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1096 edge elast = path->last ()->e;
1097 gcov_type path_in_count = 0;
1098 gcov_type path_out_count = 0;
1099 int path_in_freq = 0;
1101 /* This routine updates profile counts, frequencies, and probabilities
1102 incrementally. Since it is difficult to do the incremental updates
1103 using frequencies/probabilities alone, for routines without profile
1104 data we first take a snapshot of the existing block and edge frequencies
1105 by copying them into the empty profile count fields. These counts are
1106 then used to do the incremental updates, and cleared at the end of this
1107 routine. If the function is marked as having a profile, we still check
1108 to see if the paths through RD are using estimated frequencies because
1109 the routine had zero profile counts. */
1110 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1111 || estimated_freqs_path (rd));
1112 if (do_freqs_to_counts)
1113 freqs_to_counts_path (rd);
1115 /* First determine how much profile count to move from original
1116 path to the duplicate path. This is tricky in the presence of
1117 a joiner (see comments for compute_path_counts), where some portion
1118 of the path's counts will flow off-path from the joiner. In the
1119 non-joiner case the path_in_count and path_out_count should be the
1120 same. */
1121 bool has_joiner = compute_path_counts (rd, local_info,
1122 &path_in_count, &path_out_count,
1123 &path_in_freq);
1125 int cur_path_freq = path_in_freq;
1126 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1128 edge epath = (*path)[i]->e;
1130 /* If we were threading through a joiner block, then we want
1131 to keep its control statement and redirect an outgoing edge.
1132 Else we want to remove the control statement & edges, then create
1133 a new outgoing edge. In both cases we may need to update PHIs. */
1134 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1136 edge victim;
1137 edge e2;
1139 gcc_assert (has_joiner);
1141 /* This updates the PHIs at the destination of the duplicate
1142 block. Pass 0 instead of i if we are threading a path which
1143 has multiple incoming edges. */
1144 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1145 path, multi_incomings ? 0 : i);
1147 /* Find the edge from the duplicate block to the block we're
1148 threading through. That's the edge we want to redirect. */
1149 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1151 /* If there are no remaining blocks on the path to duplicate,
1152 then redirect VICTIM to the final destination of the jump
1153 threading path. */
1154 if (!any_remaining_duplicated_blocks (path, i))
1156 e2 = redirect_edge_and_branch (victim, elast->dest);
1157 /* If we redirected the edge, then we need to copy PHI arguments
1158 at the target. If the edge already existed (e2 != victim
1159 case), then the PHIs in the target already have the correct
1160 arguments. */
1161 if (e2 == victim)
1162 copy_phi_args (e2->dest, elast, e2,
1163 path, multi_incomings ? 0 : i);
1165 else
1167 /* Redirect VICTIM to the next duplicated block in the path. */
1168 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1170 /* We need to update the PHIs in the next duplicated block. We
1171 want the new PHI args to have the same value as they had
1172 in the source of the next duplicate block.
1174 Thus, we need to know which edge we traversed into the
1175 source of the duplicate. Furthermore, we may have
1176 traversed many edges to reach the source of the duplicate.
1178 Walk through the path starting at element I until we
1179 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1180 the edge from the prior element. */
1181 for (unsigned int j = i + 1; j < path->length (); j++)
1183 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1185 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1186 break;
1191 /* Update the counts and frequency of both the original block
1192 and path edge, and the duplicates. The path duplicate's
1193 incoming count and frequency are the totals for all edges
1194 incoming to this jump threading path computed earlier.
1195 And we know that the duplicated path will have path_out_count
1196 flowing out of it (i.e. along the duplicated path out of the
1197 duplicated joiner). */
1198 update_profile (epath, e2, path_in_count, path_out_count,
1199 path_in_freq);
1201 /* Next we need to update the counts of the original and duplicated
1202 edges from the joiner that go off path. */
1203 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1204 path_out_count);
1206 /* Finally, we need to set the probabilities on the duplicated
1207 edges out of the duplicated joiner (e2->src). The probabilities
1208 along the original path will all be updated below after we finish
1209 processing the whole path. */
1210 recompute_probabilities (e2->src);
1212 /* Record the frequency flowing to the downstream duplicated
1213 path blocks. */
1214 cur_path_freq = EDGE_FREQUENCY (e2);
1216 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1218 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1219 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1220 multi_incomings ? 0 : i);
1221 if (count == 1)
1222 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1224 /* Update the counts and frequency of both the original block
1225 and path edge, and the duplicates. Since we are now after
1226 any joiner that may have existed on the path, the count
1227 flowing along the duplicated threaded path is path_out_count.
1228 If we didn't have a joiner, then cur_path_freq was the sum
1229 of the total frequencies along all incoming edges to the
1230 thread path (path_in_freq). If we had a joiner, it would have
1231 been updated at the end of that handling to the edge frequency
1232 along the duplicated joiner path edge. */
1233 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1234 path_out_count, path_out_count,
1235 cur_path_freq);
1237 else
1239 /* No copy case. In this case we don't have an equivalent block
1240 on the duplicated thread path to update, but we do need
1241 to remove the portion of the counts/freqs that were moved
1242 to the duplicated path from the counts/freqs flowing through
1243 this block on the original path. Since all the no-copy edges
1244 are after any joiner, the removed count is the same as
1245 path_out_count.
1247 If we didn't have a joiner, then cur_path_freq was the sum
1248 of the total frequencies along all incoming edges to the
1249 thread path (path_in_freq). If we had a joiner, it would have
1250 been updated at the end of that handling to the edge frequency
1251 along the duplicated joiner path edge. */
1252 update_profile (epath, NULL, path_out_count, path_out_count,
1253 cur_path_freq);
1256 /* Increment the index into the duplicated path when we processed
1257 a duplicated block. */
1258 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1259 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1261 count++;
1265 /* Now walk orig blocks and update their probabilities, since the
1266 counts and freqs should be updated properly by above loop. */
1267 for (unsigned int i = 1; i < path->length (); i++)
1269 edge epath = (*path)[i]->e;
1270 recompute_probabilities (epath->src);
1273 /* Done with all profile and frequency updates, clear counts if they
1274 were copied. */
1275 if (do_freqs_to_counts)
1276 clear_counts_path (rd);
1279 /* Hash table traversal callback routine to create duplicate blocks. */
1282 ssa_create_duplicates (struct redirection_data **slot,
1283 ssa_local_info_t *local_info)
1285 struct redirection_data *rd = *slot;
1287 /* The second duplicated block in a jump threading path is specific
1288 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1290 Each time we're called, we have to look through the path and see
1291 if a second block needs to be duplicated.
1293 Note the search starts with the third edge on the path. The first
1294 edge is the incoming edge, the second edge always has its source
1295 duplicated. Thus we start our search with the third edge. */
1296 vec<jump_thread_edge *> *path = rd->path;
1297 for (unsigned int i = 2; i < path->length (); i++)
1299 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1300 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1302 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1303 &local_info->duplicate_blocks);
1304 break;
1308 /* Create a template block if we have not done so already. Otherwise
1309 use the template to create a new block. */
1310 if (local_info->template_block == NULL)
1312 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1313 &local_info->duplicate_blocks);
1314 local_info->template_block = rd->dup_blocks[0];
1316 /* We do not create any outgoing edges for the template. We will
1317 take care of that in a later traversal. That way we do not
1318 create edges that are going to just be deleted. */
1320 else
1322 create_block_for_threading (local_info->template_block, rd, 0,
1323 &local_info->duplicate_blocks);
1325 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1326 block. */
1327 ssa_fix_duplicate_block_edges (rd, local_info);
1330 /* Keep walking the hash table. */
1331 return 1;
1334 /* We did not create any outgoing edges for the template block during
1335 block creation. This hash table traversal callback creates the
1336 outgoing edge for the template block. */
1338 inline int
1339 ssa_fixup_template_block (struct redirection_data **slot,
1340 ssa_local_info_t *local_info)
1342 struct redirection_data *rd = *slot;
1344 /* If this is the template block, halt the traversal after updating
1345 it appropriately.
1347 If we were threading through a joiner block, then we want
1348 to keep its control statement and redirect an outgoing edge.
1349 Else we want to remove the control statement & edges, then create
1350 a new outgoing edge. In both cases we may need to update PHIs. */
1351 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1353 ssa_fix_duplicate_block_edges (rd, local_info);
1354 return 0;
1357 return 1;
1360 /* Hash table traversal callback to redirect each incoming edge
1361 associated with this hash table element to its new destination. */
1364 ssa_redirect_edges (struct redirection_data **slot,
1365 ssa_local_info_t *local_info)
1367 struct redirection_data *rd = *slot;
1368 struct el *next, *el;
1370 /* Walk over all the incoming edges associated with this
1371 hash table entry. */
1372 for (el = rd->incoming_edges; el; el = next)
1374 edge e = el->e;
1375 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1377 /* Go ahead and free this element from the list. Doing this now
1378 avoids the need for another list walk when we destroy the hash
1379 table. */
1380 next = el->next;
1381 free (el);
1383 thread_stats.num_threaded_edges++;
1385 if (rd->dup_blocks[0])
1387 edge e2;
1389 if (dump_file && (dump_flags & TDF_DETAILS))
1390 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1391 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1393 /* If we redirect a loop latch edge cancel its loop. */
1394 if (e->src == e->src->loop_father->latch)
1395 mark_loop_for_removal (e->src->loop_father);
1397 /* Redirect the incoming edge (possibly to the joiner block) to the
1398 appropriate duplicate block. */
1399 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1400 gcc_assert (e == e2);
1401 flush_pending_stmts (e2);
1404 /* Go ahead and clear E->aux. It's not needed anymore and failure
1405 to clear it will cause all kinds of unpleasant problems later. */
1406 delete_jump_thread_path (path);
1407 e->aux = NULL;
1411 /* Indicate that we actually threaded one or more jumps. */
1412 if (rd->incoming_edges)
1413 local_info->jumps_threaded = true;
1415 return 1;
1418 /* Return true if this block has no executable statements other than
1419 a simple ctrl flow instruction. When the number of outgoing edges
1420 is one, this is equivalent to a "forwarder" block. */
1422 static bool
1423 redirection_block_p (basic_block bb)
1425 gimple_stmt_iterator gsi;
1427 /* Advance to the first executable statement. */
1428 gsi = gsi_start_bb (bb);
1429 while (!gsi_end_p (gsi)
1430 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1431 || is_gimple_debug (gsi_stmt (gsi))
1432 || gimple_nop_p (gsi_stmt (gsi))))
1433 gsi_next (&gsi);
1435 /* Check if this is an empty block. */
1436 if (gsi_end_p (gsi))
1437 return true;
1439 /* Test that we've reached the terminating control statement. */
1440 return gsi_stmt (gsi)
1441 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1442 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1443 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1446 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1447 is reached via one or more specific incoming edges, we know which
1448 outgoing edge from BB will be traversed.
1450 We want to redirect those incoming edges to the target of the
1451 appropriate outgoing edge. Doing so avoids a conditional branch
1452 and may expose new optimization opportunities. Note that we have
1453 to update dominator tree and SSA graph after such changes.
1455 The key to keeping the SSA graph update manageable is to duplicate
1456 the side effects occurring in BB so that those side effects still
1457 occur on the paths which bypass BB after redirecting edges.
1459 We accomplish this by creating duplicates of BB and arranging for
1460 the duplicates to unconditionally pass control to one specific
1461 successor of BB. We then revector the incoming edges into BB to
1462 the appropriate duplicate of BB.
1464 If NOLOOP_ONLY is true, we only perform the threading as long as it
1465 does not affect the structure of the loops in a nontrivial way.
1467 If JOINERS is true, then thread through joiner blocks as well. */
1469 static bool
1470 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1472 /* E is an incoming edge into BB that we may or may not want to
1473 redirect to a duplicate of BB. */
1474 edge e, e2;
1475 edge_iterator ei;
1476 ssa_local_info_t local_info;
1478 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1480 /* To avoid scanning a linear array for the element we need we instead
1481 use a hash table. For normal code there should be no noticeable
1482 difference. However, if we have a block with a large number of
1483 incoming and outgoing edges such linear searches can get expensive. */
1484 redirection_data
1485 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1487 /* Record each unique threaded destination into a hash table for
1488 efficient lookups. */
1489 FOR_EACH_EDGE (e, ei, bb->preds)
1491 if (e->aux == NULL)
1492 continue;
1494 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1496 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1497 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1498 continue;
1500 e2 = path->last ()->e;
1501 if (!e2 || noloop_only)
1503 /* If NOLOOP_ONLY is true, we only allow threading through the
1504 header of a loop to exit edges. */
1506 /* One case occurs when there was a loop header buried in a jump
1507 threading path that crosses loop boundaries. We do not try
1508 and thread this elsewhere, so just cancel the jump threading
1509 request by clearing the AUX field now. */
1510 if ((bb->loop_father != e2->src->loop_father
1511 && !loop_exit_edge_p (e2->src->loop_father, e2))
1512 || (e2->src->loop_father != e2->dest->loop_father
1513 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1515 /* Since this case is not handled by our special code
1516 to thread through a loop header, we must explicitly
1517 cancel the threading request here. */
1518 delete_jump_thread_path (path);
1519 e->aux = NULL;
1520 continue;
1523 /* Another case occurs when trying to thread through our
1524 own loop header, possibly from inside the loop. We will
1525 thread these later. */
1526 unsigned int i;
1527 for (i = 1; i < path->length (); i++)
1529 if ((*path)[i]->e->src == bb->loop_father->header
1530 && (!loop_exit_edge_p (bb->loop_father, e2)
1531 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1532 break;
1535 if (i != path->length ())
1536 continue;
1539 /* Insert the outgoing edge into the hash table if it is not
1540 already in the hash table. */
1541 lookup_redirection_data (e, INSERT);
1544 /* We do not update dominance info. */
1545 free_dominance_info (CDI_DOMINATORS);
1547 /* We know we only thread through the loop header to loop exits.
1548 Let the basic block duplication hook know we are not creating
1549 a multiple entry loop. */
1550 if (noloop_only
1551 && bb == bb->loop_father->header)
1552 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1554 /* Now create duplicates of BB.
1556 Note that for a block with a high outgoing degree we can waste
1557 a lot of time and memory creating and destroying useless edges.
1559 So we first duplicate BB and remove the control structure at the
1560 tail of the duplicate as well as all outgoing edges from the
1561 duplicate. We then use that duplicate block as a template for
1562 the rest of the duplicates. */
1563 local_info.template_block = NULL;
1564 local_info.bb = bb;
1565 local_info.jumps_threaded = false;
1566 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1567 (&local_info);
1569 /* The template does not have an outgoing edge. Create that outgoing
1570 edge and update PHI nodes at the edge's target as necessary.
1572 We do this after creating all the duplicates to avoid creating
1573 unnecessary edges. */
1574 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1575 (&local_info);
1577 /* The hash table traversals above created the duplicate blocks (and the
1578 statements within the duplicate blocks). This loop creates PHI nodes for
1579 the duplicated blocks and redirects the incoming edges into BB to reach
1580 the duplicates of BB. */
1581 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1582 (&local_info);
1584 /* Done with this block. Clear REDIRECTION_DATA. */
1585 delete redirection_data;
1586 redirection_data = NULL;
1588 if (noloop_only
1589 && bb == bb->loop_father->header)
1590 set_loop_copy (bb->loop_father, NULL);
1592 BITMAP_FREE (local_info.duplicate_blocks);
1593 local_info.duplicate_blocks = NULL;
1595 /* Indicate to our caller whether or not any jumps were threaded. */
1596 return local_info.jumps_threaded;
1599 /* Wrapper for thread_block_1 so that we can first handle jump
1600 thread paths which do not involve copying joiner blocks, then
1601 handle jump thread paths which have joiner blocks.
1603 By doing things this way we can be as aggressive as possible and
1604 not worry that copying a joiner block will create a jump threading
1605 opportunity. */
1607 static bool
1608 thread_block (basic_block bb, bool noloop_only)
1610 bool retval;
1611 retval = thread_block_1 (bb, noloop_only, false);
1612 retval |= thread_block_1 (bb, noloop_only, true);
1613 return retval;
1617 /* Threads edge E through E->dest to the second edge on E's jump thread path.  Returns the
1618 copy of E->dest created during threading, or E->dest if it was not necessary
1619 to copy it (E is its single predecessor). */
1621 static basic_block
1622 thread_single_edge (edge e)
1624 basic_block bb = e->dest;
1625 struct redirection_data rd;
1626 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1627 edge eto = (*path)[1]->e;
1629 for (unsigned int i = 0; i < path->length (); i++)
1630 delete (*path)[i];
1631 delete path;
1632 e->aux = NULL;
1634 thread_stats.num_threaded_edges++;
1636 if (single_pred_p (bb))
1638 /* If BB has just a single predecessor, we need only remove the
1639 control statement at its end and the outgoing edges other than ETO.  */
1640 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1642 /* And fixup the flags on the single remaining edge. */
1643 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1644 eto->flags |= EDGE_FALLTHRU;
1646 return bb;
1649 /* Otherwise, we need to create a copy. */
1650 if (e->dest == eto->src)
1651 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
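  /* Build a minimal two-entry jump thread path (E, ETO) so that the generic
     block-copying helpers below can be reused for this single copy.  */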
1653 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1654 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1655 npath->safe_push (x);
1657 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1658 npath->safe_push (x);
1659 rd.path = npath;
1661 create_block_for_threading (bb, &rd, 0, NULL);
1662 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1663 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1665 if (dump_file && (dump_flags & TDF_DETAILS))
1666 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1667 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1669 rd.dup_blocks[0]->count = e->count;
1670 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1671 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1672 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1673 flush_pending_stmts (e);
1675 return rd.dup_blocks[0];
1678 /* Callback for dfs_enumerate_from. Returns true if BB is different
1679 from STOP and DBDS_CE_STOP. */
1681 static basic_block dbds_ce_stop;
1682 static bool
1683 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1685 return (bb != (const_basic_block) stop
1686 && bb != dbds_ce_stop);
1689 /* Evaluates the dominance relationship between the latch of LOOP and BB,
1690 and returns the state.  */
1692 enum bb_dom_status
1694 /* BB does not dominate latch of the LOOP. */
1695 DOMST_NONDOMINATING,
1696 /* The LOOP is broken (there is no path from the header to its latch).  */
1697 DOMST_LOOP_BROKEN,
1698 /* BB dominates the latch of the LOOP. */
1699 DOMST_DOMINATING
1702 static enum bb_dom_status
1703 determine_bb_domination_status (struct loop *loop, basic_block bb)
1705 basic_block *bblocks;
1706 unsigned nblocks, i;
1707 bool bb_reachable = false;
1708 edge_iterator ei;
1709 edge e;
1711 /* This function assumes BB is a successor of LOOP->header.
1712 If that is not the case, return DOMST_NONDOMINATING, which
1713 is always safe. */
1715 bool ok = false;
1717 FOR_EACH_EDGE (e, ei, bb->preds)
1719 if (e->src == loop->header)
1721 ok = true;
1722 break;
1726 if (!ok)
1727 return DOMST_NONDOMINATING;
1730 if (bb == loop->latch)
1731 return DOMST_DOMINATING;
1733 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1734 from it. */
1736 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1737 dbds_ce_stop = loop->header;
1738 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1739 bblocks, loop->num_nodes, bb);
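  /* The enumeration collects the blocks from which the latch can be reached
     backwards without passing through BB or the loop header.  If the header
     feeds any of those blocks directly, there is a path from the header to
     the latch that avoids BB, so BB does not dominate the latch.  */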
1740 for (i = 0; i < nblocks; i++)
1741 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1743 if (e->src == loop->header)
1745 free (bblocks);
1746 return DOMST_NONDOMINATING;
1748 if (e->src == bb)
1749 bb_reachable = true;
1752 free (bblocks);
1753 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1756 /* Return true if BB is part of the new pre-header that is created
1757 when threading the latch to DATA. */
1759 static bool
1760 def_split_header_continue_p (const_basic_block bb, const void *data)
1762 const_basic_block new_header = (const_basic_block) data;
1763 const struct loop *l;
1765 if (bb == new_header
1766 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1767 return false;
1768 for (l = bb->loop_father; l; l = loop_outer (l))
1769 if (l == new_header->loop_father)
1770 return true;
1771 return false;
1774 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1775 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1776 to the inside of the loop. */
1778 static bool
1779 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1781 basic_block header = loop->header;
1782 edge e, tgt_edge, latch = loop_latch_edge (loop);
1783 edge_iterator ei;
1784 basic_block tgt_bb, atgt_bb;
1785 enum bb_dom_status domst;
1787 /* We have already threaded through headers to exits, so all the threading
1788 requests now are to the inside of the loop. We need to avoid creating
1789 irreducible regions (i.e., loops with more than one entry block), and
1790 also loops with several latch edges, or new subloops of the loop (although
1791 there are cases where it might be appropriate, it is difficult to decide,
1792 and doing it wrongly may confuse other optimizers).
1794 We could handle more general cases here. However, the intention is to
1795 preserve some information about the loop, which is impossible if its
1796 structure changes significantly, in a way that is not well understood.
1797 Thus we only handle a few important special cases, in which updating
1798 the loop-carried information should also be feasible:
1800 1) Propagation of latch edge to a block that dominates the latch block
1801 of a loop. This aims to handle the following idiom:
1803 first = 1;
1804 while (1)
1806 if (first)
1807 initialize;
1808 first = 0;
1809 body;
1812 After threading the latch edge, this becomes
1814 first = 1;
1815 if (first)
1816 initialize;
1817 while (1)
1819 first = 0;
1820 body;
1823 The original header of the loop is moved out of it, and we may thread
1824 the remaining edges through it without further constraints.
1826 2) All entry edges are propagated to a single basic block that dominates
1827 the latch block of the loop. This aims to handle the following idiom
1828 (normally created for "for" loops):
1830 i = 0;
1831 while (1)
1833 if (i >= 100)
1834 break;
1835 body;
1836 i++;
1839 This becomes
1841 i = 0;
1842 while (1)
1844 body;
1845 i++;
1846 if (i >= 100)
1847 break;
1851 /* Threading through the header won't improve the code if the header has just
1852 one successor. */
1853 if (single_succ_p (header))
1854 goto fail;
1856 /* If we threaded the latch using a joiner block, we cancel the
1857 threading opportunity out of an abundance of caution. However,
1858 still allow threading from outside to inside the loop. */
1859 if (latch->aux)
1861 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1862 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1864 delete_jump_thread_path (path);
1865 latch->aux = NULL;
1869 if (latch->aux)
1871 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1872 tgt_edge = (*path)[1]->e;
1873 tgt_bb = tgt_edge->dest;
1875 else if (!may_peel_loop_headers
1876 && !redirection_block_p (loop->header))
1877 goto fail;
1878 else
1880 tgt_bb = NULL;
1881 tgt_edge = NULL;
1882 FOR_EACH_EDGE (e, ei, header->preds)
1884 if (!e->aux)
1886 if (e == latch)
1887 continue;
1889 /* If latch is not threaded, and there is a header
1890 edge that is not threaded, we would create a loop
1891 with multiple entries. */
1892 goto fail;
1895 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1897 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1898 goto fail;
1899 tgt_edge = (*path)[1]->e;
1900 atgt_bb = tgt_edge->dest;
1901 if (!tgt_bb)
1902 tgt_bb = atgt_bb;
1903 /* Two targets of threading would make us create a loop
1904 with multiple entries. */
1905 else if (tgt_bb != atgt_bb)
1906 goto fail;
1909 if (!tgt_bb)
1911 /* There are no threading requests. */
1912 return false;
1915 /* Redirecting to empty loop latch is useless. */
1916 if (tgt_bb == loop->latch
1917 && empty_block_p (loop->latch))
1918 goto fail;
1921 /* The target block must dominate the loop latch, otherwise we would be
1922 creating a subloop. */
1923 domst = determine_bb_domination_status (loop, tgt_bb);
1924 if (domst == DOMST_NONDOMINATING)
1925 goto fail;
1926 if (domst == DOMST_LOOP_BROKEN)
1928 /* If the loop ceased to exist, mark it as such, and thread through its
1929 original header. */
1930 mark_loop_for_removal (loop);
1931 return thread_block (header, false);
1934 if (tgt_bb->loop_father->header == tgt_bb)
1936 /* If the target of the threading is a header of a subloop, we need
1937 to create a preheader for it, so that the headers of the two loops
1938 do not merge. */
1939 if (EDGE_COUNT (tgt_bb->preds) > 2)
1941 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1942 gcc_assert (tgt_bb != NULL);
1944 else
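	/* With only two predecessors (the subloop's latch and TGT_EDGE),
	   splitting TGT_EDGE is sufficient to give the subloop its own
	   preheader.  */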
1945 tgt_bb = split_edge (tgt_edge);
1948 if (latch->aux)
1950 basic_block *bblocks;
1951 unsigned nblocks, i;
1953 /* First handle the case where the latch edge is redirected.  We are copying
1954 the loop header but not creating a multiple entry loop. Make the
1955 cfg manipulation code aware of that fact. */
1956 set_loop_copy (loop, loop);
1957 loop->latch = thread_single_edge (latch);
1958 set_loop_copy (loop, NULL);
1959 gcc_assert (single_succ (loop->latch) == tgt_bb);
1960 loop->header = tgt_bb;
1962 /* Remove the new pre-header blocks from our loop. */
1963 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1964 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1965 bblocks, loop->num_nodes, tgt_bb);
1966 for (i = 0; i < nblocks; i++)
1967 if (bblocks[i]->loop_father == loop)
1969 remove_bb_from_loops (bblocks[i]);
1970 add_bb_to_loop (bblocks[i], loop_outer (loop));
1972 free (bblocks);
1974 /* If the new header has multiple latches, mark it so.  */
1975 FOR_EACH_EDGE (e, ei, loop->header->preds)
1976 if (e->src->loop_father == loop
1977 && e->src != loop->latch)
1979 loop->latch = NULL;
1980 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1983 /* Cancel remaining threading requests that would make the
1984 loop a multiple entry loop. */
1985 FOR_EACH_EDGE (e, ei, header->preds)
1987 edge e2;
1989 if (e->aux == NULL)
1990 continue;
1992 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1993 e2 = path->last ()->e;
1995 if (e->src->loop_father != e2->dest->loop_father
1996 && e2->dest != loop->header)
1998 delete_jump_thread_path (path);
1999 e->aux = NULL;
2003 /* Thread the remaining edges through the former header. */
2004 thread_block (header, false);
2006 else
2008 basic_block new_preheader;
2010 /* Now consider the case where entry edges are redirected to the new entry
2011 block. Remember one entry edge, so that we can find the new
2012 preheader (its destination after threading). */
2013 FOR_EACH_EDGE (e, ei, header->preds)
2015 if (e->aux)
2016 break;
2019 /* The duplicate of the header is the new preheader of the loop. Ensure
2020 that it is placed correctly in the loop hierarchy. */
2021 set_loop_copy (loop, loop_outer (loop));
2023 thread_block (header, false);
2024 set_loop_copy (loop, NULL);
2025 new_preheader = e->dest;
2027 /* Create the new latch block. This is always necessary, as the latch
2028 must have only a single successor, but the original header had at
2029 least two successors. */
2030 loop->latch = NULL;
2031 mfb_kj_edge = single_succ_edge (new_preheader);
2032 loop->header = mfb_kj_edge->dest;
2033 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2034 loop->header = latch->dest;
2035 loop->latch = latch->src;
2038 return true;
2040 fail:
2041 /* We failed to thread anything. Cancel the requests. */
2042 FOR_EACH_EDGE (e, ei, header->preds)
2044 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2046 if (path)
2048 delete_jump_thread_path (path);
2049 e->aux = NULL;
2052 return false;
2055 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2056 PHI arguments associated with those edges are equal or there are no
2057 PHI arguments, otherwise return FALSE. */
2059 static bool
2060 phi_args_equal_on_edges (edge e1, edge e2)
2062 gimple_stmt_iterator gsi;
2063 int indx1 = e1->dest_idx;
2064 int indx2 = e2->dest_idx;
2066 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2068 gimple phi = gsi_stmt (gsi);
2070 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2071 gimple_phi_arg_def (phi, indx2), 0))
2072 return false;
2074 return true;
2077 /* Walk through the registered jump threads and convert them into a
2078 form convenient for this pass.
2080 Any block which has incoming edges threaded to outgoing edges
2081 will have its entry in THREADED_BLOCKS set.
2083 Any threaded edge will have its new outgoing edge stored in the
2084 original edge's AUX field.
2086 This form avoids the need to walk all the edges in the CFG to
2087 discover blocks which need processing and avoids unnecessary
2088 hash table lookups to map from threaded edge to new target. */
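/* Purely illustrative example: for a recorded path (A->B, B->C, C->D) with
   no joiner block, this function stores the path pointer in A->B's AUX
   field and records B's index in THREADED_BLOCKS, so the threading code can
   find both the block to copy and the requested destination without
   rescanning the CFG.  */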
2090 static void
2091 mark_threaded_blocks (bitmap threaded_blocks)
2093 unsigned int i;
2094 bitmap_iterator bi;
2095 bitmap tmp = BITMAP_ALLOC (NULL);
2096 basic_block bb;
2097 edge e;
2098 edge_iterator ei;
2100 /* It is possible to have jump threads in which one is a subpath
2101 of the other, i.e., (A, B), (B, C), (C, D) where B is a joiner
2102 block and (B, C), (C, D) where no joiner block exists.
2104 When this occurs ignore the jump thread request with the joiner
2105 block. It's totally subsumed by the simpler jump thread request.
2107 This results in less block copying and simpler CFGs.  More importantly,
2108 when we duplicate the joiner block, B, in this case we will create
2109 a new threading opportunity that we wouldn't be able to optimize
2110 until the next jump threading iteration.
2112 So first convert the jump thread requests which do not require a
2113 joiner block. */
2114 for (i = 0; i < paths.length (); i++)
2116 vec<jump_thread_edge *> *path = paths[i];
2118 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2120 edge e = (*path)[0]->e;
2121 e->aux = (void *)path;
2122 bitmap_set_bit (tmp, e->dest->index);
2126 /* Now iterate again, converting cases where we want to thread
2127 through a joiner block, but only if no other edge on the path
2128 already has a jump thread attached to it. We do this in two passes,
2129 to avoid situations where the order in the paths vec can hide overlapping
2130 threads (the path is recorded on the incoming edge, so we would miss
2131 cases where the second path starts at a downstream edge on the same
2132 path). First record all joiner paths, deleting any in the unexpected
2133 case where there is already a path for that incoming edge. */
2134 for (i = 0; i < paths.length (); i++)
2136 vec<jump_thread_edge *> *path = paths[i];
2138 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2140 /* Attach the path to the starting edge if none is yet recorded. */
2141 if ((*path)[0]->e->aux == NULL)
2142 (*path)[0]->e->aux = path;
2143 else if (dump_file && (dump_flags & TDF_DETAILS))
2144 dump_jump_thread_path (dump_file, *path, false);
2147 /* Second, look for paths that have any other jump thread attached to
2148 them, and either finish converting them or cancel them. */
2149 for (i = 0; i < paths.length (); i++)
2151 vec<jump_thread_edge *> *path = paths[i];
2152 edge e = (*path)[0]->e;
2154 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2156 unsigned int j;
2157 for (j = 1; j < path->length (); j++)
2158 if ((*path)[j]->e->aux != NULL)
2159 break;
2161 /* If we iterated through the entire path without exiting the loop,
2162 then we are good to go; record it.  */
2163 if (j == path->length ())
2164 bitmap_set_bit (tmp, e->dest->index);
2165 else
2167 e->aux = NULL;
2168 if (dump_file && (dump_flags & TDF_DETAILS))
2169 dump_jump_thread_path (dump_file, *path, false);
2174 /* If optimizing for size, only thread through a block if we don't have
2175 to duplicate it or it's an otherwise empty redirection block. */
2176 if (optimize_function_for_size_p (cfun))
2178 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2180 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2181 if (EDGE_COUNT (bb->preds) > 1
2182 && !redirection_block_p (bb))
2184 FOR_EACH_EDGE (e, ei, bb->preds)
2186 if (e->aux)
2188 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2189 delete_jump_thread_path (path);
2190 e->aux = NULL;
2194 else
2195 bitmap_set_bit (threaded_blocks, i);
2198 else
2199 bitmap_copy (threaded_blocks, tmp);
2201 /* Look for jump threading paths which cross multiple loop headers.
2203 The code to thread through loop headers will change the CFG in ways
2204 that break assumptions made by the loop optimization code.
2206 We don't want to blindly cancel the requests. We can instead do better
2207 by trimming off the end of the jump thread path. */
2208 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2210 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2211 FOR_EACH_EDGE (e, ei, bb->preds)
2213 if (e->aux)
2215 vec<jump_thread_edge *> *path = THREAD_PATH (e);
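	  /* Count how many loop headers this path enters; as soon as a second
	     header would be crossed, trim the path at that point so that the
	     remaining prefix stays within a single loop nest.  */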
2217 for (unsigned int i = 0, crossed_headers = 0;
2218 i < path->length ();
2219 i++)
2221 basic_block dest = (*path)[i]->e->dest;
2222 crossed_headers += (dest == dest->loop_father->header);
2223 if (crossed_headers > 1)
2225 /* Trim from entry I onwards. */
2226 for (unsigned int j = i; j < path->length (); j++)
2227 delete (*path)[j];
2228 path->truncate (i);
2230 /* Now that we've truncated the path, make sure
2231 what's left is still valid. We need at least
2232 two edges on the path and the last edge can not
2233 be a joiner. This should never happen, but let's
2234 be safe. */
2235 if (path->length () < 2
2236 || (path->last ()->type
2237 == EDGE_COPY_SRC_JOINER_BLOCK))
2239 delete_jump_thread_path (path);
2240 e->aux = NULL;
2242 break;
2249 /* If we have a joiner block (J) which has two successors S1 and S2 and
2250 we are threading through S1 and the final destination of the thread
2251 is S2, then we must verify that any PHI nodes in S2 have the same
2252 PHI arguments for the edge J->S2 and J->S1->...->S2.
2254 We used to detect this prior to registering the jump thread, but
2255 that prohibits propagation of edge equivalences into non-dominated
2256 PHI nodes as the equivalency test might occur before propagation.
2258 This must also occur after we truncate any jump threading paths
2259 as this scenario may only show up after truncation.
2261 This works for now, but will need improvement as part of the FSA
2262 optimization.
2264 Note since we've moved the thread request data to the edges,
2265 we have to iterate on those rather than the threaded_edges vector. */
2266 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2268 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2269 FOR_EACH_EDGE (e, ei, bb->preds)
2271 if (e->aux)
2273 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2274 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2276 if (have_joiner)
2278 basic_block joiner = e->dest;
2279 edge final_edge = path->last ()->e;
2280 basic_block final_dest = final_edge->dest;
2281 edge e2 = find_edge (joiner, final_dest);
2283 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2285 delete_jump_thread_path (path);
2286 e->aux = NULL;
2293 BITMAP_FREE (tmp);
2297 /* Return TRUE if BB ends with a switch statement or a computed goto.
2298 Otherwise return false. */
2299 static bool
2300 bb_ends_with_multiway_branch (basic_block bb)
2302 gimple stmt = last_stmt (bb);
2303 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2304 return true;
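  /* A computed goto shows up as a GIMPLE_GOTO whose destination is an
     SSA_NAME rather than a LABEL_DECL.  */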
2305 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2306 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2307 return true;
2308 return false;
2311 /* Walk through all blocks and thread incoming edges to the appropriate
2312 outgoing edge for each recorded jump thread path.
2314 It is the caller's responsibility to fix the dominance information
2315 and rewrite duplicated SSA_NAMEs back into SSA form.
2317 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2318 loop headers if it does not simplify the loop.
2320 Returns true if one or more edges were threaded, false otherwise. */
2322 bool
2323 thread_through_all_blocks (bool may_peel_loop_headers)
2325 bool retval = false;
2326 unsigned int i;
2327 bitmap_iterator bi;
2328 bitmap threaded_blocks;
2329 struct loop *loop;
2331 if (!paths.exists ())
2332 return false;
2334 threaded_blocks = BITMAP_ALLOC (NULL);
2335 memset (&thread_stats, 0, sizeof (thread_stats));
2337 mark_threaded_blocks (threaded_blocks);
2339 initialize_original_copy_tables ();
2341 /* First perform the threading requests that do not affect
2342 loop structure. */
2343 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2345 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2347 if (EDGE_COUNT (bb->preds) > 0)
2348 retval |= thread_block (bb, true);
2351 /* Then perform the threading through loop headers. We start with the
2352 innermost loop, so that the changes in cfg we perform won't affect
2353 further threading. */
2354 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2356 if (!loop->header
2357 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2358 continue;
2360 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2363 /* Any jump threading paths that are still attached to edges at this
2364 point must fall into one of two cases.
2366 First, we could have a jump threading path which went from outside
2367 a loop to inside a loop that was ignored because a prior jump thread
2368 across a backedge was realized (which indirectly causes the loop
2369 above to ignore the latter thread). We can detect these because the
2370 loop structures will be different and we do not currently try to
2371 optimize this case.
2373 Second, we could be threading across a backedge to a point within the
2374 same loop.  This occurs for the FSA/FSM optimization and we would
2375 like to optimize it. However, we have to be very careful as this
2376 may completely scramble the loop structures, with the result being
2377 irreducible loops causing us to throw away our loop structure.
2379 As a compromise for the latter case, if the thread path ends in
2380 a block where the last statement is a multiway branch, then go
2381 ahead and thread it, else ignore it. */
2382 basic_block bb;
2383 edge e;
2384 FOR_EACH_BB_FN (bb, cfun)
2386 /* If we do end up threading here, we can remove elements from
2387 BB->preds. Thus we can not use the FOR_EACH_EDGE iterator. */
2388 for (edge_iterator ei = ei_start (bb->preds);
2389 (e = ei_safe_edge (ei));)
2390 if (e->aux)
2392 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2394 /* Case 1, threading from outside to inside the loop
2395 after we'd already threaded through the header. */
2396 if ((*path)[0]->e->dest->loop_father
2397 != path->last ()->e->src->loop_father)
2399 delete_jump_thread_path (path);
2400 e->aux = NULL;
2401 ei_next (&ei);
2403 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2405 /* The code to thread through loop headers may have
2406 split a block with jump threads attached to it.
2408 We can identify this with a disjoint jump threading
2409 path. If found, just remove it. */
2410 for (unsigned int i = 0; i < path->length () - 1; i++)
2411 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2413 delete_jump_thread_path (path);
2414 e->aux = NULL;
2415 ei_next (&ei);
2416 break;
2419 /* Our path is still valid, thread it. */
2420 if (e->aux)
2422 struct loop *loop = (*path)[0]->e->dest->loop_father;
2424 if (thread_block ((*path)[0]->e->dest, false))
2426 /* This jump thread likely totally scrambled this loop.
2427 So arrange for it to be fixed up. */
2428 loop->header = NULL;
2429 loop->latch = NULL;
2430 e->aux = NULL;
2432 else
2434 delete_jump_thread_path (path);
2435 e->aux = NULL;
2436 ei_next (&ei);
2440 else
2442 delete_jump_thread_path (path);
2443 e->aux = NULL;
2444 ei_next (&ei);
2447 else
2448 ei_next (&ei);
2451 statistics_counter_event (cfun, "Jumps threaded",
2452 thread_stats.num_threaded_edges);
2454 free_original_copy_tables ();
2456 BITMAP_FREE (threaded_blocks);
2457 threaded_blocks = NULL;
2458 paths.release ();
2460 if (retval)
2461 loops_state_set (LOOPS_NEED_FIXUP);
2463 return retval;
2466 /* Delete the jump threading path PATH.  We have to explicitly delete
2467 each entry in the vector, then the container. */
2469 void
2470 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2472 for (unsigned int i = 0; i < path->length (); i++)
2473 delete (*path)[i];
2474 path->release();
2477 /* Register a jump threading opportunity. We queue up all the jump
2478 threading opportunities discovered by a pass and update the CFG
2479 and SSA form all at once.
2481 PATH describes the edges to thread: its first edge is the one we want to
2482 redirect and its last edge gives the final destination, i.e., we are
2483 effectively recording that the first edge's destination can be changed to reach the last edge's destination after fixing the SSA graph.  */
2485 void
2486 register_jump_thread (vec<jump_thread_edge *> *path)
2488 if (!dbg_cnt (registered_jump_thread))
2490 delete_jump_thread_path (path);
2491 return;
2494 /* First make sure there are no NULL outgoing edges on the jump threading
2495 path. That can happen for jumping to a constant address. */
2496 for (unsigned int i = 0; i < path->length (); i++)
2497 if ((*path)[i]->e == NULL)
2499 if (dump_file && (dump_flags & TDF_DETAILS))
2501 fprintf (dump_file,
2502 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2503 dump_jump_thread_path (dump_file, *path, false);
2506 delete_jump_thread_path (path);
2507 return;
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2511 dump_jump_thread_path (dump_file, *path, true);
2513 if (!paths.exists ())
2514 paths.create (5);
2516 paths.safe_push (path);
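
/* A minimal, illustrative sketch (not part of this file) of how a client
   pass might queue a jump threading request, using hypothetical edges
   E and E2 discovered by that pass:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   All queued requests are later realized at once by
   thread_through_all_blocks.  */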