gcc/tree-ssa-threadupdate.c
1 /* Thread edges through blocks and update the control flow and SSA graphs.
2 Copyright (C) 2004-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "alias.h"
24 #include "symtab.h"
25 #include "options.h"
26 #include "tree.h"
27 #include "fold-const.h"
28 #include "flags.h"
29 #include "predict.h"
30 #include "tm.h"
31 #include "hard-reg-set.h"
32 #include "function.h"
33 #include "dominance.h"
34 #include "cfg.h"
35 #include "cfganal.h"
36 #include "basic-block.h"
37 #include "tree-ssa-alias.h"
38 #include "internal-fn.h"
39 #include "gimple-expr.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimple-ssa.h"
43 #include "tree-phinodes.h"
44 #include "tree-ssa.h"
45 #include "tree-ssa-threadupdate.h"
46 #include "ssa-iterators.h"
47 #include "dumpfile.h"
48 #include "cfgloop.h"
49 #include "dbgcnt.h"
50 #include "tree-cfg.h"
51 #include "tree-pass.h"
53 /* Given a block B, update the CFG and SSA graph to reflect redirecting
54 one or more in-edges to B to instead reach the destination of an
55 out-edge from B while preserving any side effects in B.
57 i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
58 side effects of executing B.
60 1. Make a copy of B (including its outgoing edges and statements). Call
61 the copy B'. Note B' has no incoming edges or PHIs at this time.
63 2. Remove the control statement at the end of B' and all outgoing edges
64 except B'->C.
66 3. Add a new argument to each PHI in C with the same value as the existing
67 argument associated with edge B->C. Associate the new PHI arguments
68 with the edge B'->C.
70 4. For each PHI in B, find or create a PHI in B' with an identical
71 PHI_RESULT. Add an argument to the PHI in B' which has the same
72 value as the PHI in B associated with the edge A->B. Associate
73 the new argument in the PHI in B' with the edge A->B.
75 5. Change the edge A->B to A->B'.
77 5a. This automatically deletes any PHI arguments associated with the
78 edge A->B in B.
80 5b. This automatically associates each new argument added in step 4
81 with the edge A->B'.
83 6. Repeat for other incoming edges into B.
85 7. Put the duplicated resources in B and all the B' blocks into SSA form.
87 Note that block duplication can be minimized by first collecting the
88 set of unique destination blocks that the incoming edges should
89 be threaded to.
91 We reduce the number of edges and statements we create by not copying all
92 the outgoing edges and the control statement in step #1. We instead create
93 a template block without the outgoing edges and duplicate the template.
95 Another case this code handles is threading through a "joiner" block. In
96 this case, we do not know the destination of the joiner block, but one
97 of the outgoing edges from the joiner block leads to a threadable path. This
98 case largely works as outlined above, except the duplicate of the joiner
99 block still contains a full set of outgoing edges and its control statement.
100 We just redirect one of its outgoing edges to our jump threading path. */
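/* As a concrete (hypothetical) illustration of the steps above, suppose B
   ends in "if (x_1 > 0)" with outgoing edges B->C (true) and B->D (false),
   and that along the incoming edge A->B we know x_1 > 0 holds.  Then:

     before:  A -> B -> { C, D }      other preds -> B -> { C, D }
     after:   A -> B' -> C            other preds -> B -> { C, D }

   B' is the copy of B with its conditional and the B->D edge removed.  Any
   side effects in B are preserved in B', and each PHI in C gains an
   argument for the new edge B'->C equal to the one it had for B->C.  */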
103 /* Steps #5 and #6 of the above algorithm are best implemented by walking
104 all the incoming edges which thread to the same destination edge at
105 the same time. That avoids lots of table lookups to get information
106 for the destination edge.
108 To realize that implementation we create a list of incoming edges
109 which thread to the same outgoing edge. Thus to implement steps
110 #5 and #6 we traverse our hash table of outgoing edge information.
111 For each entry we walk the list of incoming edges which thread to
112 the current outgoing edge. */
114 struct el
116 edge e;
117 struct el *next;
120 /* Main data structure recording information regarding B's duplicate
121 blocks. */
123 /* We need to efficiently record the unique thread destinations of this
124 block and specific information associated with those destinations. We
125 may have many incoming edges threaded to the same outgoing edge. This
126 can be naturally implemented with a hash table. */
128 struct redirection_data : free_ptr_hash<redirection_data>
130 /* We support wiring up two block duplicates in a jump threading path.
132 One is a normal block copy where we remove the control statement
133 and wire up its single remaining outgoing edge to the thread path.
135 The other is a joiner block where we leave the control statement
136 in place, but wire one of the outgoing edges to a thread path.
138 In theory we could have multiple block duplicates in a jump
139 threading path, but I haven't tried that.
141 The duplicate blocks appear in this array in the same order in
142 which they appear in the jump thread path. */
143 basic_block dup_blocks[2];
145 /* The jump threading path. */
146 vec<jump_thread_edge *> *path;
148 /* A list of incoming edges which we want to thread to the
149 same path. */
150 struct el *incoming_edges;
152 /* hash_table support. */
153 static inline hashval_t hash (const redirection_data *);
154 static inline int equal (const redirection_data *, const redirection_data *);
157 /* Dump a jump threading path, including annotations about each
158 edge in the path. */
160 static void
161 dump_jump_thread_path (FILE *dump_file, vec<jump_thread_edge *> path,
162 bool registering)
164 fprintf (dump_file,
165 " %s%s jump thread: (%d, %d) incoming edge; ",
166 (registering ? "Registering" : "Cancelling"),
167 (path[0]->type == EDGE_FSM_THREAD ? " FSM": ""),
168 path[0]->e->src->index, path[0]->e->dest->index);
170 for (unsigned int i = 1; i < path.length (); i++)
172 /* We can get paths with a NULL edge when the final destination
173 of a jump thread turns out to be a constant address. We dump
174 those paths when debugging, so we have to be prepared for that
175 possibility here. */
176 if (path[i]->e == NULL)
177 continue;
179 if (path[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
180 fprintf (dump_file, " (%d, %d) joiner; ",
181 path[i]->e->src->index, path[i]->e->dest->index);
182 if (path[i]->type == EDGE_COPY_SRC_BLOCK)
183 fprintf (dump_file, " (%d, %d) normal;",
184 path[i]->e->src->index, path[i]->e->dest->index);
185 if (path[i]->type == EDGE_NO_COPY_SRC_BLOCK)
186 fprintf (dump_file, " (%d, %d) nocopy;",
187 path[i]->e->src->index, path[i]->e->dest->index);
188 if (path[0]->type == EDGE_FSM_THREAD)
189 fprintf (dump_file, " (%d, %d) ",
190 path[i]->e->src->index, path[i]->e->dest->index);
192 fputc ('\n', dump_file);
195 /* Simple hashing function. For any given incoming edge E, we're going
196 to be most concerned with the final destination of its jump thread
197 path. So hash on the block index of the final edge in the path. */
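/* For example (hypothetical block names), two distinct paths A->B->X->Y
   and C->D->X->Y both end with the edge X->Y, so both hash to Y's block
   index and land in the same bucket; redirection_data::equal below then
   tells them apart by comparing the paths edge by edge.  */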
199 inline hashval_t
200 redirection_data::hash (const redirection_data *p)
202 vec<jump_thread_edge *> *path = p->path;
203 return path->last ()->e->dest->index;
206 /* Given two hash table entries, return true if they have the same
207 jump threading path. */
208 inline int
209 redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
211 vec<jump_thread_edge *> *path1 = p1->path;
212 vec<jump_thread_edge *> *path2 = p2->path;
214 if (path1->length () != path2->length ())
215 return false;
217 for (unsigned int i = 1; i < path1->length (); i++)
219 if ((*path1)[i]->type != (*path2)[i]->type
220 || (*path1)[i]->e != (*path2)[i]->e)
221 return false;
224 return true;
227 /* Data structure of information to pass to hash table traversal routines. */
228 struct ssa_local_info_t
230 /* The current block we are working on. */
231 basic_block bb;
233 /* We only create a template block for the first duplicated block in a
234 jump threading path as we may need many duplicates of that block.
236 The second duplicate block in a path is specific to that path. Creating
237 and sharing a template for that block is considerably more difficult. */
238 basic_block template_block;
240 /* TRUE if we thread one or more jumps, FALSE otherwise. */
241 bool jumps_threaded;
243 /* Blocks duplicated for the thread. */
244 bitmap duplicate_blocks;
247 /* Passes which use the jump threading code register jump threading
248 opportunities as they are discovered. We keep the registered
249 jump threading opportunities in this vector as edge pairs
250 (original_edge, target_edge). */
251 static vec<vec<jump_thread_edge *> *> paths;
253 /* When we start updating the CFG for threading, data necessary for jump
254 threading is attached to the AUX field for the incoming edge. Use these
255 macros to access the underlying structure attached to the AUX field. */
256 #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
258 /* Jump threading statistics. */
260 struct thread_stats_d
262 unsigned long num_threaded_edges;
265 struct thread_stats_d thread_stats;
268 /* Remove the last statement in block BB if it is a control statement.
269 Also remove all outgoing edges except the edge which reaches DEST_BB.
270 If DEST_BB is NULL, then remove all outgoing edges. */
272 static void
273 remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
275 gimple_stmt_iterator gsi;
276 edge e;
277 edge_iterator ei;
279 gsi = gsi_last_bb (bb);
281 /* If the duplicate ends with a control statement, then remove it.
283 Note that if we are duplicating the template block rather than the
284 original basic block, then the duplicate might not have any real
285 statements in it. */
286 if (!gsi_end_p (gsi)
287 && gsi_stmt (gsi)
288 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
289 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
290 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
291 gsi_remove (&gsi, true);
293 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
295 if (e->dest != dest_bb)
296 remove_edge (e);
297 else
298 ei_next (&ei);
302 /* Create a duplicate of BB. Record the duplicate block in an array
303 indexed by COUNT stored in RD. */
305 static void
306 create_block_for_threading (basic_block bb,
307 struct redirection_data *rd,
308 unsigned int count,
309 bitmap *duplicate_blocks)
311 edge_iterator ei;
312 edge e;
314 /* We can use the generic block duplication code and simply remove
315 the stuff we do not need. */
316 rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
318 FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
319 e->aux = NULL;
321 /* Zero out the profile, since the block is unreachable for now. */
322 rd->dup_blocks[count]->frequency = 0;
323 rd->dup_blocks[count]->count = 0;
324 if (duplicate_blocks)
325 bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
328 /* Main data structure to hold information for duplicates of BB. */
330 static hash_table<redirection_data> *redirection_data;
332 /* Given an edge E with an attached jump threading path, lookup and return
334 the entry for that path in our hash table. If INSERT is true, then we
335 insert the entry into the hash table if it is not already present, and
336 E is added to the list of incoming edges associated with the entry. */
338 static struct redirection_data *
339 lookup_redirection_data (edge e, enum insert_option insert)
341 struct redirection_data **slot;
342 struct redirection_data *elt;
343 vec<jump_thread_edge *> *path = THREAD_PATH (e);
345 /* Build a hash table element so we can see if E is already
346 in the table. */
347 elt = XNEW (struct redirection_data);
348 elt->path = path;
349 elt->dup_blocks[0] = NULL;
350 elt->dup_blocks[1] = NULL;
351 elt->incoming_edges = NULL;
353 slot = redirection_data->find_slot (elt, insert);
355 /* This will only happen if INSERT is false and the entry is not
356 in the hash table. */
357 if (slot == NULL)
359 free (elt);
360 return NULL;
363 /* This will only happen if E was not in the hash table and
364 INSERT is true. */
365 if (*slot == NULL)
367 *slot = elt;
368 elt->incoming_edges = XNEW (struct el);
369 elt->incoming_edges->e = e;
370 elt->incoming_edges->next = NULL;
371 return elt;
373 /* E was in the hash table. */
374 else
376 /* Free ELT as we do not need it anymore, we will extract the
377 relevant entry from the hash table itself. */
378 free (elt);
380 /* Get the entry stored in the hash table. */
381 elt = *slot;
383 /* If insertion was requested, then we need to add INCOMING_EDGE
384 to the list of incoming edges associated with E. */
385 if (insert)
387 struct el *el = XNEW (struct el);
388 el->next = elt->incoming_edges;
389 el->e = e;
390 elt->incoming_edges = el;
393 return elt;
397 /* Similar to copy_phi_args, except that the PHI arg already exists; it
398 just does not have a value associated with it. */
400 static void
401 copy_phi_arg_into_existing_phi (edge src_e, edge tgt_e)
403 int src_idx = src_e->dest_idx;
404 int tgt_idx = tgt_e->dest_idx;
406 /* Iterate over each PHI in e->dest. */
407 for (gphi_iterator gsi = gsi_start_phis (src_e->dest),
408 gsi2 = gsi_start_phis (tgt_e->dest);
409 !gsi_end_p (gsi);
410 gsi_next (&gsi), gsi_next (&gsi2))
412 gphi *src_phi = gsi.phi ();
413 gphi *dest_phi = gsi2.phi ();
414 tree val = gimple_phi_arg_def (src_phi, src_idx);
415 source_location locus = gimple_phi_arg_location (src_phi, src_idx);
417 SET_PHI_ARG_DEF (dest_phi, tgt_idx, val);
418 gimple_phi_arg_set_location (dest_phi, tgt_idx, locus);
422 /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
423 to see if it has a constant value in a flow-sensitive manner. Set
424 LOCUS to the location of the constant PHI arg and return the value.
425 Return DEF directly if either PATH or IDX is zero. */
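/* For example (hypothetical GIMPLE), suppose DEF is x_1, defined in
   block 5 by

     x_1 = PHI <7(3), y_2(4)>

   and some edge (*PATH)[j]->e with j < IDX enters block 5 from block 3.
   Then the constant 7 is returned and *LOCUS is set to that PHI
   argument's location.  If the path instead enters block 5 from block 4,
   y_2 is not a minimal invariant and DEF is returned unchanged.  */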
427 static tree
428 get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
429 basic_block bb, int idx, source_location *locus)
431 tree arg;
432 gphi *def_phi;
433 basic_block def_bb;
435 if (path == NULL || idx == 0)
436 return def;
438 def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
439 if (!def_phi)
440 return def;
442 def_bb = gimple_bb (def_phi);
443 /* Don't propagate loop invariants into deeper loops. */
444 if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
445 return def;
447 /* Backtrack jump threading path from IDX to see if def has constant
448 value. */
449 for (int j = idx - 1; j >= 0; j--)
451 edge e = (*path)[j]->e;
452 if (e->dest == def_bb)
454 arg = gimple_phi_arg_def (def_phi, e->dest_idx);
455 if (is_gimple_min_invariant (arg))
457 *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
458 return arg;
460 break;
464 return def;
467 /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
468 Try to backtrack jump threading PATH from node IDX to see if the arg
469 has a constant value; if so, copy the constant value instead of the
470 argument itself. */
472 static void
473 copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
474 vec<jump_thread_edge *> *path, int idx)
476 gphi_iterator gsi;
477 int src_indx = src_e->dest_idx;
479 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
481 gphi *phi = gsi.phi ();
482 tree def = gimple_phi_arg_def (phi, src_indx);
483 source_location locus = gimple_phi_arg_location (phi, src_indx);
485 if (TREE_CODE (def) == SSA_NAME
486 && !virtual_operand_p (gimple_phi_result (phi)))
487 def = get_value_locus_in_path (def, path, bb, idx, &locus);
489 add_phi_arg (phi, def, tgt_e, locus);
493 /* We have recently made a copy of ORIG_BB, including its outgoing
494 edges. The copy is NEW_BB. Every PHI node in every direct successor of
495 ORIG_BB has a new argument associated with edge from NEW_BB to the
496 successor. Initialize the PHI argument so that it is equal to the PHI
497 argument associated with the edge from ORIG_BB to the successor.
498 PATH and IDX are used to check if the new PHI argument has a constant
499 value in a flow-sensitive manner. */
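/* For example (hypothetical), if a successor S of ORIG_BB contains

     x_3 = PHI <a_1(ORIG_BB), b_2(OTHER_BB)>

   then copy_phi_args adds an argument for the new edge NEW_BB->S, giving

     x_3 = PHI <a_1(ORIG_BB), b_2(OTHER_BB), a_1(NEW_BB)>

   (or a constant in place of a_1, if PATH shows that a_1 is constant when
   the thread reaches this point).  */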
501 static void
502 update_destination_phis (basic_block orig_bb, basic_block new_bb,
503 vec<jump_thread_edge *> *path, int idx)
505 edge_iterator ei;
506 edge e;
508 FOR_EACH_EDGE (e, ei, orig_bb->succs)
510 edge e2 = find_edge (new_bb, e->dest);
511 copy_phi_args (e->dest, e, e2, path, idx);
515 /* Given a duplicate block and its single destination (both stored
516 in RD). Create an edge between the duplicate and its single
517 destination.
519 Add an additional argument to any PHI nodes at the single
520 destination. IDX is the node in the jump threading path at which we
521 start checking whether the new PHI argument has a constant value
522 along the jump threading path. */
524 static void
525 create_edge_and_update_destination_phis (struct redirection_data *rd,
526 basic_block bb, int idx)
528 edge e = make_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
530 rescan_loop_exit (e, true, false);
531 e->probability = REG_BR_PROB_BASE;
532 e->count = bb->count;
534 /* We used to copy the thread path here. That was added in 2007
535 and dutifully updated through the representation changes in 2013.
537 In 2013 we added code to thread from an interior node through
538 the backedge to another interior node. That runs after the code
539 to thread through loop headers from outside the loop.
541 The latter may delete edges in the CFG, including those
542 which appeared in the jump threading path we copied here. Thus
543 we'd end up using a dangling pointer.
545 After reviewing the 2007/2011 code, I can't see how anything
546 depended on copying the AUX field and clearly copying the jump
547 threading path is problematical due to embedded edge pointers.
548 It has been removed. */
549 e->aux = NULL;
551 /* If there are any PHI nodes at the destination of the outgoing edge
552 from the duplicate block, then we will need to add a new argument
553 to them. The argument should have the same value as the argument
554 associated with the outgoing edge stored in RD. */
555 copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
558 /* Look through PATH beginning at START and return TRUE if there are
559 any additional blocks that need to be duplicated. Otherwise,
560 return FALSE. */
561 static bool
562 any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
563 unsigned int start)
565 for (unsigned int i = start + 1; i < path->length (); i++)
567 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
568 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
569 return true;
571 return false;
575 /* Compute the amount of profile count/frequency coming into the jump threading
576 path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
577 PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
578 duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
579 identify blocks duplicated for jump threading, which have duplicated
580 edges that need to be ignored in the analysis. Return true if path contains
581 a joiner, false otherwise.
583 In the non-joiner case, this is straightforward - all the counts/frequency
584 flowing into the jump threading path should flow through the duplicated
585 block and out of the duplicated path.
587 In the joiner case, it is very tricky. Some of the counts flowing into
588 the original path go offpath at the joiner. The problem is that while
589 we know how much total count goes off-path in the original control flow,
590 we don't know how many of the counts corresponding to just the jump
591 threading path go offpath at the joiner.
593 For example, assume we have the following control flow and identified
594 jump threading paths:
596             A     B     C
597              \    |    /
598            Ea \   |Eb / Ec
599                \  |  /
600                 v v v
601                   J       <-- Joiner
602                  / \
603             Eoff/   \Eon
604                /     \
605               v       v
606             Soff     Son  <--- Normal
607                      / \
608                   Ed/   \ Ee
609                    /     \
610                   v       v
611                   D       E
613     Jump threading paths: A -> J -> Son -> D (path 1)
614                           C -> J -> Son -> E (path 2)
616 Note that the control flow could be more complicated:
617 - Each jump threading path may have more than one incoming edge. I.e. A and
618 Ea could represent multiple incoming blocks/edges that are included in
619 path 1.
620 - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
621 before or after the "normal" copy block). These are not duplicated onto
622 the jump threading path, as they are single-successor.
623 - Any of the blocks along the path may have other incoming edges that
624 are not part of any jump threading path, but add profile counts along
625 the path.
627 In the above example, after all jump threading is complete, we will
628 end up with the following control flow:
630        A          B            C
631        |          |            |
632      Ea|          |Eb          |Ec
633        |          |            |
634        v          v            v
635       Ja          J           Jc
636       / \        / \Eon'      / \
637  Eona/   \   ---/---\--------    \Eonc
638     /     \ /  /     \            \
639    v       v  v       v            v
640   Sona     Soff      Son          Sonc
641     \                 / \          /
642      \___________    /   \   ____/
643                  \  /     \ /
644                   vv       v
645                    D       E
647 The main issue to notice here is that when we are processing path 1
648 (A->J->Son->D) we need to figure out the outgoing edge weights to
649 the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
650 sum of the incoming weights to D remains Ed. The problem with simply
651 assuming that Ja (and Jc when processing path 2) has the same outgoing
652 probabilities to its successors as the original block J, is that after
653 all paths are processed and other edges/counts removed (e.g. none
654 of Ec will reach D after processing path 2), we may end up with not
655 enough count flowing along duplicated edge Sona->D.
657 Therefore, in the case of a joiner, we keep track of all counts
658 coming in along the current path, as well as from predecessors not
659 on any jump threading path (Eb in the above example). While we
660 first assume that the duplicated Eona for Ja->Sona has the same
661 probability as the original, we later compensate for other jump
662 threading paths that may eliminate edges. We do that by keeping track
663 of all counts coming into the original path that are not in a jump
664 thread (Eb in the above example, but as noted earlier, there could
665 be other predecessors incoming to the path at various points, such
666 as at Son). Call this cumulative non-path count coming into the path
667 before D as Enonpath. We then ensure that the count from Sona->D is at
668 least as big as (Ed - Enonpath), but no bigger than the minimum
669 weight along the jump threading path. The probabilities of both the
670 original and duplicated joiner block J and Ja will be adjusted
671 accordingly after the updates. */
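/* A worked example with hypothetical counts for the first CFG above:
   let Ea = 60, Eb = 30 and Ec = 10, so J's count is 100, and let
   Eon = 80 and Eoff = 20.  When processing path 1 (A->J->Son->D),
   path_in_count is 60 and nonpath_count starts at 30 (Eb; Ec belongs to
   path 2 and is therefore ignored here).  onpath_scale is 60/100, so the
   first estimate of the count leaving the duplicated joiner Ja along the
   path is 80 * 60/100 = 48, and path_out_count becomes the minimum of
   that estimate and the counts seen along the rest of the path.  The
   final adjustment described above then forces path_out_count to be at
   least Ed - nonpath_count, but no more than the smallest count on the
   original path.  */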
673 static bool
674 compute_path_counts (struct redirection_data *rd,
675 ssa_local_info_t *local_info,
676 gcov_type *path_in_count_ptr,
677 gcov_type *path_out_count_ptr,
678 int *path_in_freq_ptr)
680 edge e = rd->incoming_edges->e;
681 vec<jump_thread_edge *> *path = THREAD_PATH (e);
682 edge elast = path->last ()->e;
683 gcov_type nonpath_count = 0;
684 bool has_joiner = false;
685 gcov_type path_in_count = 0;
686 int path_in_freq = 0;
688 /* Start by accumulating incoming edge counts to the path's first bb
689 into a couple buckets:
690 path_in_count: total count of incoming edges that flow into the
691 current path.
692 nonpath_count: total count of incoming edges that are not
693 flowing along *any* path. These are the counts
694 that will still flow along the original path after
695 all path duplication is done by potentially multiple
696 calls to this routine.
697 (any other incoming edge counts are for a different jump threading
698 path that will be handled by a later call to this routine.)
699 To make this easier, start by recording all incoming edges that flow into
700 the current path in a bitmap. We could add up the path's incoming edge
701 counts here, but we still need to walk all the first bb's incoming edges
702 below to add up the counts of the other edges not included in this jump
703 threading path. */
704 struct el *next, *el;
705 bitmap in_edge_srcs = BITMAP_ALLOC (NULL);
706 for (el = rd->incoming_edges; el; el = next)
708 next = el->next;
709 bitmap_set_bit (in_edge_srcs, el->e->src->index);
711 edge ein;
712 edge_iterator ei;
713 FOR_EACH_EDGE (ein, ei, e->dest->preds)
715 vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
716 /* Simply check the incoming edge src against the set captured above. */
717 if (ein_path
718 && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
720 /* It is necessary but not sufficient that the last path edges
721 are identical. There may be different paths that share the
722 same last path edge in the case where the last edge has a nocopy
723 source block. */
724 gcc_assert (ein_path->last ()->e == elast);
725 path_in_count += ein->count;
726 path_in_freq += EDGE_FREQUENCY (ein);
728 else if (!ein_path)
730 /* Keep track of the incoming edges that are not on any jump-threading
731 path. These counts will still flow out of original path after all
732 jump threading is complete. */
733 nonpath_count += ein->count;
737 /* This is needed due to insane incoming frequencies. */
738 if (path_in_freq > BB_FREQ_MAX)
739 path_in_freq = BB_FREQ_MAX;
741 BITMAP_FREE (in_edge_srcs);
743 /* Now compute the fraction of the total count coming into the first
744 path bb that is from the current threading path. */
745 gcov_type total_count = e->dest->count;
746 /* Handle incoming profile insanities. */
747 if (total_count < path_in_count)
748 path_in_count = total_count;
749 int onpath_scale = GCOV_COMPUTE_SCALE (path_in_count, total_count);
751 /* Walk the entire path to do some more computation in order to estimate
752 how much of the path_in_count will flow out of the duplicated threading
753 path. In the non-joiner case this is straightforward (it should be
754 the same as path_in_count, although we will handle incoming profile
755 insanities by setting it equal to the minimum count along the path).
757 In the joiner case, we need to estimate how much of the path_in_count
758 will stay on the threading path after the joiner's conditional branch.
759 We don't really know for sure how much of the counts
760 associated with this path go to each successor of the joiner, but we'll
761 estimate based on the fraction of the total count coming into the path
762 bb that was from the threading paths (computed above in onpath_scale).
763 Afterwards, we will need to do some fixup to account for other threading
764 paths and possible profile insanities.
766 In order to estimate the joiner case's counts we also need to update
767 nonpath_count with any additional counts coming into the path. Other
768 blocks along the path may have additional predecessors from outside
769 the path. */
770 gcov_type path_out_count = path_in_count;
771 gcov_type min_path_count = path_in_count;
772 for (unsigned int i = 1; i < path->length (); i++)
774 edge epath = (*path)[i]->e;
775 gcov_type cur_count = epath->count;
776 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
778 has_joiner = true;
779 cur_count = apply_probability (cur_count, onpath_scale);
781 /* In the joiner case we need to update nonpath_count for any edges
782 coming into the path that will contribute to the count flowing
783 into the path successor. */
784 if (has_joiner && epath != elast)
786 /* Look for other incoming edges after joiner. */
787 FOR_EACH_EDGE (ein, ei, epath->dest->preds)
789 if (ein != epath
790 /* Ignore in edges from blocks we have duplicated for a
791 threading path, which have duplicated edge counts until
792 they are redirected by an invocation of this routine. */
793 && !bitmap_bit_p (local_info->duplicate_blocks,
794 ein->src->index))
795 nonpath_count += ein->count;
798 if (cur_count < path_out_count)
799 path_out_count = cur_count;
800 if (epath->count < min_path_count)
801 min_path_count = epath->count;
804 /* We computed path_out_count above assuming that this path targeted
805 the joiner's on-path successor with the same likelihood as it
806 reached the joiner. However, other thread paths through the joiner
807 may take a different path through the normal copy source block
808 (i.e. they have a different elast), meaning that they do not
809 contribute any counts to this path's elast. As a result, it may
810 turn out that this path must have more count flowing to the on-path
811 successor of the joiner. Essentially, all of this path's elast
812 count must be contributed by this path and any nonpath counts
813 (since any path through the joiner with a different elast will not
814 include a copy of this elast in its duplicated path).
815 So ensure that this path's path_out_count is at least the
816 difference between elast->count and nonpath_count. Otherwise the edge
817 counts after threading will not be sane. */
818 if (has_joiner && path_out_count < elast->count - nonpath_count)
820 path_out_count = elast->count - nonpath_count;
821 /* But neither can we go above the minimum count along the path
822 we are duplicating. This can be an issue due to profile
823 insanities coming in to this pass. */
824 if (path_out_count > min_path_count)
825 path_out_count = min_path_count;
828 *path_in_count_ptr = path_in_count;
829 *path_out_count_ptr = path_out_count;
830 *path_in_freq_ptr = path_in_freq;
831 return has_joiner;
835 /* Update the counts and frequencies for both an original path
836 edge EPATH and its duplicate EDUP. The duplicate source block
837 will get a count/frequency of PATH_IN_COUNT and PATH_IN_FREQ,
838 and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
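/* Continuing the hypothetical numbers above: with PATH_IN_COUNT = 60 and
   PATH_OUT_COUNT = 48, the duplicate source block gets count 60 and
   frequency PATH_IN_FREQ, the original block J drops from 100 to 40, the
   duplicated edge EDUP gets count 48, and EPATH loses 48 from its count.
   The subtractions on the original block's count and frequency are
   clamped at zero to absorb rounding underflow.  */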
839 static void
840 update_profile (edge epath, edge edup, gcov_type path_in_count,
841 gcov_type path_out_count, int path_in_freq)
844 /* First update the duplicated block's count / frequency. */
845 if (edup)
847 basic_block dup_block = edup->src;
848 gcc_assert (dup_block->count == 0);
849 gcc_assert (dup_block->frequency == 0);
850 dup_block->count = path_in_count;
851 dup_block->frequency = path_in_freq;
854 /* Now update the original block's count and frequency in the
855 opposite manner - remove the counts/freq that will flow
856 into the duplicated block. Handle underflow due to precision/
857 rounding issues. */
858 epath->src->count -= path_in_count;
859 if (epath->src->count < 0)
860 epath->src->count = 0;
861 epath->src->frequency -= path_in_freq;
862 if (epath->src->frequency < 0)
863 epath->src->frequency = 0;
865 /* Next update this path edge's original and duplicated counts. We know
866 that the duplicated path will have path_out_count flowing
867 out of it (in the joiner case this is the count along the duplicated path
868 out of the duplicated joiner). This count can then be removed from the
869 original path edge. */
870 if (edup)
871 edup->count = path_out_count;
872 epath->count -= path_out_count;
873 gcc_assert (epath->count >= 0);
877 /* The duplicate and original joiner blocks may end up with different
878 probabilities (different from both the original and from each other).
879 Recompute the probabilities here once we have updated the edge
880 counts and frequencies. */
882 static void
883 recompute_probabilities (basic_block bb)
885 edge esucc;
886 edge_iterator ei;
887 FOR_EACH_EDGE (esucc, ei, bb->succs)
889 if (!bb->count)
890 continue;
892 /* Prevent overflow computation due to insane profiles. */
893 if (esucc->count < bb->count)
894 esucc->probability = GCOV_COMPUTE_SCALE (esucc->count,
895 bb->count);
896 else
897 /* Can happen with missing/guessed probabilities, since we
898 may determine that more is flowing along duplicated
899 path than joiner succ probabilities allowed.
900 Counts and freqs will be insane after jump threading,
901 at least make sure probability is sane or we will
902 get a flow verification error.
903 Not much we can do to make counts/freqs sane without
904 redoing the profile estimation. */
905 esucc->probability = REG_BR_PROB_BASE;
910 /* Update the counts of the original and duplicated edges from a joiner
911 that go off path, given that we have already determined that the
912 duplicate joiner DUP_BB has incoming count PATH_IN_COUNT and
913 outgoing count along the path PATH_OUT_COUNT. The original (on-)path
914 edge from joiner is EPATH. */
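/* With the same hypothetical numbers, PATH_IN_COUNT = 60 and
   PATH_OUT_COUNT = 48, so 60 - 48 = 12 must flow off path from the
   duplicated joiner.  If the joiner's off-path count of 20 were split
   across two off-path edges of 15 and 5, their duplicates would receive
   12 * 15/20 = 9 and 12 * 5/20 = 3 respectively, and the original
   off-path edges would be reduced by the same amounts (clamped at
   zero).  */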
916 static void
917 update_joiner_offpath_counts (edge epath, basic_block dup_bb,
918 gcov_type path_in_count,
919 gcov_type path_out_count)
921 /* Compute the count that currently flows off path from the joiner.
922 In other words, the total count of joiner's out edges other than
923 epath. Compute this by walking the successors instead of
924 subtracting epath's count from the joiner bb count, since there
925 are sometimes slight insanities where the total out edge count is
926 larger than the bb count (possibly due to rounding/truncation
927 errors). */
928 gcov_type total_orig_off_path_count = 0;
929 edge enonpath;
930 edge_iterator ei;
931 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
933 if (enonpath == epath)
934 continue;
935 total_orig_off_path_count += enonpath->count;
938 /* For the path that we are duplicating, the amount that will flow
939 off path from the duplicated joiner is the delta between the
940 path's cumulative in count and the portion of that count we
941 estimated above as flowing from the joiner along the duplicated
942 path. */
943 gcov_type total_dup_off_path_count = path_in_count - path_out_count;
945 /* Now do the actual updates of the off-path edges. */
946 FOR_EACH_EDGE (enonpath, ei, epath->src->succs)
948 /* Look for edges going off of the threading path. */
949 if (enonpath == epath)
950 continue;
952 /* Find the corresponding edge out of the duplicated joiner. */
953 edge enonpathdup = find_edge (dup_bb, enonpath->dest);
954 gcc_assert (enonpathdup);
956 /* We can't use the original probability of the joiner's out
957 edges, since the probabilities of the original branch
958 and the duplicated branches may vary after all threading is
959 complete. But apportion the duplicated joiner's off-path
960 total edge count computed earlier (total_dup_off_path_count)
961 among the duplicated off-path edges based on their original
962 ratio to the full off-path count (total_orig_off_path_count).
964 int scale = GCOV_COMPUTE_SCALE (enonpath->count,
965 total_orig_off_path_count);
966 /* Give the duplicated offpath edge a portion of the duplicated
967 total. */
968 enonpathdup->count = apply_scale (scale,
969 total_dup_off_path_count);
970 /* Now update the original offpath edge count, handling underflow
971 due to rounding errors. */
972 enonpath->count -= enonpathdup->count;
973 if (enonpath->count < 0)
974 enonpath->count = 0;
979 /* Check if the paths through RD all have estimated frequencies but zero
980 profile counts. This is more accurate than checking the entry block
981 for a zero profile count, since profile insanities sometimes creep in. */
983 static bool
984 estimated_freqs_path (struct redirection_data *rd)
986 edge e = rd->incoming_edges->e;
987 vec<jump_thread_edge *> *path = THREAD_PATH (e);
988 edge ein;
989 edge_iterator ei;
990 bool non_zero_freq = false;
991 FOR_EACH_EDGE (ein, ei, e->dest->preds)
993 if (ein->count)
994 return false;
995 non_zero_freq |= ein->src->frequency != 0;
998 for (unsigned int i = 1; i < path->length (); i++)
1000 edge epath = (*path)[i]->e;
1001 if (epath->src->count)
1002 return false;
1003 non_zero_freq |= epath->src->frequency != 0;
1004 edge esucc;
1005 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1007 if (esucc->count)
1008 return false;
1009 non_zero_freq |= esucc->src->frequency != 0;
1012 return non_zero_freq;
1016 /* Invoked for routines that have guessed frequencies and no profile
1017 counts to record the block and edge frequencies for paths through RD
1018 in the profile count fields of those blocks and edges. This is because
1019 ssa_fix_duplicate_block_edges incrementally updates the block and
1020 edge counts as edges are redirected, and it is difficult to do that
1021 for edge frequencies which are computed on the fly from the source
1022 block frequency and probability. When a block frequency is updated
1023 its outgoing edge frequencies are affected and become difficult to
1024 adjust. */
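/* For example (hypothetical numbers), an incoming edge whose source block
   has frequency 1000 and whose probability is 30% of REG_BR_PROB_BASE is
   given a temporary count of (1000 * REG_BR_PROB_BASE) * 30% =
   300 * REG_BR_PROB_BASE.  Scaling by REG_BR_PROB_BASE keeps the later
   probability computations from losing precision when frequencies are
   small; clear_counts_path below zeroes these temporary counts once all
   updates for RD are done.  */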
1026 static void
1027 freqs_to_counts_path (struct redirection_data *rd)
1029 edge e = rd->incoming_edges->e;
1030 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1031 edge ein;
1032 edge_iterator ei;
1033 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1035 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1036 errors applying the probability when the frequencies are very
1037 small. */
1038 ein->count = apply_probability (ein->src->frequency * REG_BR_PROB_BASE,
1039 ein->probability);
1042 for (unsigned int i = 1; i < path->length (); i++)
1044 edge epath = (*path)[i]->e;
1045 edge esucc;
1046 /* Scale up the frequency by REG_BR_PROB_BASE, to avoid rounding
1047 errors applying the edge probability when the frequencies are very
1048 small. */
1049 epath->src->count = epath->src->frequency * REG_BR_PROB_BASE;
1050 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1051 esucc->count = apply_probability (esucc->src->count,
1052 esucc->probability);
1057 /* For routines that have guessed frequencies and no profile counts, where we
1058 used freqs_to_counts_path to record block and edge frequencies for paths
1059 through RD, we clear the counts after completing all updates for RD.
1060 The updates in ssa_fix_duplicate_block_edges are based off the count fields,
1061 but the block frequencies and edge probabilities were updated as well,
1062 so we can simply clear the count fields. */
1064 static void
1065 clear_counts_path (struct redirection_data *rd)
1067 edge e = rd->incoming_edges->e;
1068 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1069 edge ein, esucc;
1070 edge_iterator ei;
1071 FOR_EACH_EDGE (ein, ei, e->dest->preds)
1072 ein->count = 0;
1074 /* First clear counts along original path. */
1075 for (unsigned int i = 1; i < path->length (); i++)
1077 edge epath = (*path)[i]->e;
1078 FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1079 esucc->count = 0;
1080 epath->src->count = 0;
1082 /* Also need to clear the counts along duplicated path. */
1083 for (unsigned int i = 0; i < 2; i++)
1085 basic_block dup = rd->dup_blocks[i];
1086 if (!dup)
1087 continue;
1088 FOR_EACH_EDGE (esucc, ei, dup->succs)
1089 esucc->count = 0;
1090 dup->count = 0;
1094 /* Wire up the outgoing edges from the duplicate blocks and
1095 update any PHIs as needed. Also update the profile counts
1096 on the original and duplicate blocks and edges. */
1097 void
1098 ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1099 ssa_local_info_t *local_info)
1101 bool multi_incomings = (rd->incoming_edges->next != NULL);
1102 edge e = rd->incoming_edges->e;
1103 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1104 edge elast = path->last ()->e;
1105 gcov_type path_in_count = 0;
1106 gcov_type path_out_count = 0;
1107 int path_in_freq = 0;
1109 /* This routine updates profile counts, frequencies, and probabilities
1110 incrementally. Since it is difficult to do the incremental updates
1111 using frequencies/probabilities alone, for routines without profile
1112 data we first take a snapshot of the existing block and edge frequencies
1113 by copying them into the empty profile count fields. These counts are
1114 then used to do the incremental updates, and cleared at the end of this
1115 routine. If the function is marked as having a profile, we still check
1116 to see if the paths through RD are using estimated frequencies because
1117 the routine had zero profile counts. */
1118 bool do_freqs_to_counts = (profile_status_for_fn (cfun) != PROFILE_READ
1119 || estimated_freqs_path (rd));
1120 if (do_freqs_to_counts)
1121 freqs_to_counts_path (rd);
1123 /* First determine how much profile count to move from original
1124 path to the duplicate path. This is tricky in the presence of
1125 a joiner (see comments for compute_path_counts), where some portion
1126 of the path's counts will flow off-path from the joiner. In the
1127 non-joiner case the path_in_count and path_out_count should be the
1128 same. */
1129 bool has_joiner = compute_path_counts (rd, local_info,
1130 &path_in_count, &path_out_count,
1131 &path_in_freq);
1133 int cur_path_freq = path_in_freq;
1134 for (unsigned int count = 0, i = 1; i < path->length (); i++)
1136 edge epath = (*path)[i]->e;
1138 /* If we were threading through a joiner block, then we want
1139 to keep its control statement and redirect an outgoing edge.
1140 Else we want to remove the control statement & edges, then create
1141 a new outgoing edge. In both cases we may need to update PHIs. */
1142 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1144 edge victim;
1145 edge e2;
1147 gcc_assert (has_joiner);
1149 /* This updates the PHIs at the destination of the duplicate
1150 block. Pass 0 instead of i if we are threading a path which
1151 has multiple incoming edges. */
1152 update_destination_phis (local_info->bb, rd->dup_blocks[count],
1153 path, multi_incomings ? 0 : i);
1155 /* Find the edge from the duplicate block to the block we're
1156 threading through. That's the edge we want to redirect. */
1157 victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1159 /* If there are no remaining blocks on the path to duplicate,
1160 then redirect VICTIM to the final destination of the jump
1161 threading path. */
1162 if (!any_remaining_duplicated_blocks (path, i))
1164 e2 = redirect_edge_and_branch (victim, elast->dest);
1165 /* If we redirected the edge, then we need to copy PHI arguments
1166 at the target. If the edge already existed (e2 != victim
1167 case), then the PHIs in the target already have the correct
1168 arguments. */
1169 if (e2 == victim)
1170 copy_phi_args (e2->dest, elast, e2,
1171 path, multi_incomings ? 0 : i);
1173 else
1175 /* Redirect VICTIM to the next duplicated block in the path. */
1176 e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1178 /* We need to update the PHIs in the next duplicated block. We
1179 want the new PHI args to have the same value as they had
1180 in the source of the next duplicate block.
1182 Thus, we need to know which edge we traversed into the
1183 source of the duplicate. Furthermore, we may have
1184 traversed many edges to reach the source of the duplicate.
1186 Walk through the path starting at element I until we
1187 hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1188 the edge from the prior element. */
1189 for (unsigned int j = i + 1; j < path->length (); j++)
1191 if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1193 copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1194 break;
1199 /* Update the counts and frequency of both the original block
1200 and path edge, and the duplicates. The path duplicate's
1201 incoming count and frequency are the totals for all edges
1202 incoming to this jump threading path computed earlier.
1203 And we know that the duplicated path will have path_out_count
1204 flowing out of it (i.e. along the duplicated path out of the
1205 duplicated joiner). */
1206 update_profile (epath, e2, path_in_count, path_out_count,
1207 path_in_freq);
1209 /* Next we need to update the counts of the original and duplicated
1210 edges from the joiner that go off path. */
1211 update_joiner_offpath_counts (epath, e2->src, path_in_count,
1212 path_out_count);
1214 /* Finally, we need to set the probabilities on the duplicated
1215 edges out of the duplicated joiner (e2->src). The probabilities
1216 along the original path will all be updated below after we finish
1217 processing the whole path. */
1218 recompute_probabilities (e2->src);
1220 /* Record the frequency flowing to the downstream duplicated
1221 path blocks. */
1222 cur_path_freq = EDGE_FREQUENCY (e2);
1224 else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1226 remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1227 create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1228 multi_incomings ? 0 : i);
1229 if (count == 1)
1230 single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1232 /* Update the counts and frequency of both the original block
1233 and path edge, and the duplicates. Since we are now after
1234 any joiner that may have existed on the path, the count
1235 flowing along the duplicated threaded path is path_out_count.
1236 If we didn't have a joiner, then cur_path_freq was the sum
1237 of the total frequencies along all incoming edges to the
1238 thread path (path_in_freq). If we had a joiner, it would have
1239 been updated at the end of that handling to the edge frequency
1240 along the duplicated joiner path edge. */
1241 update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1242 path_out_count, path_out_count,
1243 cur_path_freq);
1245 else
1247 /* No copy case. In this case we don't have an equivalent block
1248 on the duplicated thread path to update, but we do need
1249 to remove the portion of the counts/freqs that were moved
1250 to the duplicated path from the counts/freqs flowing through
1251 this block on the original path. Since all the no-copy edges
1252 are after any joiner, the removed count is the same as
1253 path_out_count.
1255 If we didn't have a joiner, then cur_path_freq was the sum
1256 of the total frequencies along all incoming edges to the
1257 thread path (path_in_freq). If we had a joiner, it would have
1258 been updated at the end of that handling to the edge frequency
1259 along the duplicated joiner path edge. */
1260 update_profile (epath, NULL, path_out_count, path_out_count,
1261 cur_path_freq);
1264 /* Increment the index into the duplicated path when we processed
1265 a duplicated block. */
1266 if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1267 || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1269 count++;
1273 /* Now walk orig blocks and update their probabilities, since the
1274 counts and freqs should be updated properly by the above loop. */
1275 for (unsigned int i = 1; i < path->length (); i++)
1277 edge epath = (*path)[i]->e;
1278 recompute_probabilities (epath->src);
1281 /* Done with all profile and frequency updates, clear counts if they
1282 were copied. */
1283 if (do_freqs_to_counts)
1284 clear_counts_path (rd);
1287 /* Hash table traversal callback routine to create duplicate blocks. */
1290 ssa_create_duplicates (struct redirection_data **slot,
1291 ssa_local_info_t *local_info)
1293 struct redirection_data *rd = *slot;
1295 /* The second duplicated block in a jump threading path is specific
1296 to the path. So it gets stored in RD rather than in LOCAL_DATA.
1298 Each time we're called, we have to look through the path and see
1299 if a second block needs to be duplicated.
1301 Note the search starts with the third edge on the path. The first
1302 edge is the incoming edge, the second edge always has its source
1303 duplicated. Thus we start our search with the third edge. */
1304 vec<jump_thread_edge *> *path = rd->path;
1305 for (unsigned int i = 2; i < path->length (); i++)
1307 if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1308 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1310 create_block_for_threading ((*path)[i]->e->src, rd, 1,
1311 &local_info->duplicate_blocks);
1312 break;
1316 /* Create a template block if we have not done so already. Otherwise
1317 use the template to create a new block. */
1318 if (local_info->template_block == NULL)
1320 create_block_for_threading ((*path)[1]->e->src, rd, 0,
1321 &local_info->duplicate_blocks);
1322 local_info->template_block = rd->dup_blocks[0];
1324 /* We do not create any outgoing edges for the template. We will
1325 take care of that in a later traversal. That way we do not
1326 create edges that are going to just be deleted. */
1328 else
1330 create_block_for_threading (local_info->template_block, rd, 0,
1331 &local_info->duplicate_blocks);
1333 /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1334 block. */
1335 ssa_fix_duplicate_block_edges (rd, local_info);
1338 /* Keep walking the hash table. */
1339 return 1;
1342 /* We did not create any outgoing edges for the template block during
1343 block creation. This hash table traversal callback creates the
1344 outgoing edge for the template block. */
1346 inline int
1347 ssa_fixup_template_block (struct redirection_data **slot,
1348 ssa_local_info_t *local_info)
1350 struct redirection_data *rd = *slot;
1352 /* If this is the template block, halt the traversal after updating
1353 it appropriately.
1355 If we were threading through a joiner block, then we want
1356 to keep its control statement and redirect an outgoing edge.
1357 Else we want to remove the control statement & edges, then create
1358 a new outgoing edge. In both cases we may need to update PHIs. */
1359 if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1361 ssa_fix_duplicate_block_edges (rd, local_info);
1362 return 0;
1365 return 1;
1368 /* Hash table traversal callback to redirect each incoming edge
1369 associated with this hash table element to its new destination. */
1372 ssa_redirect_edges (struct redirection_data **slot,
1373 ssa_local_info_t *local_info)
1375 struct redirection_data *rd = *slot;
1376 struct el *next, *el;
1378 /* Walk over all the incoming edges associated with this
1379 hash table entry. */
1380 for (el = rd->incoming_edges; el; el = next)
1382 edge e = el->e;
1383 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1385 /* Go ahead and free this element from the list. Doing this now
1386 avoids the need for another list walk when we destroy the hash
1387 table. */
1388 next = el->next;
1389 free (el);
1391 thread_stats.num_threaded_edges++;
1393 if (rd->dup_blocks[0])
1395 edge e2;
1397 if (dump_file && (dump_flags & TDF_DETAILS))
1398 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1399 e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1401 /* If we redirect a loop latch edge cancel its loop. */
1402 if (e->src == e->src->loop_father->latch)
1403 mark_loop_for_removal (e->src->loop_father);
1405 /* Redirect the incoming edge (possibly to the joiner block) to the
1406 appropriate duplicate block. */
1407 e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1408 gcc_assert (e == e2);
1409 flush_pending_stmts (e2);
1412 /* Go ahead and clear E->aux. It's not needed anymore and failure
1413 to clear it will cause all kinds of unpleasant problems later. */
1414 delete_jump_thread_path (path);
1415 e->aux = NULL;
1419 /* Indicate that we actually threaded one or more jumps. */
1420 if (rd->incoming_edges)
1421 local_info->jumps_threaded = true;
1423 return 1;
1426 /* Return true if this block has no executable statements other than
1427 a simple ctrl flow instruction. When the number of outgoing edges
1428 is one, this is equivalent to a "forwarder" block. */
1430 static bool
1431 redirection_block_p (basic_block bb)
1433 gimple_stmt_iterator gsi;
1435 /* Advance to the first executable statement. */
1436 gsi = gsi_start_bb (bb);
1437 while (!gsi_end_p (gsi)
1438 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1439 || is_gimple_debug (gsi_stmt (gsi))
1440 || gimple_nop_p (gsi_stmt (gsi))
1441 || gimple_clobber_p (gsi_stmt (gsi))))
1442 gsi_next (&gsi);
1444 /* Check if this is an empty block. */
1445 if (gsi_end_p (gsi))
1446 return true;
1448 /* Test that we've reached the terminating control statement. */
1449 return gsi_stmt (gsi)
1450 && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1451 || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1452 || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1455 /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1456 is reached via one or more specific incoming edges, we know which
1457 outgoing edge from BB will be traversed.
1459 We want to redirect those incoming edges to the target of the
1460 appropriate outgoing edge. Doing so avoids a conditional branch
1461 and may expose new optimization opportunities. Note that we have
1462 to update dominator tree and SSA graph after such changes.
1464 The key to keeping the SSA graph update manageable is to duplicate
1465 the side effects occurring in BB so that those side effects still
1466 occur on the paths which bypass BB after redirecting edges.
1468 We accomplish this by creating duplicates of BB and arranging for
1469 the duplicates to unconditionally pass control to one specific
1470 successor of BB. We then revector the incoming edges into BB to
1471 the appropriate duplicate of BB.
1473 If NOLOOP_ONLY is true, we only perform the threading as long as it
1474 does not affect the structure of the loops in a nontrivial way.
1476 If JOINERS is true, then thread through joiner blocks as well. */
1478 static bool
1479 thread_block_1 (basic_block bb, bool noloop_only, bool joiners)
1481 /* E is an incoming edge into BB that we may or may not want to
1482 redirect to a duplicate of BB. */
1483 edge e, e2;
1484 edge_iterator ei;
1485 ssa_local_info_t local_info;
1487 local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1489 /* To avoid scanning a linear array for the element we need we instead
1490 use a hash table. For normal code there should be no noticeable
1491 difference. However, if we have a block with a large number of
1492 incoming and outgoing edges such linear searches can get expensive. */
1493 redirection_data
1494 = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1496 /* Record each unique threaded destination into a hash table for
1497 efficient lookups. */
1498 FOR_EACH_EDGE (e, ei, bb->preds)
1500 if (e->aux == NULL)
1501 continue;
1503 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1505 if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1506 || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1507 continue;
1509 e2 = path->last ()->e;
1510 if (!e2 || noloop_only)
1512 /* If NOLOOP_ONLY is true, we only allow threading through the
1513 header of a loop to exit edges. */
1515 /* One case occurs when there was loop header buried in a jump
1516 threading path that crosses loop boundaries. We do not try
1517 and thread this elsewhere, so just cancel the jump threading
1518 request by clearing the AUX field now. */
1519 if ((bb->loop_father != e2->src->loop_father
1520 && !loop_exit_edge_p (e2->src->loop_father, e2))
1521 || (e2->src->loop_father != e2->dest->loop_father
1522 && !loop_exit_edge_p (e2->src->loop_father, e2)))
1524 /* Since this case is not handled by our special code
1525 to thread through a loop header, we must explicitly
1526 cancel the threading request here. */
1527 delete_jump_thread_path (path);
1528 e->aux = NULL;
1529 continue;
1532 /* Another case occurs when trying to thread through our
1533 own loop header, possibly from inside the loop. We will
1534 thread these later. */
1535 unsigned int i;
1536 for (i = 1; i < path->length (); i++)
1538 if ((*path)[i]->e->src == bb->loop_father->header
1539 && (!loop_exit_edge_p (bb->loop_father, e2)
1540 || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1541 break;
1544 if (i != path->length ())
1545 continue;
1548 /* Insert the outgoing edge into the hash table if it is not
1549 already in the hash table. */
1550 lookup_redirection_data (e, INSERT);
1553 /* We do not update dominance info. */
1554 free_dominance_info (CDI_DOMINATORS);
1556 /* We know we only thread through the loop header to loop exits.
1557 Let the basic block duplication hook know we are not creating
1558 a multiple entry loop. */
1559 if (noloop_only
1560 && bb == bb->loop_father->header)
1561 set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1563 /* Now create duplicates of BB.
1565 Note that for a block with a high outgoing degree we can waste
1566 a lot of time and memory creating and destroying useless edges.
1568 So we first duplicate BB and remove the control structure at the
1569 tail of the duplicate as well as all outgoing edges from the
1570 duplicate. We then use that duplicate block as a template for
1571 the rest of the duplicates. */
1572 local_info.template_block = NULL;
1573 local_info.bb = bb;
1574 local_info.jumps_threaded = false;
1575 redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1576 (&local_info);
1578 /* The template does not have an outgoing edge. Create that outgoing
1579 edge and update PHI nodes at the edge's target as necessary.
1581 We do this after creating all the duplicates to avoid creating
1582 unnecessary edges. */
1583 redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1584 (&local_info);
1586 /* The hash table traversals above created the duplicate blocks (and the
1587 statements within the duplicate blocks). This loop creates PHI nodes for
1588 the duplicated blocks and redirects the incoming edges into BB to reach
1589 the duplicates of BB. */
1590 redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1591 (&local_info);
1593 /* Done with this block. Clear REDIRECTION_DATA. */
1594 delete redirection_data;
1595 redirection_data = NULL;
1597 if (noloop_only
1598 && bb == bb->loop_father->header)
1599 set_loop_copy (bb->loop_father, NULL);
1601 BITMAP_FREE (local_info.duplicate_blocks);
1602 local_info.duplicate_blocks = NULL;
1604 /* Indicate to our caller whether or not any jumps were threaded. */
1605 return local_info.jumps_threaded;
1608 /* Wrapper for thread_block_1 so that we can first handle jump
1609 thread paths which do not involve copying joiner blocks, then
1610 handle jump thread paths which have joiner blocks.
1612 By doing things this way we can be as aggressive as possible and
1613 not worry that copying a joiner block will create a jump threading
1614 opportunity. */
1616 static bool
1617 thread_block (basic_block bb, bool noloop_only)
1619 bool retval;
1620 retval = thread_block_1 (bb, noloop_only, false);
1621 retval |= thread_block_1 (bb, noloop_only, true);
1622 return retval;
1626 /* Threads edge E through E->dest to the next edge on E's jump thread
1627 path. Returns the copy of E->dest created during threading, or E->dest
1628 if it was not necessary to copy it (E is its single predecessor). */
1630 static basic_block
1631 thread_single_edge (edge e)
1633 basic_block bb = e->dest;
1634 struct redirection_data rd;
1635 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1636 edge eto = (*path)[1]->e;
1638 delete_jump_thread_path (path);
1639 e->aux = NULL;
1641 thread_stats.num_threaded_edges++;
1643 if (single_pred_p (bb))
1645 /* If BB has just a single predecessor, we should only remove the
1646 control statement at its end and the successor edges other than ETO. */
1647 remove_ctrl_stmt_and_useless_edges (bb, eto->dest);
1649 /* And fixup the flags on the single remaining edge. */
1650 eto->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
1651 eto->flags |= EDGE_FALLTHRU;
1653 return bb;
1656 /* Otherwise, we need to create a copy. */
1657 if (e->dest == eto->src)
1658 update_bb_profile_for_threading (bb, EDGE_FREQUENCY (e), e->count, eto);
1660 vec<jump_thread_edge *> *npath = new vec<jump_thread_edge *> ();
1661 jump_thread_edge *x = new jump_thread_edge (e, EDGE_START_JUMP_THREAD);
1662 npath->safe_push (x);
1664 x = new jump_thread_edge (eto, EDGE_COPY_SRC_BLOCK);
1665 npath->safe_push (x);
1666 rd.path = npath;
1668 create_block_for_threading (bb, &rd, 0, NULL);
1669 remove_ctrl_stmt_and_useless_edges (rd.dup_blocks[0], NULL);
1670 create_edge_and_update_destination_phis (&rd, rd.dup_blocks[0], 0);
1672 if (dump_file && (dump_flags & TDF_DETAILS))
1673 fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1674 e->src->index, e->dest->index, rd.dup_blocks[0]->index);
1676 rd.dup_blocks[0]->count = e->count;
1677 rd.dup_blocks[0]->frequency = EDGE_FREQUENCY (e);
1678 single_succ_edge (rd.dup_blocks[0])->count = e->count;
1679 redirect_edge_and_branch (e, rd.dup_blocks[0]);
1680 flush_pending_stmts (e);
1682 delete_jump_thread_path (npath);
1683 return rd.dup_blocks[0];
1686 /* Callback for dfs_enumerate_from. Returns true if BB is different
1687 from STOP and DBDS_CE_STOP. */
1689 static basic_block dbds_ce_stop;
1690 static bool
1691 dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1693 return (bb != (const_basic_block) stop
1694 && bb != dbds_ce_stop);
1697 /* Evaluates the dominance relationship between BB and the latch of LOOP,
1698 and returns the state. */
1700 enum bb_dom_status
1702 /* BB does not dominate latch of the LOOP. */
1703 DOMST_NONDOMINATING,
1704 /* The LOOP is broken (there is no path from the header to its latch). */
1705 DOMST_LOOP_BROKEN,
1706 /* BB dominates the latch of the LOOP. */
1707 DOMST_DOMINATING
1710 static enum bb_dom_status
1711 determine_bb_domination_status (struct loop *loop, basic_block bb)
1713 basic_block *bblocks;
1714 unsigned nblocks, i;
1715 bool bb_reachable = false;
1716 edge_iterator ei;
1717 edge e;
1719 /* This function assumes BB is a successor of LOOP->header.
1720 If that is not the case, return DOMST_NONDOMINATING, which
1721 is always safe. */
1723 bool ok = false;
1725 FOR_EACH_EDGE (e, ei, bb->preds)
1727 if (e->src == loop->header)
1729 ok = true;
1730 break;
1734 if (!ok)
1735 return DOMST_NONDOMINATING;
1738 if (bb == loop->latch)
1739 return DOMST_DOMINATING;
1741 /* Check that BB dominates LOOP->latch, and that it is back-reachable
1742 from it. */
1744 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1745 dbds_ce_stop = loop->header;
1746 nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1747 bblocks, loop->num_nodes, bb);
1748 for (i = 0; i < nblocks; i++)
1749 FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1751 if (e->src == loop->header)
1753 free (bblocks);
1754 return DOMST_NONDOMINATING;
1756 if (e->src == bb)
1757 bb_reachable = true;
1760 free (bblocks);
1761 return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
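/* For illustration (an added, informal example): in a loop of the shape

     while (p)        // loop header
       {
         if (q)        // BB, the header's in-loop successor
           a ();
         else
           b ();
       }               // the arms rejoin just before the latch

   every path from the header to the latch passes through the block
   testing "q", so the function above returns DOMST_DOMINATING for it;
   DOMST_LOOP_BROKEN would mean no path from the header to the latch was
   found at all.  */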
1764 /* Return true if BB is part of the new pre-header that is created
1765 when threading the latch to DATA. */
1767 static bool
1768 def_split_header_continue_p (const_basic_block bb, const void *data)
1770 const_basic_block new_header = (const_basic_block) data;
1771 const struct loop *l;
1773 if (bb == new_header
1774 || loop_depth (bb->loop_father) < loop_depth (new_header->loop_father))
1775 return false;
1776 for (l = bb->loop_father; l; l = loop_outer (l))
1777 if (l == new_header->loop_father)
1778 return true;
1779 return false;
1782 /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1783 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1784 to the inside of the loop. */
1786 static bool
1787 thread_through_loop_header (struct loop *loop, bool may_peel_loop_headers)
1789 basic_block header = loop->header;
1790 edge e, tgt_edge, latch = loop_latch_edge (loop);
1791 edge_iterator ei;
1792 basic_block tgt_bb, atgt_bb;
1793 enum bb_dom_status domst;
1795 /* We have already threaded through headers to exits, so all the threading
1796 requests now are to the inside of the loop. We need to avoid creating
1797 irreducible regions (i.e., loops with more than one entry block), and
1798 also loops with several latch edges, or new subloops of the loop (although
1799 there are cases where it might be appropriate, it is difficult to decide,
1800 and doing it wrongly may confuse other optimizers).
1802 We could handle more general cases here. However, the intention is to
1803 preserve some information about the loop, which is impossible if its
1804 structure changes significantly, in a way that is not well understood.
1805 Thus we handle only a few important special cases, in which updating
1806 the loop-carried information should also be feasible:
1808 1) Propagation of latch edge to a block that dominates the latch block
1809 of a loop. This aims to handle the following idiom:
1811 first = 1;
1812 while (1)
1814 if (first)
1815 initialize;
1816 first = 0;
1817 body;
1820 After threading the latch edge, this becomes
1822 first = 1;
1823 if (first)
1824 initialize;
1825 while (1)
1827 first = 0;
1828 body;
1831 The original header of the loop is moved out of it, and we may thread
1832 the remaining edges through it without further constraints.
1834 2) All entry edges are propagated to a single basic block that dominates
1835 the latch block of the loop. This aims to handle the following idiom
1836 (normally created for "for" loops):
1838 i = 0;
1839 while (1)
1841 if (i >= 100)
1842 break;
1843 body;
1844 i++;
1847 This becomes
1849 i = 0;
1850 while (1)
1852 body;
1853 i++;
1854 if (i >= 100)
1855 break;
1859 /* Threading through the header won't improve the code if the header has just
1860 one successor. */
1861 if (single_succ_p (header))
1862 goto fail;
1864 /* If we threaded the latch using a joiner block, we cancel the
1865 threading opportunity out of an abundance of caution. However,
1866 still allow threading from outside to inside the loop. */
1867 if (latch->aux)
1869 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1870 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1872 delete_jump_thread_path (path);
1873 latch->aux = NULL;
1877 if (latch->aux)
1879 vec<jump_thread_edge *> *path = THREAD_PATH (latch);
1880 tgt_edge = (*path)[1]->e;
1881 tgt_bb = tgt_edge->dest;
1883 else if (!may_peel_loop_headers
1884 && !redirection_block_p (loop->header))
1885 goto fail;
1886 else
1888 tgt_bb = NULL;
1889 tgt_edge = NULL;
1890 FOR_EACH_EDGE (e, ei, header->preds)
1892 if (!e->aux)
1894 if (e == latch)
1895 continue;
1897 /* If latch is not threaded, and there is a header
1898 edge that is not threaded, we would create a loop
1899 with multiple entries. */
1900 goto fail;
1903 vec<jump_thread_edge *> *path = THREAD_PATH (e);
1905 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1906 goto fail;
1907 tgt_edge = (*path)[1]->e;
1908 atgt_bb = tgt_edge->dest;
1909 if (!tgt_bb)
1910 tgt_bb = atgt_bb;
1911 /* Two targets of threading would make us create a loop
1912 with multiple entries. */
1913 else if (tgt_bb != atgt_bb)
1914 goto fail;
1917 if (!tgt_bb)
1919 /* There are no threading requests. */
1920 return false;
1923 /* Redirecting to empty loop latch is useless. */
1924 if (tgt_bb == loop->latch
1925 && empty_block_p (loop->latch))
1926 goto fail;
1929 /* The target block must dominate the loop latch, otherwise we would be
1930 creating a subloop. */
1931 domst = determine_bb_domination_status (loop, tgt_bb);
1932 if (domst == DOMST_NONDOMINATING)
1933 goto fail;
1934 if (domst == DOMST_LOOP_BROKEN)
1936 /* If the loop ceased to exist, mark it as such, and thread through its
1937 original header. */
1938 mark_loop_for_removal (loop);
1939 return thread_block (header, false);
1942 if (tgt_bb->loop_father->header == tgt_bb)
1944 /* If the target of the threading is a header of a subloop, we need
1945 to create a preheader for it, so that the headers of the two loops
1946 do not merge. */
1947 if (EDGE_COUNT (tgt_bb->preds) > 2)
1949 tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1950 gcc_assert (tgt_bb != NULL);
1952 else
1953 tgt_bb = split_edge (tgt_edge);
1956 if (latch->aux)
1958 basic_block *bblocks;
1959 unsigned nblocks, i;
1961 /* First handle the case where the latch edge is redirected. We are copying
1962 the loop header but not creating a multiple entry loop. Make the
1963 cfg manipulation code aware of that fact. */
1964 set_loop_copy (loop, loop);
1965 loop->latch = thread_single_edge (latch);
1966 set_loop_copy (loop, NULL);
1967 gcc_assert (single_succ (loop->latch) == tgt_bb);
1968 loop->header = tgt_bb;
1970 /* Remove the new pre-header blocks from our loop. */
1971 bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1972 nblocks = dfs_enumerate_from (header, 0, def_split_header_continue_p,
1973 bblocks, loop->num_nodes, tgt_bb);
1974 for (i = 0; i < nblocks; i++)
1975 if (bblocks[i]->loop_father == loop)
1977 remove_bb_from_loops (bblocks[i]);
1978 add_bb_to_loop (bblocks[i], loop_outer (loop));
1980 free (bblocks);
1982 /* If the new header has multiple latches mark it so. */
1983 FOR_EACH_EDGE (e, ei, loop->header->preds)
1984 if (e->src->loop_father == loop
1985 && e->src != loop->latch)
1987 loop->latch = NULL;
1988 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
1991 /* Cancel remaining threading requests that would make the
1992 loop a multiple entry loop. */
1993 FOR_EACH_EDGE (e, ei, header->preds)
1995 edge e2;
1997 if (e->aux == NULL)
1998 continue;
2000 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2001 e2 = path->last ()->e;
2003 if (e->src->loop_father != e2->dest->loop_father
2004 && e2->dest != loop->header)
2006 delete_jump_thread_path (path);
2007 e->aux = NULL;
2011 /* Thread the remaining edges through the former header. */
2012 thread_block (header, false);
2014 else
2016 basic_block new_preheader;
2018 /* Now consider the case where entry edges are redirected to the new entry
2019 block. Remember one entry edge, so that we can find the new
2020 preheader (its destination after threading). */
2021 FOR_EACH_EDGE (e, ei, header->preds)
2023 if (e->aux)
2024 break;
2027 /* The duplicate of the header is the new preheader of the loop. Ensure
2028 that it is placed correctly in the loop hierarchy. */
2029 set_loop_copy (loop, loop_outer (loop));
2031 thread_block (header, false);
2032 set_loop_copy (loop, NULL);
2033 new_preheader = e->dest;
2035 /* Create the new latch block. This is always necessary, as the latch
2036 must have only a single successor, but the original header had at
2037 least two successors. */
2038 loop->latch = NULL;
2039 mfb_kj_edge = single_succ_edge (new_preheader);
2040 loop->header = mfb_kj_edge->dest;
2041 latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
2042 loop->header = latch->dest;
2043 loop->latch = latch->src;
2046 return true;
2048 fail:
2049 /* We failed to thread anything. Cancel the requests. */
2050 FOR_EACH_EDGE (e, ei, header->preds)
2052 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2054 if (path)
2056 delete_jump_thread_path (path);
2057 e->aux = NULL;
2060 return false;
2063 /* E1 and E2 are edges into the same basic block. Return TRUE if the
2064 PHI arguments associated with those edges are equal or there are no
2065 PHI arguments, otherwise return FALSE. */
2067 static bool
2068 phi_args_equal_on_edges (edge e1, edge e2)
2070 gphi_iterator gsi;
2071 int indx1 = e1->dest_idx;
2072 int indx2 = e2->dest_idx;
2074 for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2076 gphi *phi = gsi.phi ();
2078 if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
2079 gimple_phi_arg_def (phi, indx2), 0))
2080 return false;
2082 return true;
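/* For illustration (an added, informal example): if the common destination
   of E1 and E2 contains only the PHI

     x_5 = PHI <..., a_2 (via E1), ..., a_2 (via E2), ...>

   then both edges contribute the same argument a_2 and the function returns
   true; had one of the edges contributed b_7 instead, it would return
   false.  */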
2085 /* Walk through the registered jump threads and convert them into a
2086 form convenient for this pass.
2088 Any block which has incoming edges threaded to outgoing edges
2089 will have its entry in THREADED_BLOCKS set.
2091 Any threaded edge will have its new outgoing edge stored in the
2092 original edge's AUX field.
2094 This form avoids the need to walk all the edges in the CFG to
2095 discover blocks which need processing and avoids unnecessary
2096 hash table lookups to map from threaded edge to new target. */
2098 static void
2099 mark_threaded_blocks (bitmap threaded_blocks)
2101 unsigned int i;
2102 bitmap_iterator bi;
2103 bitmap tmp = BITMAP_ALLOC (NULL);
2104 basic_block bb;
2105 edge e;
2106 edge_iterator ei;
2108 /* It is possible to have jump threads in which one is a subpath
2109 of the other. I.e., (A, B), (B, C), (C, D) where B is a joiner
2110 block and (B, C), (C, D) where no joiner block exists.
2112 When this occurs, ignore the jump thread request with the joiner
2113 block. It's totally subsumed by the simpler jump thread request.
2115 This results in less block copying and simpler CFGs. More importantly,
2116 when we duplicate the joiner block, B, in this case we will create
2117 a new threading opportunity that we wouldn't be able to optimize
2118 until the next jump threading iteration.
2120 So first convert the jump thread requests which do not require a
2121 joiner block. */
2122 for (i = 0; i < paths.length (); i++)
2124 vec<jump_thread_edge *> *path = paths[i];
2126 if ((*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
2128 edge e = (*path)[0]->e;
2129 e->aux = (void *)path;
2130 bitmap_set_bit (tmp, e->dest->index);
2134 /* Now iterate again, converting cases where we want to thread
2135 through a joiner block, but only if no other edge on the path
2136 already has a jump thread attached to it. We do this in two passes,
2137 to avoid situations where the order in the paths vec can hide overlapping
2138 threads (the path is recorded on the incoming edge, so we would miss
2139 cases where the second path starts at a downstream edge on the same
2140 path). First record all joiner paths, deleting any in the unexpected
2141 case where there is already a path for that incoming edge. */
2142 for (i = 0; i < paths.length (); i++)
2144 vec<jump_thread_edge *> *path = paths[i];
2146 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2148 /* Attach the path to the starting edge if none is yet recorded. */
2149 if ((*path)[0]->e->aux == NULL)
2151 (*path)[0]->e->aux = path;
2153 else
2155 paths.unordered_remove (i);
2156 if (dump_file && (dump_flags & TDF_DETAILS))
2157 dump_jump_thread_path (dump_file, *path, false);
2158 delete_jump_thread_path (path);
2162 /* Second, look for paths that have any other jump thread attached to
2163 them, and either finish converting them or cancel them. */
2164 for (i = 0; i < paths.length (); i++)
2166 vec<jump_thread_edge *> *path = paths[i];
2167 edge e = (*path)[0]->e;
2169 if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2171 unsigned int j;
2172 for (j = 1; j < path->length (); j++)
2173 if ((*path)[j]->e->aux != NULL)
2174 break;
2176 /* If we iterated through the entire path without exiting the loop,
2177 then we are good to go, record it. */
2178 if (j == path->length ())
2179 bitmap_set_bit (tmp, e->dest->index);
2180 else
2182 e->aux = NULL;
2183 paths.unordered_remove (i);
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 dump_jump_thread_path (dump_file, *path, false);
2186 delete_jump_thread_path (path);
2191 /* If optimizing for size, only thread through a block if we don't have
2192 to duplicate it or it's an otherwise empty redirection block. */
2193 if (optimize_function_for_size_p (cfun))
2195 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2197 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2198 if (EDGE_COUNT (bb->preds) > 1
2199 && !redirection_block_p (bb))
2201 FOR_EACH_EDGE (e, ei, bb->preds)
2203 if (e->aux)
2205 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2206 delete_jump_thread_path (path);
2207 e->aux = NULL;
2211 else
2212 bitmap_set_bit (threaded_blocks, i);
2215 else
2216 bitmap_copy (threaded_blocks, tmp);
2218 /* Look for jump threading paths which cross multiple loop headers.
2220 The code to thread through loop headers will change the CFG in ways
2221 that break assumptions made by the loop optimization code.
2223 We don't want to blindly cancel the requests. We can instead do better
2224 by trimming off the end of the jump thread path. */
2225 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2227 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2228 FOR_EACH_EDGE (e, ei, bb->preds)
2230 if (e->aux)
2232 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2234 for (unsigned int i = 0, crossed_headers = 0;
2235 i < path->length ();
2236 i++)
2238 basic_block dest = (*path)[i]->e->dest;
2239 crossed_headers += (dest == dest->loop_father->header);
2240 if (crossed_headers > 1)
2242 /* Trim from entry I onwards. */
2243 for (unsigned int j = i; j < path->length (); j++)
2244 delete (*path)[j];
2245 path->truncate (i);
2247 /* Now that we've truncated the path, make sure
2248 what's left is still valid. We need at least
2249 two edges on the path and the last edge cannot
2250 be a joiner. This should never happen, but let's
2251 be safe. */
2252 if (path->length () < 2
2253 || (path->last ()->type
2254 == EDGE_COPY_SRC_JOINER_BLOCK))
2256 delete_jump_thread_path (path);
2257 e->aux = NULL;
2259 break;
2266 /* If we have a joiner block (J) which has two successors S1 and S2 and
2267 we are threading through S1 and the final destination of the thread
2268 is S2, then we must verify that any PHI nodes in S2 have the same
2269 PHI arguments for the edge J->S2 and J->S1->...->S2.
2271 We used to detect this prior to registering the jump thread, but
2272 that prohibits propagation of edge equivalences into non-dominated
2273 PHI nodes as the equivalency test might occur before propagation.
2275 This must also occur after we truncate any jump threading paths
2276 as this scenario may only show up after truncation.
2278 This works for now, but will need improvement as part of the FSA
2279 optimization.
2281 Note since we've moved the thread request data to the edges,
2282 we have to iterate on those rather than the threaded_edges vector. */
2283 EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2285 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2286 FOR_EACH_EDGE (e, ei, bb->preds)
2288 if (e->aux)
2290 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2291 bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2293 if (have_joiner)
2295 basic_block joiner = e->dest;
2296 edge final_edge = path->last ()->e;
2297 basic_block final_dest = final_edge->dest;
2298 edge e2 = find_edge (joiner, final_dest);
2300 if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2302 delete_jump_thread_path (path);
2303 e->aux = NULL;
2310 BITMAP_FREE (tmp);
2314 /* Return TRUE if BB ends with a switch statement or a computed goto.
2315 Otherwise return false. */
2316 static bool
2317 bb_ends_with_multiway_branch (basic_block bb ATTRIBUTE_UNUSED)
2319 gimple stmt = last_stmt (bb);
2320 if (stmt && gimple_code (stmt) == GIMPLE_SWITCH)
2321 return true;
2322 if (stmt && gimple_code (stmt) == GIMPLE_GOTO
2323 && TREE_CODE (gimple_goto_dest (stmt)) == SSA_NAME)
2324 return true;
2325 return false;
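/* For illustration (an added, informal example): inside a function with
   labels L1 and L2, a GNU C computed goto such as

     void *targets[2] = { &&L1, &&L2 };
     goto *targets[i];

   ends its basic block with a GIMPLE_GOTO whose destination is an SSA_NAME,
   so this predicate returns true for it, just as it does for a block ending
   in the GIMPLE_SWITCH generated from a C switch statement.  */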
2328 /* Verify that the REGION is a valid jump thread. A jump thread is a special
2329 case of a SEME (Single Entry Multiple Exits) region in which all nodes in the
2330 REGION have exactly one incoming edge. The only exception is the first block
2331 that may not have been connected to the rest of the cfg yet. */
2333 DEBUG_FUNCTION void
2334 verify_jump_thread (basic_block *region, unsigned n_region)
2336 for (unsigned i = 0; i < n_region; i++)
2337 gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2340 /* Return true when BB is one of the first N items in BBS. */
2342 static inline bool
2343 bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2345 for (int i = 0; i < n; i++)
2346 if (bb == bbs[i])
2347 return true;
2349 return false;
2352 /* Duplicates a jump-thread path of N_REGION basic blocks.
2353 The ENTRY edge is redirected to the duplicate of the region.
2355 Remove the last conditional statement in the last basic block in the REGION,
2356 and create a single fallthru edge pointing to the same destination as the
2357 EXIT edge.
2359 The new basic blocks are stored to REGION_COPY in the same order as they had
2360 in REGION, provided that REGION_COPY is not NULL.
2362 Returns false if it is unable to copy the region, true otherwise. */
2364 static bool
2365 duplicate_thread_path (edge entry, edge exit,
2366 basic_block *region, unsigned n_region,
2367 basic_block *region_copy)
2369 unsigned i;
2370 bool free_region_copy = false;
2371 struct loop *loop = entry->dest->loop_father;
2372 edge exit_copy;
2373 edge redirected;
2374 int total_freq = 0, entry_freq = 0;
2375 gcov_type total_count = 0, entry_count = 0;
2377 if (!can_copy_bbs_p (region, n_region))
2378 return false;
2380 /* Some sanity checking. Note that we do not check for all possible
2381 misuses of the function. I.e., if you ask to copy something weird,
2382 it will work, but the state of the structures will probably not be
2383 correct. */
2384 for (i = 0; i < n_region; i++)
2386 /* We do not handle subloops, i.e. all the blocks must belong to the
2387 same loop. */
2388 if (region[i]->loop_father != loop)
2389 return false;
2392 initialize_original_copy_tables ();
2394 set_loop_copy (loop, loop);
2396 if (!region_copy)
2398 region_copy = XNEWVEC (basic_block, n_region);
2399 free_region_copy = true;
2402 if (entry->dest->count)
2404 total_count = entry->dest->count;
2405 entry_count = entry->count;
2406 /* Fix up corner cases, to avoid division by zero or creation of negative
2407 frequencies. */
2408 if (entry_count > total_count)
2409 entry_count = total_count;
2411 else
2413 total_freq = entry->dest->frequency;
2414 entry_freq = EDGE_FREQUENCY (entry);
2415 /* Fix up corner cases, to avoid division by zero or creation of negative
2416 frequencies. */
2417 if (total_freq == 0)
2418 total_freq = 1;
2419 else if (entry_freq > total_freq)
2420 entry_freq = total_freq;
2423 copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2424 split_edge_bb_loc (entry), false);
2426 /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2427 following code ensures that all the edges exiting the jump-thread path are
2428 redirected back to the original code: these edges are exceptions
2429 invalidating the property that is propagated by executing all the blocks of
2430 the jump-thread path in order. */
2432 for (i = 0; i < n_region; i++)
2434 edge e;
2435 edge_iterator ei;
2436 basic_block bb = region_copy[i];
2438 if (single_succ_p (bb))
2440 /* Make sure the successor is the next node in the path. */
2441 gcc_assert (i + 1 == n_region
2442 || region_copy[i + 1] == single_succ_edge (bb)->dest);
2443 continue;
2446 /* Special case the last block on the path: make sure that it does not
2447 jump back on the copied path. */
2448 if (i + 1 == n_region)
2450 FOR_EACH_EDGE (e, ei, bb->succs)
2451 if (bb_in_bbs (e->dest, region_copy, n_region - 1))
2453 basic_block orig = get_bb_original (e->dest);
2454 if (orig)
2455 redirect_edge_and_branch_force (e, orig);
2457 continue;
2460 /* Redirect all other edges jumping to non-adjacent blocks back to the
2461 original code. */
2462 FOR_EACH_EDGE (e, ei, bb->succs)
2463 if (region_copy[i + 1] != e->dest)
2465 basic_block orig = get_bb_original (e->dest);
2466 if (orig)
2467 redirect_edge_and_branch_force (e, orig);
2471 if (total_count)
2473 scale_bbs_frequencies_gcov_type (region, n_region,
2474 total_count - entry_count,
2475 total_count);
2476 scale_bbs_frequencies_gcov_type (region_copy, n_region, entry_count,
2477 total_count);
2479 else
2481 scale_bbs_frequencies_int (region, n_region, total_freq - entry_freq,
2482 total_freq);
2483 scale_bbs_frequencies_int (region_copy, n_region, entry_freq, total_freq);
2486 #ifdef ENABLE_CHECKING
2487 verify_jump_thread (region_copy, n_region);
2488 #endif
2490 /* Remove the last branch in the jump thread path. */
2491 remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2492 edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2494 if (e) {
2495 rescan_loop_exit (e, true, false);
2496 e->probability = REG_BR_PROB_BASE;
2497 e->count = region_copy[n_region - 1]->count;
2500 /* Redirect the entry and add the phi node arguments. */
2501 if (entry->dest == loop->header)
2502 mark_loop_for_removal (loop);
2503 redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2504 gcc_assert (redirected != NULL);
2505 flush_pending_stmts (entry);
2507 /* Add the other PHI node arguments. */
2508 add_phi_args_after_copy (region_copy, n_region, NULL);
2510 if (free_region_copy)
2511 free (region_copy);
2513 free_original_copy_tables ();
2514 return true;
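/* For illustration (an added, informal example of the scaling above): if the
   entry block of the region had count 1000 and the ENTRY edge carried count
   250, the original REGION is scaled by (1000 - 250) / 1000 and REGION_COPY
   by 250 / 1000, so the two copies together still account for the original
   execution counts.  */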
2517 /* Return true when PATH is a valid jump-thread path. */
2519 static bool
2520 valid_jump_thread_path (vec<jump_thread_edge *> *path)
2522 unsigned len = path->length ();
2524 /* Check that the path is connected. */
2525 for (unsigned int j = 0; j < len - 1; j++)
2526 if ((*path)[j]->e->dest != (*path)[j+1]->e->src)
2527 return false;
2529 return true;
2532 /* Walk through all blocks and thread incoming edges to the appropriate
2533 outgoing edge for each edge pair recorded in THREADED_EDGES.
2535 It is the caller's responsibility to fix the dominance information
2536 and rewrite duplicated SSA_NAMEs back into SSA form.
2538 If MAY_PEEL_LOOP_HEADERS is false, we avoid threading edges through
2539 loop headers if it does not simplify the loop.
2541 Returns true if one or more edges were threaded, false otherwise. */
2543 bool
2544 thread_through_all_blocks (bool may_peel_loop_headers)
2546 bool retval = false;
2547 unsigned int i;
2548 bitmap_iterator bi;
2549 bitmap threaded_blocks;
2550 struct loop *loop;
2552 if (!paths.exists ())
2553 return false;
2555 threaded_blocks = BITMAP_ALLOC (NULL);
2556 memset (&thread_stats, 0, sizeof (thread_stats));
2558 /* Jump-thread all FSM threads before other jump-threads. */
2559 for (i = 0; i < paths.length ();)
2561 vec<jump_thread_edge *> *path = paths[i];
2562 edge entry = (*path)[0]->e;
2564 /* Only code-generate FSM jump-threads in this loop. */
2565 if ((*path)[0]->type != EDGE_FSM_THREAD)
2567 i++;
2568 continue;
2571 /* Do not jump-thread twice from the same block. */
2572 if (bitmap_bit_p (threaded_blocks, entry->src->index)
2573 /* Verify that the jump thread path is still valid: a
2574 previous jump-thread may have changed the CFG, and
2575 invalidated the current path. */
2576 || !valid_jump_thread_path (path))
2578 /* Remove invalid FSM jump-thread paths. */
2579 delete_jump_thread_path (path);
2580 paths.unordered_remove (i);
2581 continue;
2584 unsigned len = path->length ();
2585 edge exit = (*path)[len - 1]->e;
2586 basic_block *region = XNEWVEC (basic_block, len - 1);
2588 for (unsigned int j = 0; j < len - 1; j++)
2589 region[j] = (*path)[j]->e->dest;
2591 if (duplicate_thread_path (entry, exit, region, len - 1, NULL))
2593 /* We do not update dominance info. */
2594 free_dominance_info (CDI_DOMINATORS);
2595 bitmap_set_bit (threaded_blocks, entry->src->index);
2596 retval = true;
2599 delete_jump_thread_path (path);
2600 paths.unordered_remove (i);
2603 /* Remove from PATHS all the jump-threads starting with an edge already
2604 jump-threaded. */
2605 for (i = 0; i < paths.length ();)
2607 vec<jump_thread_edge *> *path = paths[i];
2608 edge entry = (*path)[0]->e;
2610 /* Do not jump-thread twice from the same block. */
2611 if (bitmap_bit_p (threaded_blocks, entry->src->index))
2613 delete_jump_thread_path (path);
2614 paths.unordered_remove (i);
2616 else
2617 i++;
2620 bitmap_clear (threaded_blocks);
2622 mark_threaded_blocks (threaded_blocks);
2624 initialize_original_copy_tables ();
2626 /* First perform the threading requests that do not affect
2627 loop structure. */
2628 EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi)
2630 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2632 if (EDGE_COUNT (bb->preds) > 0)
2633 retval |= thread_block (bb, true);
2636 /* Then perform the threading through loop headers. We start with the
2637 innermost loop, so that the changes in cfg we perform won't affect
2638 further threading. */
2639 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
2641 if (!loop->header
2642 || !bitmap_bit_p (threaded_blocks, loop->header->index))
2643 continue;
2645 retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2648 /* Any jump threading paths that are still attached to edges at this
2649 point must be one of two cases.
2651 First, we could have a jump threading path which went from outside
2652 a loop to inside a loop that was ignored because a prior jump thread
2653 across a backedge was realized (which indirectly causes the loop
2654 above to ignore the latter thread). We can detect these because the
2655 loop structures will be different and we do not currently try to
2656 optimize this case.
2658 Second, we could be threading across a backedge to a point within the
2659 same loop. This occurs for the FSA/FSM optimization and we would
2660 like to optimize it. However, we have to be very careful as this
2661 may completely scramble the loop structures, with the result being
2662 irreducible loops causing us to throw away our loop structure.
2664 As a compromise for the latter case, if the thread path ends in
2665 a block where the last statement is a multiway branch, then go
2666 ahead and thread it, else ignore it. */
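/* For illustration (an added, informal example): the multiway-branch case
   described above typically arises from a switch-based state machine such as

     for (;;)
       switch (state)
         {
         case 1: ... state = 3; break;
         case 3: ... state = 7; break;
         }

   where an FSM thread crosses the loop backedge but ends in the block
   containing the GIMPLE_SWITCH, so it is still threaded below.  */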
2667 basic_block bb;
2668 edge e;
2669 FOR_EACH_BB_FN (bb, cfun)
2671 /* If we do end up threading here, we can remove elements from
2672 BB->preds. Thus we can not use the FOR_EACH_EDGE iterator. */
2673 for (edge_iterator ei = ei_start (bb->preds);
2674 (e = ei_safe_edge (ei));)
2675 if (e->aux)
2677 vec<jump_thread_edge *> *path = THREAD_PATH (e);
2679 /* Case 1, threading from outside to inside the loop
2680 after we'd already threaded through the header. */
2681 if ((*path)[0]->e->dest->loop_father
2682 != path->last ()->e->src->loop_father)
2684 delete_jump_thread_path (path);
2685 e->aux = NULL;
2686 ei_next (&ei);
2688 else if (bb_ends_with_multiway_branch (path->last ()->e->src))
2690 /* The code to thread through loop headers may have
2691 split a block with jump threads attached to it.
2693 We can identify this with a disjoint jump threading
2694 path. If found, just remove it. */
2695 for (unsigned int i = 0; i < path->length () - 1; i++)
2696 if ((*path)[i]->e->dest != (*path)[i + 1]->e->src)
2698 delete_jump_thread_path (path);
2699 e->aux = NULL;
2700 ei_next (&ei);
2701 break;
2704 /* Our path is still valid, thread it. */
2705 if (e->aux)
2707 if (thread_block ((*path)[0]->e->dest, false))
2708 e->aux = NULL;
2709 else
2711 delete_jump_thread_path (path);
2712 e->aux = NULL;
2713 ei_next (&ei);
2717 else
2719 delete_jump_thread_path (path);
2720 e->aux = NULL;
2721 ei_next (&ei);
2724 else
2725 ei_next (&ei);
2728 statistics_counter_event (cfun, "Jumps threaded",
2729 thread_stats.num_threaded_edges);
2731 free_original_copy_tables ();
2733 BITMAP_FREE (threaded_blocks);
2734 threaded_blocks = NULL;
2735 paths.release ();
2737 if (retval)
2738 loops_state_set (LOOPS_NEED_FIXUP);
2740 return retval;
2743 /* Delete the jump threading path PATH. We have to explicitly delete
2744 each entry in the vector, then the container. */
2746 void
2747 delete_jump_thread_path (vec<jump_thread_edge *> *path)
2749 for (unsigned int i = 0; i < path->length (); i++)
2750 delete (*path)[i];
2751 path->release();
2752 delete path;
2755 /* Register a jump threading opportunity. We queue up all the jump
2756 threading opportunities discovered by a pass and update the CFG
2757 and SSA form all at once.
2759 The first edge on PATH is the edge we can thread; the destination of
2760 the last edge on PATH is the eventual target, i.e., we are recording
2761 that the first edge can be redirected to reach that target after fixing the SSA graph. */
2763 void
2764 register_jump_thread (vec<jump_thread_edge *> *path)
2766 if (!dbg_cnt (registered_jump_thread))
2768 delete_jump_thread_path (path);
2769 return;
2772 /* First make sure there are no NULL outgoing edges on the jump threading
2773 path. That can happen for jumping to a constant address. */
2774 for (unsigned int i = 0; i < path->length (); i++)
2775 if ((*path)[i]->e == NULL)
2777 if (dump_file && (dump_flags & TDF_DETAILS))
2779 fprintf (dump_file,
2780 "Found NULL edge in jump threading path. Cancelling jump thread:\n");
2781 dump_jump_thread_path (dump_file, *path, false);
2784 delete_jump_thread_path (path);
2785 return;
2788 if (dump_file && (dump_flags & TDF_DETAILS))
2789 dump_jump_thread_path (dump_file, *path, true);
2791 if (!paths.exists ())
2792 paths.create (5);
2794 paths.safe_push (path);
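/* For illustration, a hypothetical sketch of how a client pass hands off a
   path; the edges E and E2 and the thread types shown are placeholders
   chosen by the caller:

     vec<jump_thread_edge *> *path = new vec<jump_thread_edge *> ();
     path->safe_push (new jump_thread_edge (e, EDGE_START_JUMP_THREAD));
     path->safe_push (new jump_thread_edge (e2, EDGE_COPY_SRC_BLOCK));
     register_jump_thread (path);

   This mirrors how thread_single_edge above builds a two-entry path before
   duplicating E->dest.  */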