gcc/tree-ssa-threadupdate.c
/* Thread edges through blocks and update the control flow and SSA graphs.
   Copyright (C) 2004 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "flags.h"
#include "rtl.h"
#include "tm_p.h"
#include "ggc.h"
#include "basic-block.h"
#include "output.h"
#include "errors.h"
#include "expr.h"
#include "function.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
/* Given a block B, update the CFG and SSA graph to reflect redirecting
   one or more in-edges to B to instead reach the destination of an
   out-edge from B while preserving any side effects in B.

   i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
   side effects of executing B.

     1. Make a copy of B (including its outgoing edges and statements).  Call
        the copy B'.  Note B' has no incoming edges or PHIs at this time.

     2. Remove the control statement at the end of B' and all outgoing edges
        except B'->C.

     3. Add a new argument to each PHI in C with the same value as the existing
        argument associated with edge B->C.  Associate the new PHI arguments
        with the edge B'->C.

     4. For each PHI in B, find or create a PHI in B' with an identical
        PHI_RESULT.  Add an argument to the PHI in B' which has the same
        value as the PHI in B associated with the edge A->B.  Associate
        the new argument in the PHI in B' with the edge A->B.

     5. Change the edge A->B to A->B'.

        5a. This automatically deletes any PHI arguments associated with the
            edge A->B in B.

        5b. This automatically associates each new argument added in step 4
            with the edge A->B'.

     6. Repeat for other incoming edges into B.

     7. Put the duplicated resources in B and all the B' blocks into SSA form.

   Note that block duplication can be minimized by first collecting the
   set of unique destination blocks that the incoming edges should
   be threaded to.  Block duplication can be further minimized by using
   B instead of creating B' for one destination if all edges into B are
   going to be threaded to a successor of B.  */
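
/* As a concrete illustration of the transformation above, suppose B
   has predecessors A1 and A2 and successors C and D, and that entering
   B from A1 is known to always exit through the edge B->C.  The
   transformation replaces the edges

	A1->B, A2->B, B->C, B->D

   with

	A1->B', A2->B, B'->C, B->C, B->D

   where B' is a copy of B minus its control statement, and the PHI
   nodes in C have gained arguments for the new edge B'->C.  The path
   A1->B'->C still performs B's side effects but no longer evaluates
   B's condition.  */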
/* Main data structure recording information regarding B's duplicate
   blocks.  */

struct redirection_data
{
  /* A duplicate of B with the trailing control statement removed and which
     targets a single successor of B.  */
  basic_block dup_block;

  /* An outgoing edge from B.  DUP_BLOCK will have OUTGOING_EDGE->dest as
     its single successor.  */
  edge outgoing_edge;
};
/* For each PHI node in BB, find or create a PHI node in NEW_BB for the
   same PHI_RESULT.  Add an argument to the PHI node in NEW_BB which
   corresponds to the same PHI argument associated with edge E in BB.  */

static void
copy_phis_to_block (basic_block new_bb, basic_block bb, edge e)
{
  tree phi, arg;

  /* Walk over every PHI in BB.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree new_phi;

      /* First try to find a PHI node in NEW_BB which has the same
         PHI_RESULT as the PHI from BB we are currently processing.  */
      for (new_phi = phi_nodes (new_bb); new_phi;
           new_phi = PHI_CHAIN (new_phi))
        if (PHI_RESULT (new_phi) == PHI_RESULT (phi))
          break;

      /* If we did not find a suitable PHI in NEW_BB, create one.  */
      if (!new_phi)
        new_phi = create_phi_node (PHI_RESULT (phi), new_bb);

      /* Extract the argument corresponding to E from the current PHI
         node in BB.  */
      arg = PHI_ARG_DEF_TREE (phi, phi_arg_from_edge (phi, e));

      /* Now add that same argument to the new PHI node in block NEW_BB.  */
      add_phi_arg (&new_phi, arg, e);
    }
}
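
/* An assumed example of the above (the SSA names are hypothetical):
   if BB contains

	x_2 = PHI <x_0 (A), x_1 (D)>

   and E is the edge A->BB, then copy_phis_to_block leaves NEW_BB with
   a PHI of the same result and the single argument selected by E:

	x_2 = PHI <x_0 (A)>

   The new argument is initially associated with E itself; redirecting
   A->BB to A->NEW_BB (step 5 in the comment at the top of the file)
   then reassociates it with the new edge.  */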
/* Remove the last statement in block BB which must be a COND_EXPR or
   SWITCH_EXPR.  Also remove all outgoing edges except the edge which
   reaches DEST_BB.

   This is only used by jump threading which knows the last statement in
   BB should be a COND_EXPR or SWITCH_EXPR.  If the block ends with any other
   statement, then we abort.  */

static void
remove_last_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  block_stmt_iterator bsi;
  edge e, next;

  bsi = bsi_last (bb);

#ifdef ENABLE_CHECKING
  if (TREE_CODE (bsi_stmt (bsi)) != COND_EXPR
      && TREE_CODE (bsi_stmt (bsi)) != SWITCH_EXPR)
    abort ();
#endif

  bsi_remove (&bsi);

  next = NULL;
  for (e = bb->succ; e; e = next)
    {
      next = e->succ_next;

      if (e->dest != dest_bb)
        ssa_remove_edge (e);
    }

  /* BB now has a single outgoing edge.  We need to update the flags for
     that single outgoing edge.  */
  bb->succ->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
  bb->succ->flags |= EDGE_FALLTHRU;
}
/* Create a duplicate of BB which only reaches the destination of the edge
   stored in RD.  Record the duplicate block in RD.  */

static void
create_block_for_threading (basic_block bb, struct redirection_data *rd)
{
  tree phi;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_block = duplicate_block (bb, NULL);

  /* The call to duplicate_block will copy everything, including the
     useless COND_EXPR or SWITCH_EXPR at the end of the block.  We just remove
     the useless COND_EXPR or SWITCH_EXPR here rather than having a
     specialized block copier.  */
  remove_last_stmt_and_useless_edges (rd->dup_block, rd->outgoing_edge->dest);

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  for (phi = phi_nodes (rd->dup_block->succ->dest); phi;
       phi = PHI_CHAIN (phi))
    {
      int indx = phi_arg_from_edge (phi, rd->outgoing_edge);
      add_phi_arg (&phi, PHI_ARG_DEF_TREE (phi, indx), rd->dup_block->succ);
    }
}
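
/* Continuing with a hypothetical example: if BB ends with
   "if (cond) goto C; else goto D" and RD's outgoing edge is BB->C,
   then RD->dup_block holds copies of all of BB's statements except
   the final COND_EXPR and has the single fallthru edge dup_block->C.
   A PHI node in C such as

	y_3 = PHI <y_1 (BB), y_2 (E)>

   gains a matching argument for the new edge:

	y_3 = PHI <y_1 (BB), y_2 (E), y_1 (dup_block)>  */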
/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update the dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   BB and its duplicates will have assignments to the same set of
   SSA_NAMEs.  Right now, we just call into rewrite_ssa_into_ssa
   to update the SSA graph for those names.

   We are also going to experiment with a true incremental update
   scheme for the duplicated resources.  One of the interesting
   properties we can exploit here is that all the resources set
   in BB will have the same IDFS, so we have one IDFS computation
   per block with incoming threaded edges, which can lower the
   cost of the true incremental update algorithm.  */
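
/* A small worked example of the edge marking convention consumed
   below: suppose BB has predecessors A1 and A2 and successors S1 and
   S2.  If the jump threader determined that entering from A1 always
   takes BB->S1, then the edge A1->BB has its aux field pointing at
   the edge BB->S1, while A2->BB's aux field is NULL.  thread_block
   then creates one duplicate of BB targeting only S1 and redirects
   A1->BB to that duplicate; A2->BB is left alone.  */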
static void
thread_block (basic_block bb)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e;

  /* The next edge in a predecessor list.  Used in loops where E->pred_next
     may change within the loop.  */
  edge next;

  /* ALL indicates whether or not all incoming edges into BB should
     be threaded to a duplicate of BB.  */
  bool all = true;

  /* Main data structure to hold information for duplicates of BB.  */
  varray_type redirection_data;
  unsigned int i;

  VARRAY_GENERIC_PTR_INIT (redirection_data, 2, "redirection data");

  /* Look at each incoming edge into BB.  Record each unique outgoing
     edge that we want to thread an incoming edge to.  Also note if
     all incoming edges are threaded or not.  */
  for (e = bb->pred; e; e = e->pred_next)
    {
      if (!e->aux)
        all = false;
      else
        {
          unsigned int i;

          /* See if we can find an entry for the destination of this
             threaded edge that has already been recorded.  */
          for (i = 0; i < VARRAY_ACTIVE_SIZE (redirection_data); i++)
            {
              struct redirection_data *rd;
              edge e2;

              rd = VARRAY_GENERIC_PTR (redirection_data, i);
              e2 = e->aux;

              if (e2->dest == rd->outgoing_edge->dest)
                break;
            }

          /* If the loop did not terminate early, then we have a new
             destination for the incoming threaded edges.  Record it.
             Note sizeof must name the struct explicitly; a plain
             REDIRECTION_DATA here would measure the local varray
             pointer instead of the structure we are allocating.  */
          if (i == VARRAY_ACTIVE_SIZE (redirection_data))
            {
              struct redirection_data *rd;

              rd = xcalloc (1, sizeof (struct redirection_data));
              rd->outgoing_edge = e->aux;
              VARRAY_PUSH_GENERIC_PTR (redirection_data, rd);
            }
        }
    }

  /* Now create duplicates of BB.  Note that if all incoming edges are
     threaded, then BB is going to become unreachable.  In that case
     we use BB for one of the duplicates rather than wasting memory
     duplicating BB.  Thus the odd starting condition for the loop.  */
  for (i = (all ? 1 : 0); i < VARRAY_ACTIVE_SIZE (redirection_data); i++)
    {
      struct redirection_data *rd = VARRAY_GENERIC_PTR (redirection_data, i);
      create_block_for_threading (bb, rd);
    }

  /* The loop above created the duplicate blocks (and the statements
     within the duplicate blocks).  This loop creates PHI nodes for the
     duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.

     Note that redirecting the edge will change e->pred_next, so we have
     to hold e->pred_next in a temporary.

     If this turns out to be a performance problem, then we could create
     a list of incoming edges associated with each entry in
     REDIRECTION_DATA and walk over that list of edges instead.  */
  next = NULL;
  for (e = bb->pred; e; e = next)
    {
      edge new_dest = e->aux;

      next = e->pred_next;

      /* If E was not threaded, then there is nothing to do.  */
      if (!new_dest)
        continue;

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
         to clear it will cause all kinds of unpleasant problems later.  */
      e->aux = NULL;

      /* We know E is an edge we want to thread.  Find the entry associated
         with E's new destination in the REDIRECTION_DATA array.  */
      for (i = 0; i < VARRAY_ACTIVE_SIZE (redirection_data); i++)
        {
          struct redirection_data *rd;

          rd = VARRAY_GENERIC_PTR (redirection_data, i);

          /* We have found the right entry if the outgoing edge in this
             entry matches E's new destination.  Note that if we have not
             created a duplicate block (rd->dup_block is NULL), then we
             are going to re-use BB as a duplicate and we do not need
             to create PHI nodes or redirect the edge.  */
          if (rd->outgoing_edge == new_dest && rd->dup_block)
            {
              edge e2;
              copy_phis_to_block (rd->dup_block, bb, e);

              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
                         e->src->index, e->dest->index, rd->dup_block->index);

              e2 = redirect_edge_and_branch (e, rd->dup_block);
              PENDING_STMT (e2) = NULL;

              if ((dump_file && (dump_flags & TDF_DETAILS))
                  && e->src != e2->src)
                fprintf (dump_file, "    basic block %d created\n",
                         e2->src->index);
              break;
            }
        }
    }

  /* If all the incoming edges were threaded, then we used BB as one
     of the duplicate blocks.  We need to fix up BB in that case so that
     it no longer has a COND_EXPR or SWITCH_EXPR and reaches one destination
     unconditionally.  */
  if (all)
    {
      struct redirection_data *rd;

      rd = VARRAY_GENERIC_PTR (redirection_data, 0);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
                 bb->pred->src->index, bb->index, bb->succ->dest->index);

      remove_last_stmt_and_useless_edges (bb, rd->outgoing_edge->dest);
    }

  /* Done with this block.  Free any memory we have allocated, clear
     REDIRECTION_DATA and unmark this block as needing incoming
     edge redirections.  */
  for (i = 0; i < VARRAY_ACTIVE_SIZE (redirection_data); i++)
    {
      struct redirection_data *rd = VARRAY_GENERIC_PTR (redirection_data, i);
      free (rd);
    }

  VARRAY_CLEAR (redirection_data);
}
/* Walk through all blocks and thread incoming edges to the block's
   destinations as requested.  This is the only entry point into this
   file.

   Blocks which have one or more incoming edges have INCOMING_EDGE_THREADED
   set in the block's annotation before this routine is called.

   Each edge that should be threaded has the new destination edge stored in
   the original edge's AUX field.

   This routine (or one of its callees) will clear INCOMING_EDGE_THREADED
   in the block annotations and the AUX field in the edges.

   It is the caller's responsibility to fix the dominance information
   and rewrite duplicated SSA_NAMEs back into SSA form.

   Returns true if one or more edges were threaded, false otherwise.  */
bool
thread_through_all_blocks (void)
{
  basic_block bb;
  bool retval = false;

  FOR_EACH_BB (bb)
    {
      if (bb_ann (bb)->incoming_edge_threaded)
        {
          thread_block (bb);
          retval = true;
          bb_ann (bb)->incoming_edge_threaded = false;
        }
    }

  return retval;
}
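
/* A sketch of how a client pass might drive this entry point.  It is
   illustrative only: the analysis that records the opportunities is
   elided, and rewrite_ssa_into_ssa is simply the function the comments
   in this file name as the current way to repair the SSA graph.

     FOR_EACH_BB (bb)
       ...analyze BB; for each incoming edge E known to always leave
          through outgoing edge E2, set E->aux = E2 and set
          bb_ann (bb)->incoming_edge_threaded = true...

     if (thread_through_all_blocks ())
       ...fix the dominance information, then rewrite the duplicated
          SSA_NAMEs back into SSA form (currently rewrite_ssa_into_ssa),
          per the caller responsibilities documented above...  */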