* config/i386/i386.c (output_fp_compare): Add generation
[official-gcc.git] / gcc / lcm.c
blob3432332a06a08db40a8311d9b131a3f7fe9050ad
1 /* Generic partial redundancy elimination with lazy code motion support.
2 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* These routines are meant to be used by various optimization
23 passes which can be modeled as lazy code motion problems.
24 Including, but not limited to:
26 * Traditional partial redundancy elimination.
28 * Placement of caller/caller register save/restores.
30 * Load/store motion.
32 * Copy motion.
34 * Conversion of flat register files to a stacked register
35 model.
37 * Dead load/store elimination.
39 These routines accept as input:
41 * Basic block information (number of blocks, lists of
42 predecessors and successors). Note the granularity
43 does not need to be basic block, they could be statements
44 or functions.
46 * Bitmaps of local properties (computed, transparent and
47 anticipatable expressions).
49 The output of these routines is bitmap of redundant computations
50 and a bitmap of optimal placement points. */
53 #include "config.h"
54 #include "system.h"
55 #include "coretypes.h"
56 #include "tm.h"
57 #include "rtl.h"
58 #include "regs.h"
59 #include "hard-reg-set.h"
60 #include "flags.h"
61 #include "real.h"
62 #include "insn-config.h"
63 #include "recog.h"
64 #include "basic-block.h"
65 #include "output.h"
66 #include "tm_p.h"
67 #include "function.h"
69 /* We want target macros for the mode switching code to be able to refer
70 to instruction attribute values. */
71 #include "insn-attr.h"
73 /* Edge based LCM routines. */
74 static void compute_antinout_edge (sbitmap *, sbitmap *, sbitmap *, sbitmap *);
75 static void compute_earliest (struct edge_list *, int, sbitmap *, sbitmap *,
76 sbitmap *, sbitmap *, sbitmap *);
77 static void compute_laterin (struct edge_list *, sbitmap *, sbitmap *,
78 sbitmap *, sbitmap *);
79 static void compute_insert_delete (struct edge_list *edge_list, sbitmap *,
80 sbitmap *, sbitmap *, sbitmap *, sbitmap *);
82 /* Edge based LCM routines on a reverse flowgraph. */
83 static void compute_farthest (struct edge_list *, int, sbitmap *, sbitmap *,
84 sbitmap*, sbitmap *, sbitmap *);
85 static void compute_nearerout (struct edge_list *, sbitmap *, sbitmap *,
86 sbitmap *, sbitmap *);
87 static void compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *,
88 sbitmap *, sbitmap *, sbitmap *,
89 sbitmap *);
91 /* Edge based lcm routines. */
93 /* Compute expression anticipatability at entrance and exit of each block.
94 This is done based on the flow graph, and not on the pred-succ lists.
95 Other than that, its pretty much identical to compute_antinout. */
97 static void
98 compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin,
99 sbitmap *antout)
101 basic_block bb;
102 edge e;
103 basic_block *worklist, *qin, *qout, *qend;
104 unsigned int qlen;
106 /* Allocate a worklist array/queue. Entries are only added to the
107 list if they were not already on the list. So the size is
108 bounded by the number of basic blocks. */
109 qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);
111 /* We want a maximal solution, so make an optimistic initialization of
112 ANTIN. */
113 sbitmap_vector_ones (antin, last_basic_block);
115 /* Put every block on the worklist; this is necessary because of the
116 optimistic initialization of ANTIN above. */
117 FOR_EACH_BB_REVERSE (bb)
119 *qin++ = bb;
120 bb->aux = bb;
123 qin = worklist;
124 qend = &worklist[n_basic_blocks];
125 qlen = n_basic_blocks;
127 /* Mark blocks which are predecessors of the exit block so that we
128 can easily identify them below. */
129 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
130 e->src->aux = EXIT_BLOCK_PTR;
132 /* Iterate until the worklist is empty. */
133 while (qlen)
135 /* Take the first entry off the worklist. */
136 bb = *qout++;
137 qlen--;
139 if (qout >= qend)
140 qout = worklist;
142 if (bb->aux == EXIT_BLOCK_PTR)
143 /* Do not clear the aux field for blocks which are predecessors of
144 the EXIT block. That way we never add then to the worklist
145 again. */
146 sbitmap_zero (antout[bb->index]);
147 else
149 /* Clear the aux field of this block so that it can be added to
150 the worklist again if necessary. */
151 bb->aux = NULL;
152 sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
155 if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
156 transp[bb->index], antout[bb->index]))
157 /* If the in state of this block changed, then we need
158 to add the predecessors of this block to the worklist
159 if they are not already on the worklist. */
160 for (e = bb->pred; e; e = e->pred_next)
161 if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
163 *qin++ = e->src;
164 e->src->aux = e;
165 qlen++;
166 if (qin >= qend)
167 qin = worklist;
171 clear_aux_for_edges ();
172 clear_aux_for_blocks ();
173 free (worklist);
176 /* Compute the earliest vector for edge based lcm. */
178 static void
179 compute_earliest (struct edge_list *edge_list, int n_exprs, sbitmap *antin,
180 sbitmap *antout, sbitmap *avout, sbitmap *kill,
181 sbitmap *earliest)
183 sbitmap difference, temp_bitmap;
184 int x, num_edges;
185 basic_block pred, succ;
187 num_edges = NUM_EDGES (edge_list);
189 difference = sbitmap_alloc (n_exprs);
190 temp_bitmap = sbitmap_alloc (n_exprs);
192 for (x = 0; x < num_edges; x++)
194 pred = INDEX_EDGE_PRED_BB (edge_list, x);
195 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
196 if (pred == ENTRY_BLOCK_PTR)
197 sbitmap_copy (earliest[x], antin[succ->index]);
198 else
200 if (succ == EXIT_BLOCK_PTR)
201 sbitmap_zero (earliest[x]);
202 else
204 sbitmap_difference (difference, antin[succ->index],
205 avout[pred->index]);
206 sbitmap_not (temp_bitmap, antout[pred->index]);
207 sbitmap_a_and_b_or_c (earliest[x], difference,
208 kill[pred->index], temp_bitmap);
213 sbitmap_free (temp_bitmap);
214 sbitmap_free (difference);
217 /* later(p,s) is dependent on the calculation of laterin(p).
218 laterin(p) is dependent on the calculation of later(p2,p).
220 laterin(ENTRY) is defined as all 0's
221 later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
222 laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).
224 If we progress in this manner, starting with all basic blocks
225 in the work list, anytime we change later(bb), we need to add
226 succs(bb) to the worklist if they are not already on the worklist.
228 Boundary conditions:
230 We prime the worklist all the normal basic blocks. The ENTRY block can
231 never be added to the worklist since it is never the successor of any
232 block. We explicitly prevent the EXIT block from being added to the
233 worklist.
235 We optimistically initialize LATER. That is the only time this routine
236 will compute LATER for an edge out of the entry block since the entry
237 block is never on the worklist. Thus, LATERIN is neither used nor
238 computed for the ENTRY block.
240 Since the EXIT block is never added to the worklist, we will neither
241 use nor compute LATERIN for the exit block. Edges which reach the
242 EXIT block are handled in the normal fashion inside the loop. However,
243 the insertion/deletion computation needs LATERIN(EXIT), so we have
244 to compute it. */
246 static void
247 compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
248 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
250 int num_edges, i;
251 edge e;
252 basic_block *worklist, *qin, *qout, *qend, bb;
253 unsigned int qlen;
255 num_edges = NUM_EDGES (edge_list);
257 /* Allocate a worklist array/queue. Entries are only added to the
258 list if they were not already on the list. So the size is
259 bounded by the number of basic blocks. */
260 qin = qout = worklist
261 = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
263 /* Initialize a mapping from each edge to its index. */
264 for (i = 0; i < num_edges; i++)
265 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
267 /* We want a maximal solution, so initially consider LATER true for
268 all edges. This allows propagation through a loop since the incoming
269 loop edge will have LATER set, so if all the other incoming edges
270 to the loop are set, then LATERIN will be set for the head of the
271 loop.
273 If the optimistic setting of LATER on that edge was incorrect (for
274 example the expression is ANTLOC in a block within the loop) then
275 this algorithm will detect it when we process the block at the head
276 of the optimistic edge. That will requeue the affected blocks. */
277 sbitmap_vector_ones (later, num_edges);
279 /* Note that even though we want an optimistic setting of LATER, we
280 do not want to be overly optimistic. Consider an outgoing edge from
281 the entry block. That edge should always have a LATER value the
282 same as EARLIEST for that edge. */
283 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
284 sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);
286 /* Add all the blocks to the worklist. This prevents an early exit from
287 the loop given our optimistic initialization of LATER above. */
288 FOR_EACH_BB (bb)
290 *qin++ = bb;
291 bb->aux = bb;
294 /* Note that we do not use the last allocated element for our queue,
295 as EXIT_BLOCK is never inserted into it. In fact the above allocation
296 of n_basic_blocks + 1 elements is not necessary. */
297 qin = worklist;
298 qend = &worklist[n_basic_blocks];
299 qlen = n_basic_blocks;
301 /* Iterate until the worklist is empty. */
302 while (qlen)
304 /* Take the first entry off the worklist. */
305 bb = *qout++;
306 bb->aux = NULL;
307 qlen--;
308 if (qout >= qend)
309 qout = worklist;
311 /* Compute the intersection of LATERIN for each incoming edge to B. */
312 sbitmap_ones (laterin[bb->index]);
313 for (e = bb->pred; e != NULL; e = e->pred_next)
314 sbitmap_a_and_b (laterin[bb->index], laterin[bb->index],
315 later[(size_t)e->aux]);
317 /* Calculate LATER for all outgoing edges. */
318 for (e = bb->succ; e != NULL; e = e->succ_next)
319 if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
320 earliest[(size_t) e->aux],
321 laterin[e->src->index],
322 antloc[e->src->index])
323 /* If LATER for an outgoing edge was changed, then we need
324 to add the target of the outgoing edge to the worklist. */
325 && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
327 *qin++ = e->dest;
328 e->dest->aux = e;
329 qlen++;
330 if (qin >= qend)
331 qin = worklist;
335 /* Computation of insertion and deletion points requires computing LATERIN
336 for the EXIT block. We allocated an extra entry in the LATERIN array
337 for just this purpose. */
338 sbitmap_ones (laterin[last_basic_block]);
339 for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
340 sbitmap_a_and_b (laterin[last_basic_block],
341 laterin[last_basic_block],
342 later[(size_t) e->aux]);
344 clear_aux_for_edges ();
345 free (worklist);
348 /* Compute the insertion and deletion points for edge based LCM. */
350 static void
351 compute_insert_delete (struct edge_list *edge_list, sbitmap *antloc,
352 sbitmap *later, sbitmap *laterin, sbitmap *insert,
353 sbitmap *delete)
355 int x;
356 basic_block bb;
358 FOR_EACH_BB (bb)
359 sbitmap_difference (delete[bb->index], antloc[bb->index],
360 laterin[bb->index]);
362 for (x = 0; x < NUM_EDGES (edge_list); x++)
364 basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
366 if (b == EXIT_BLOCK_PTR)
367 sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
368 else
369 sbitmap_difference (insert[x], later[x], laterin[b->index]);
373 /* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the insert and
374 delete vectors for edge based LCM. Returns an edgelist which is used to
375 map the insert vector to what edge an expression should be inserted on. */
377 struct edge_list *
378 pre_edge_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
379 sbitmap *avloc, sbitmap *antloc, sbitmap *kill,
380 sbitmap **insert, sbitmap **delete)
382 sbitmap *antin, *antout, *earliest;
383 sbitmap *avin, *avout;
384 sbitmap *later, *laterin;
385 struct edge_list *edge_list;
386 int num_edges;
388 edge_list = create_edge_list ();
389 num_edges = NUM_EDGES (edge_list);
391 #ifdef LCM_DEBUG_INFO
392 if (file)
394 fprintf (file, "Edge List:\n");
395 verify_edge_list (file, edge_list);
396 print_edge_list (file, edge_list);
397 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
398 dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
399 dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
400 dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
402 #endif
404 /* Compute global availability. */
405 avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
406 avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
407 compute_available (avloc, kill, avout, avin);
408 sbitmap_vector_free (avin);
410 /* Compute global anticipatability. */
411 antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
412 antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
413 compute_antinout_edge (antloc, transp, antin, antout);
415 #ifdef LCM_DEBUG_INFO
416 if (file)
418 dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
419 dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
421 #endif
423 /* Compute earliestness. */
424 earliest = sbitmap_vector_alloc (num_edges, n_exprs);
425 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);
427 #ifdef LCM_DEBUG_INFO
428 if (file)
429 dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
430 #endif
432 sbitmap_vector_free (antout);
433 sbitmap_vector_free (antin);
434 sbitmap_vector_free (avout);
436 later = sbitmap_vector_alloc (num_edges, n_exprs);
438 /* Allocate an extra element for the exit block in the laterin vector. */
439 laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
440 compute_laterin (edge_list, earliest, antloc, later, laterin);
442 #ifdef LCM_DEBUG_INFO
443 if (file)
445 dump_sbitmap_vector (file, "laterin", "", laterin, last_basic_block + 1);
446 dump_sbitmap_vector (file, "later", "", later, num_edges);
448 #endif
450 sbitmap_vector_free (earliest);
452 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
453 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
454 compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);
456 sbitmap_vector_free (laterin);
457 sbitmap_vector_free (later);
459 #ifdef LCM_DEBUG_INFO
460 if (file)
462 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
463 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
464 last_basic_block);
466 #endif
468 return edge_list;
471 /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.
472 Return the number of passes we performed to iterate to a solution. */
474 void
475 compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout,
476 sbitmap *avin)
478 edge e;
479 basic_block *worklist, *qin, *qout, *qend, bb;
480 unsigned int qlen;
482 /* Allocate a worklist array/queue. Entries are only added to the
483 list if they were not already on the list. So the size is
484 bounded by the number of basic blocks. */
485 qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);
487 /* We want a maximal solution. */
488 sbitmap_vector_ones (avout, last_basic_block);
490 /* Put every block on the worklist; this is necessary because of the
491 optimistic initialization of AVOUT above. */
492 FOR_EACH_BB (bb)
494 *qin++ = bb;
495 bb->aux = bb;
498 qin = worklist;
499 qend = &worklist[n_basic_blocks];
500 qlen = n_basic_blocks;
502 /* Mark blocks which are successors of the entry block so that we
503 can easily identify them below. */
504 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
505 e->dest->aux = ENTRY_BLOCK_PTR;
507 /* Iterate until the worklist is empty. */
508 while (qlen)
510 /* Take the first entry off the worklist. */
511 bb = *qout++;
512 qlen--;
514 if (qout >= qend)
515 qout = worklist;
517 /* If one of the predecessor blocks is the ENTRY block, then the
518 intersection of avouts is the null set. We can identify such blocks
519 by the special value in the AUX field in the block structure. */
520 if (bb->aux == ENTRY_BLOCK_PTR)
521 /* Do not clear the aux field for blocks which are successors of the
522 ENTRY block. That way we never add then to the worklist again. */
523 sbitmap_zero (avin[bb->index]);
524 else
526 /* Clear the aux field of this block so that it can be added to
527 the worklist again if necessary. */
528 bb->aux = NULL;
529 sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
532 if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index],
533 avin[bb->index], kill[bb->index]))
534 /* If the out state of this block changed, then we need
535 to add the successors of this block to the worklist
536 if they are not already on the worklist. */
537 for (e = bb->succ; e; e = e->succ_next)
538 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
540 *qin++ = e->dest;
541 e->dest->aux = e;
542 qlen++;
544 if (qin >= qend)
545 qin = worklist;
549 clear_aux_for_edges ();
550 clear_aux_for_blocks ();
551 free (worklist);
554 /* Compute the farthest vector for edge based lcm. */
556 static void
557 compute_farthest (struct edge_list *edge_list, int n_exprs,
558 sbitmap *st_avout, sbitmap *st_avin, sbitmap *st_antin,
559 sbitmap *kill, sbitmap *farthest)
561 sbitmap difference, temp_bitmap;
562 int x, num_edges;
563 basic_block pred, succ;
565 num_edges = NUM_EDGES (edge_list);
567 difference = sbitmap_alloc (n_exprs);
568 temp_bitmap = sbitmap_alloc (n_exprs);
570 for (x = 0; x < num_edges; x++)
572 pred = INDEX_EDGE_PRED_BB (edge_list, x);
573 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
574 if (succ == EXIT_BLOCK_PTR)
575 sbitmap_copy (farthest[x], st_avout[pred->index]);
576 else
578 if (pred == ENTRY_BLOCK_PTR)
579 sbitmap_zero (farthest[x]);
580 else
582 sbitmap_difference (difference, st_avout[pred->index],
583 st_antin[succ->index]);
584 sbitmap_not (temp_bitmap, st_avin[succ->index]);
585 sbitmap_a_and_b_or_c (farthest[x], difference,
586 kill[succ->index], temp_bitmap);
591 sbitmap_free (temp_bitmap);
592 sbitmap_free (difference);
595 /* Compute nearer and nearerout vectors for edge based lcm.
597 This is the mirror of compute_laterin, additional comments on the
598 implementation can be found before compute_laterin. */
600 static void
601 compute_nearerout (struct edge_list *edge_list, sbitmap *farthest,
602 sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout)
604 int num_edges, i;
605 edge e;
606 basic_block *worklist, *tos, bb;
608 num_edges = NUM_EDGES (edge_list);
610 /* Allocate a worklist array/queue. Entries are only added to the
611 list if they were not already on the list. So the size is
612 bounded by the number of basic blocks. */
613 tos = worklist = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
615 /* Initialize NEARER for each edge and build a mapping from an edge to
616 its index. */
617 for (i = 0; i < num_edges; i++)
618 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
620 /* We want a maximal solution. */
621 sbitmap_vector_ones (nearer, num_edges);
623 /* Note that even though we want an optimistic setting of NEARER, we
624 do not want to be overly optimistic. Consider an incoming edge to
625 the exit block. That edge should always have a NEARER value the
626 same as FARTHEST for that edge. */
627 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
628 sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);
630 /* Add all the blocks to the worklist. This prevents an early exit
631 from the loop given our optimistic initialization of NEARER. */
632 FOR_EACH_BB (bb)
634 *tos++ = bb;
635 bb->aux = bb;
638 /* Iterate until the worklist is empty. */
639 while (tos != worklist)
641 /* Take the first entry off the worklist. */
642 bb = *--tos;
643 bb->aux = NULL;
645 /* Compute the intersection of NEARER for each outgoing edge from B. */
646 sbitmap_ones (nearerout[bb->index]);
647 for (e = bb->succ; e != NULL; e = e->succ_next)
648 sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
649 nearer[(size_t) e->aux]);
651 /* Calculate NEARER for all incoming edges. */
652 for (e = bb->pred; e != NULL; e = e->pred_next)
653 if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
654 farthest[(size_t) e->aux],
655 nearerout[e->dest->index],
656 st_avloc[e->dest->index])
657 /* If NEARER for an incoming edge was changed, then we need
658 to add the source of the incoming edge to the worklist. */
659 && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
661 *tos++ = e->src;
662 e->src->aux = e;
666 /* Computation of insertion and deletion points requires computing NEAREROUT
667 for the ENTRY block. We allocated an extra entry in the NEAREROUT array
668 for just this purpose. */
669 sbitmap_ones (nearerout[last_basic_block]);
670 for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
671 sbitmap_a_and_b (nearerout[last_basic_block],
672 nearerout[last_basic_block],
673 nearer[(size_t) e->aux]);
675 clear_aux_for_edges ();
676 free (tos);
679 /* Compute the insertion and deletion points for edge based LCM. */
681 static void
682 compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *st_avloc,
683 sbitmap *nearer, sbitmap *nearerout,
684 sbitmap *insert, sbitmap *delete)
686 int x;
687 basic_block bb;
689 FOR_EACH_BB (bb)
690 sbitmap_difference (delete[bb->index], st_avloc[bb->index],
691 nearerout[bb->index]);
693 for (x = 0; x < NUM_EDGES (edge_list); x++)
695 basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
696 if (b == ENTRY_BLOCK_PTR)
697 sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
698 else
699 sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
703 /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
704 insert and delete vectors for edge based reverse LCM. Returns an
705 edgelist which is used to map the insert vector to what edge
706 an expression should be inserted on. */
708 struct edge_list *
709 pre_edge_rev_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
710 sbitmap *st_avloc, sbitmap *st_antloc, sbitmap *kill,
711 sbitmap **insert, sbitmap **delete)
713 sbitmap *st_antin, *st_antout;
714 sbitmap *st_avout, *st_avin, *farthest;
715 sbitmap *nearer, *nearerout;
716 struct edge_list *edge_list;
717 int num_edges;
719 edge_list = create_edge_list ();
720 num_edges = NUM_EDGES (edge_list);
722 st_antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
723 st_antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
724 sbitmap_vector_zero (st_antin, last_basic_block);
725 sbitmap_vector_zero (st_antout, last_basic_block);
726 compute_antinout_edge (st_antloc, transp, st_antin, st_antout);
728 /* Compute global anticipatability. */
729 st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
730 st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
731 compute_available (st_avloc, kill, st_avout, st_avin);
733 #ifdef LCM_DEBUG_INFO
734 if (file)
736 fprintf (file, "Edge List:\n");
737 verify_edge_list (file, edge_list);
738 print_edge_list (file, edge_list);
739 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
740 dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
741 dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
742 dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
743 dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
744 dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
746 #endif
748 #ifdef LCM_DEBUG_INFO
749 if (file)
751 dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
752 dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
754 #endif
756 /* Compute farthestness. */
757 farthest = sbitmap_vector_alloc (num_edges, n_exprs);
758 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
759 kill, farthest);
761 #ifdef LCM_DEBUG_INFO
762 if (file)
763 dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
764 #endif
766 sbitmap_vector_free (st_antin);
767 sbitmap_vector_free (st_antout);
769 sbitmap_vector_free (st_avin);
770 sbitmap_vector_free (st_avout);
772 nearer = sbitmap_vector_alloc (num_edges, n_exprs);
774 /* Allocate an extra element for the entry block. */
775 nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
776 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);
778 #ifdef LCM_DEBUG_INFO
779 if (file)
781 dump_sbitmap_vector (file, "nearerout", "", nearerout,
782 last_basic_block + 1);
783 dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
785 #endif
787 sbitmap_vector_free (farthest);
789 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
790 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
791 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
792 *insert, *delete);
794 sbitmap_vector_free (nearerout);
795 sbitmap_vector_free (nearer);
797 #ifdef LCM_DEBUG_INFO
798 if (file)
800 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
801 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
802 last_basic_block);
804 #endif
805 return edge_list;
808 /* Mode switching:
810 The algorithm for setting the modes consists of scanning the insn list
811 and finding all the insns which require a specific mode. Each insn gets
812 a unique struct seginfo element. These structures are inserted into a list
813 for each basic block. For each entity, there is an array of bb_info over
814 the flow graph basic blocks (local var 'bb_info'), and contains a list
815 of all insns within that basic block, in the order they are encountered.
817 For each entity, any basic block WITHOUT any insns requiring a specific
818 mode are given a single entry, without a mode. (Each basic block
819 in the flow graph must have at least one entry in the segment table.)
821 The LCM algorithm is then run over the flow graph to determine where to
822 place the sets to the highest-priority value in respect of first the first
823 insn in any one block. Any adjustments required to the transparency
824 vectors are made, then the next iteration starts for the next-lower
825 priority mode, till for each entity all modes are exhausted.
827 More details are located in the code for optimize_mode_switching(). */
829 /* This structure contains the information for each insn which requires
830 either single or double mode to be set.
831 MODE is the mode this insn must be executed in.
832 INSN_PTR is the insn to be executed (may be the note that marks the
833 beginning of a basic block).
834 BBNUM is the flow graph basic block this insn occurs in.
835 NEXT is the next insn in the same basic block. */
836 struct seginfo
838 int mode;
839 rtx insn_ptr;
840 int bbnum;
841 struct seginfo *next;
842 HARD_REG_SET regs_live;
845 struct bb_info
847 struct seginfo *seginfo;
848 int computing;
851 /* These bitmaps are used for the LCM algorithm. */
853 #ifdef OPTIMIZE_MODE_SWITCHING
854 static sbitmap *antic;
855 static sbitmap *transp;
856 static sbitmap *comp;
857 static sbitmap *delete;
858 static sbitmap *insert;
860 static struct seginfo * new_seginfo (int, rtx, int, HARD_REG_SET);
861 static void add_seginfo (struct bb_info *, struct seginfo *);
862 static void reg_dies (rtx, HARD_REG_SET);
863 static void reg_becomes_live (rtx, rtx, void *);
864 static void make_preds_opaque (basic_block, int);
865 #endif
867 #ifdef OPTIMIZE_MODE_SWITCHING
869 /* This function will allocate a new BBINFO structure, initialized
870 with the MODE, INSN, and basic block BB parameters. */
872 static struct seginfo *
873 new_seginfo (int mode, rtx insn, int bb, HARD_REG_SET regs_live)
875 struct seginfo *ptr;
876 ptr = xmalloc (sizeof (struct seginfo));
877 ptr->mode = mode;
878 ptr->insn_ptr = insn;
879 ptr->bbnum = bb;
880 ptr->next = NULL;
881 COPY_HARD_REG_SET (ptr->regs_live, regs_live);
882 return ptr;
885 /* Add a seginfo element to the end of a list.
886 HEAD is a pointer to the list beginning.
887 INFO is the structure to be linked in. */
889 static void
890 add_seginfo (struct bb_info *head, struct seginfo *info)
892 struct seginfo *ptr;
894 if (head->seginfo == NULL)
895 head->seginfo = info;
896 else
898 ptr = head->seginfo;
899 while (ptr->next != NULL)
900 ptr = ptr->next;
901 ptr->next = info;
905 /* Make all predecessors of basic block B opaque, recursively, till we hit
906 some that are already non-transparent, or an edge where aux is set; that
907 denotes that a mode set is to be done on that edge.
908 J is the bit number in the bitmaps that corresponds to the entity that
909 we are currently handling mode-switching for. */
911 static void
912 make_preds_opaque (basic_block b, int j)
914 edge e;
916 for (e = b->pred; e; e = e->pred_next)
918 basic_block pb = e->src;
920 if (e->aux || ! TEST_BIT (transp[pb->index], j))
921 continue;
923 RESET_BIT (transp[pb->index], j);
924 make_preds_opaque (pb, j);
928 /* Record in LIVE that register REG died. */
930 static void
931 reg_dies (rtx reg, HARD_REG_SET live)
933 int regno, nregs;
935 if (!REG_P (reg))
936 return;
938 regno = REGNO (reg);
939 if (regno < FIRST_PSEUDO_REGISTER)
940 for (nregs = hard_regno_nregs[regno][GET_MODE (reg)] - 1; nregs >= 0;
941 nregs--)
942 CLEAR_HARD_REG_BIT (live, regno + nregs);
945 /* Record in LIVE that register REG became live.
946 This is called via note_stores. */
948 static void
949 reg_becomes_live (rtx reg, rtx setter ATTRIBUTE_UNUSED, void *live)
951 int regno, nregs;
953 if (GET_CODE (reg) == SUBREG)
954 reg = SUBREG_REG (reg);
956 if (!REG_P (reg))
957 return;
959 regno = REGNO (reg);
960 if (regno < FIRST_PSEUDO_REGISTER)
961 for (nregs = hard_regno_nregs[regno][GET_MODE (reg)] - 1; nregs >= 0;
962 nregs--)
963 SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
966 /* Make sure if MODE_ENTRY is defined the MODE_EXIT is defined
967 and vice versa. */
968 #if defined (MODE_ENTRY) != defined (MODE_EXIT)
969 #error "Both MODE_ENTRY and MODE_EXIT must be defined"
970 #endif
972 /* Find all insns that need a particular mode setting, and insert the
973 necessary mode switches. Return true if we did work. */
976 optimize_mode_switching (FILE *file)
978 rtx insn;
979 int e;
980 basic_block bb;
981 int need_commit = 0;
982 sbitmap *kill;
983 struct edge_list *edge_list;
984 static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
985 #define N_ENTITIES ARRAY_SIZE (num_modes)
986 int entity_map[N_ENTITIES];
987 struct bb_info *bb_info[N_ENTITIES];
988 int i, j;
989 int n_entities;
990 int max_num_modes = 0;
991 bool emited = false;
992 basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;
994 clear_bb_flags ();
996 for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
997 if (OPTIMIZE_MODE_SWITCHING (e))
999 int entry_exit_extra = 0;
1001 /* Create the list of segments within each basic block.
1002 If NORMAL_MODE is defined, allow for two extra
1003 blocks split from the entry and exit block. */
1004 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1005 entry_exit_extra = 2;
1006 #endif
1007 bb_info[n_entities]
1008 = xcalloc (last_basic_block + entry_exit_extra, sizeof **bb_info);
1009 entity_map[n_entities++] = e;
1010 if (num_modes[e] > max_num_modes)
1011 max_num_modes = num_modes[e];
1014 if (! n_entities)
1015 return 0;
1017 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1019 /* Split the edge from the entry block and the fallthrough edge to the
1020 exit block, so that we can note that there NORMAL_MODE is supplied /
1021 required. */
1022 edge eg;
1023 post_entry = split_edge (ENTRY_BLOCK_PTR->succ);
1024 /* The only non-call predecessor at this stage is a block with a
1025 fallthrough edge; there can be at most one, but there could be
1026 none at all, e.g. when exit is called. */
1027 for (pre_exit = 0, eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
1028 if (eg->flags & EDGE_FALLTHRU)
1030 regset live_at_end = eg->src->global_live_at_end;
1032 if (pre_exit)
1033 abort ();
1034 pre_exit = split_edge (eg);
1035 COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
1036 COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
1039 #endif
1041 /* Create the bitmap vectors. */
1043 antic = sbitmap_vector_alloc (last_basic_block, n_entities);
1044 transp = sbitmap_vector_alloc (last_basic_block, n_entities);
1045 comp = sbitmap_vector_alloc (last_basic_block, n_entities);
1047 sbitmap_vector_ones (transp, last_basic_block);
1049 for (j = n_entities - 1; j >= 0; j--)
1051 int e = entity_map[j];
1052 int no_mode = num_modes[e];
1053 struct bb_info *info = bb_info[j];
1055 /* Determine what the first use (if any) need for a mode of entity E is.
1056 This will be the mode that is anticipatable for this block.
1057 Also compute the initial transparency settings. */
1058 FOR_EACH_BB (bb)
1060 struct seginfo *ptr;
1061 int last_mode = no_mode;
1062 HARD_REG_SET live_now;
1064 REG_SET_TO_HARD_REG_SET (live_now,
1065 bb->global_live_at_start);
1066 for (insn = BB_HEAD (bb);
1067 insn != NULL && insn != NEXT_INSN (BB_END (bb));
1068 insn = NEXT_INSN (insn))
1070 if (INSN_P (insn))
1072 int mode = MODE_NEEDED (e, insn);
1073 rtx link;
1075 if (mode != no_mode && mode != last_mode)
1077 last_mode = mode;
1078 ptr = new_seginfo (mode, insn, bb->index, live_now);
1079 add_seginfo (info + bb->index, ptr);
1080 RESET_BIT (transp[bb->index], j);
1082 #ifdef MODE_AFTER
1083 last_mode = MODE_AFTER (last_mode, insn);
1084 #endif
1085 /* Update LIVE_NOW. */
1086 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1087 if (REG_NOTE_KIND (link) == REG_DEAD)
1088 reg_dies (XEXP (link, 0), live_now);
1090 note_stores (PATTERN (insn), reg_becomes_live, &live_now);
1091 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1092 if (REG_NOTE_KIND (link) == REG_UNUSED)
1093 reg_dies (XEXP (link, 0), live_now);
1097 info[bb->index].computing = last_mode;
1098 /* Check for blocks without ANY mode requirements. */
1099 if (last_mode == no_mode)
1101 ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now);
1102 add_seginfo (info + bb->index, ptr);
1105 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1107 int mode = MODE_ENTRY (e);
1109 if (mode != no_mode)
1111 bb = post_entry;
1113 /* By always making this nontransparent, we save
1114 an extra check in make_preds_opaque. We also
1115 need this to avoid confusing pre_edge_lcm when
1116 antic is cleared but transp and comp are set. */
1117 RESET_BIT (transp[bb->index], j);
1119 /* Insert a fake computing definition of MODE into entry
1120 blocks which compute no mode. This represents the mode on
1121 entry. */
1122 info[bb->index].computing = mode;
1124 if (pre_exit)
1125 info[pre_exit->index].seginfo->mode = MODE_EXIT (e);
1128 #endif /* NORMAL_MODE */
1131 kill = sbitmap_vector_alloc (last_basic_block, n_entities);
1132 for (i = 0; i < max_num_modes; i++)
1134 int current_mode[N_ENTITIES];
1136 /* Set the anticipatable and computing arrays. */
1137 sbitmap_vector_zero (antic, last_basic_block);
1138 sbitmap_vector_zero (comp, last_basic_block);
1139 for (j = n_entities - 1; j >= 0; j--)
1141 int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
1142 struct bb_info *info = bb_info[j];
1144 FOR_EACH_BB (bb)
1146 if (info[bb->index].seginfo->mode == m)
1147 SET_BIT (antic[bb->index], j);
1149 if (info[bb->index].computing == m)
1150 SET_BIT (comp[bb->index], j);
1154 /* Calculate the optimal locations for the
1155 placement mode switches to modes with priority I. */
1157 FOR_EACH_BB (bb)
1158 sbitmap_not (kill[bb->index], transp[bb->index]);
1159 edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
1160 kill, &insert, &delete);
1162 for (j = n_entities - 1; j >= 0; j--)
1164 /* Insert all mode sets that have been inserted by lcm. */
1165 int no_mode = num_modes[entity_map[j]];
1167 /* Wherever we have moved a mode setting upwards in the flow graph,
1168 the blocks between the new setting site and the now redundant
1169 computation ceases to be transparent for any lower-priority
1170 mode of the same entity. First set the aux field of each
1171 insertion site edge non-transparent, then propagate the new
1172 non-transparency from the redundant computation upwards till
1173 we hit an insertion site or an already non-transparent block. */
1174 for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
1176 edge eg = INDEX_EDGE (edge_list, e);
1177 int mode;
1178 basic_block src_bb;
1179 HARD_REG_SET live_at_edge;
1180 rtx mode_set;
1182 eg->aux = 0;
1184 if (! TEST_BIT (insert[e], j))
1185 continue;
1187 eg->aux = (void *)1;
1189 mode = current_mode[j];
1190 src_bb = eg->src;
1192 REG_SET_TO_HARD_REG_SET (live_at_edge,
1193 src_bb->global_live_at_end);
1195 start_sequence ();
1196 EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
1197 mode_set = get_insns ();
1198 end_sequence ();
1200 /* Do not bother to insert empty sequence. */
1201 if (mode_set == NULL_RTX)
1202 continue;
1204 /* If this is an abnormal edge, we'll insert at the end
1205 of the previous block. */
1206 if (eg->flags & EDGE_ABNORMAL)
1208 emited = true;
1209 if (JUMP_P (BB_END (src_bb)))
1210 emit_insn_before (mode_set, BB_END (src_bb));
1211 /* It doesn't make sense to switch to normal mode
1212 after a CALL_INSN, so we're going to abort if we
1213 find one. The cases in which a CALL_INSN may
1214 have an abnormal edge are sibcalls and EH edges.
1215 In the case of sibcalls, the dest basic-block is
1216 the EXIT_BLOCK, that runs in normal mode; it is
1217 assumed that a sibcall insn requires normal mode
1218 itself, so no mode switch would be required after
1219 the call (it wouldn't make sense, anyway). In
1220 the case of EH edges, EH entry points also start
1221 in normal mode, so a similar reasoning applies. */
1222 else if (NONJUMP_INSN_P (BB_END (src_bb)))
1223 emit_insn_after (mode_set, BB_END (src_bb));
1224 else
1225 abort ();
1226 bb_info[j][src_bb->index].computing = mode;
1227 RESET_BIT (transp[src_bb->index], j);
1229 else
1231 need_commit = 1;
1232 insert_insn_on_edge (mode_set, eg);
1236 FOR_EACH_BB_REVERSE (bb)
1237 if (TEST_BIT (delete[bb->index], j))
1239 make_preds_opaque (bb, j);
1240 /* Cancel the 'deleted' mode set. */
1241 bb_info[j][bb->index].seginfo->mode = no_mode;
1245 clear_aux_for_edges ();
1246 free_edge_list (edge_list);
1249 /* Now output the remaining mode sets in all the segments. */
1250 for (j = n_entities - 1; j >= 0; j--)
1252 int no_mode = num_modes[entity_map[j]];
1254 FOR_EACH_BB_REVERSE (bb)
1256 struct seginfo *ptr, *next;
1257 for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
1259 next = ptr->next;
1260 if (ptr->mode != no_mode)
1262 rtx mode_set;
1264 start_sequence ();
1265 EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
1266 mode_set = get_insns ();
1267 end_sequence ();
1269 /* Do not bother to insert empty sequence. */
1270 if (mode_set == NULL_RTX)
1271 continue;
1273 emited = true;
1274 if (NOTE_P (ptr->insn_ptr)
1275 && (NOTE_LINE_NUMBER (ptr->insn_ptr)
1276 == NOTE_INSN_BASIC_BLOCK))
1277 emit_insn_after (mode_set, ptr->insn_ptr);
1278 else
1279 emit_insn_before (mode_set, ptr->insn_ptr);
1282 free (ptr);
1286 free (bb_info[j]);
1289 /* Finished. Free up all the things we've allocated. */
1291 sbitmap_vector_free (kill);
1292 sbitmap_vector_free (antic);
1293 sbitmap_vector_free (transp);
1294 sbitmap_vector_free (comp);
1295 sbitmap_vector_free (delete);
1296 sbitmap_vector_free (insert);
1298 if (need_commit)
1299 commit_edge_insertions ();
1301 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1302 cleanup_cfg (CLEANUP_NO_INSN_DEL);
1303 #else
1304 if (!need_commit && !emited)
1305 return 0;
1306 #endif
1308 max_regno = max_reg_num ();
1309 allocate_reg_info (max_regno, FALSE, FALSE);
1310 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
1311 (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
1312 | PROP_SCAN_DEAD_CODE));
1314 return 1;
1316 #endif /* OPTIMIZE_MODE_SWITCHING */