* gcc.c-torture/execute/20020720-1.x: Skip test on ARM-based systems.
[official-gcc.git] / gcc / lcm.c
blobbdbae429d4d088105f66c255b8a528a882536da8
1 /* Generic partial redundancy elimination with lazy code motion support.
2 Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* These routines are meant to be used by various optimization
22 passes which can be modeled as lazy code motion problems.
23 Including, but not limited to:
25 * Traditional partial redundancy elimination.
27 * Placement of caller/caller register save/restores.
29 * Load/store motion.
31 * Copy motion.
33 * Conversion of flat register files to a stacked register
34 model.
36 * Dead load/store elimination.
38 These routines accept as input:
40 * Basic block information (number of blocks, lists of
41 predecessors and successors). Note the granularity
42 does not need to be basic block, they could be statements
43 or functions.
45 * Bitmaps of local properties (computed, transparent and
46 anticipatable expressions).
48 The output of these routines is bitmap of redundant computations
49 and a bitmap of optimal placement points. */
52 #include "config.h"
53 #include "system.h"
54 #include "rtl.h"
55 #include "regs.h"
56 #include "hard-reg-set.h"
57 #include "flags.h"
58 #include "real.h"
59 #include "insn-config.h"
60 #include "recog.h"
61 #include "basic-block.h"
62 #include "output.h"
63 #include "tm_p.h"
65 /* We want target macros for the mode switching code to be able to refer
66 to instruction attribute values. */
67 #include "insn-attr.h"
69 /* Edge based LCM routines. */
70 static void compute_antinout_edge PARAMS ((sbitmap *, sbitmap *,
71 sbitmap *, sbitmap *));
72 static void compute_earliest PARAMS ((struct edge_list *, int,
73 sbitmap *, sbitmap *,
74 sbitmap *, sbitmap *,
75 sbitmap *));
76 static void compute_laterin PARAMS ((struct edge_list *, sbitmap *,
77 sbitmap *, sbitmap *,
78 sbitmap *));
79 static void compute_insert_delete PARAMS ((struct edge_list *edge_list,
80 sbitmap *, sbitmap *,
81 sbitmap *, sbitmap *,
82 sbitmap *));
84 /* Edge based LCM routines on a reverse flowgraph. */
85 static void compute_farthest PARAMS ((struct edge_list *, int,
86 sbitmap *, sbitmap *,
87 sbitmap*, sbitmap *,
88 sbitmap *));
89 static void compute_nearerout PARAMS ((struct edge_list *, sbitmap *,
90 sbitmap *, sbitmap *,
91 sbitmap *));
92 static void compute_rev_insert_delete PARAMS ((struct edge_list *edge_list,
93 sbitmap *, sbitmap *,
94 sbitmap *, sbitmap *,
95 sbitmap *));
97 /* Edge based lcm routines. */
99 /* Compute expression anticipatability at entrance and exit of each block.
100 This is done based on the flow graph, and not on the pred-succ lists.
101 Other than that, its pretty much identical to compute_antinout. */
103 static void
104 compute_antinout_edge (antloc, transp, antin, antout)
105 sbitmap *antloc;
106 sbitmap *transp;
107 sbitmap *antin;
108 sbitmap *antout;
110 basic_block bb;
111 edge e;
112 basic_block *worklist, *qin, *qout, *qend;
113 unsigned int qlen;
115 /* Allocate a worklist array/queue. Entries are only added to the
116 list if they were not already on the list. So the size is
117 bounded by the number of basic blocks. */
118 qin = qout = worklist
119 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
121 /* We want a maximal solution, so make an optimistic initialization of
122 ANTIN. */
123 sbitmap_vector_ones (antin, last_basic_block);
125 /* Put every block on the worklist; this is necessary because of the
126 optimistic initialization of ANTIN above. */
127 FOR_EACH_BB_REVERSE (bb)
129 *qin++ =bb;
130 bb->aux = bb;
133 qin = worklist;
134 qend = &worklist[n_basic_blocks];
135 qlen = n_basic_blocks;
137 /* Mark blocks which are predecessors of the exit block so that we
138 can easily identify them below. */
139 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
140 e->src->aux = EXIT_BLOCK_PTR;
142 /* Iterate until the worklist is empty. */
143 while (qlen)
145 /* Take the first entry off the worklist. */
146 bb = *qout++;
147 qlen--;
149 if (qout >= qend)
150 qout = worklist;
152 if (bb->aux == EXIT_BLOCK_PTR)
153 /* Do not clear the aux field for blocks which are predecessors of
154 the EXIT block. That way we never add then to the worklist
155 again. */
156 sbitmap_zero (antout[bb->index]);
157 else
159 /* Clear the aux field of this block so that it can be added to
160 the worklist again if necessary. */
161 bb->aux = NULL;
162 sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
165 if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
166 transp[bb->index], antout[bb->index]))
167 /* If the in state of this block changed, then we need
168 to add the predecessors of this block to the worklist
169 if they are not already on the worklist. */
170 for (e = bb->pred; e; e = e->pred_next)
171 if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
173 *qin++ = e->src;
174 e->src->aux = e;
175 qlen++;
176 if (qin >= qend)
177 qin = worklist;
181 clear_aux_for_edges ();
182 clear_aux_for_blocks ();
183 free (worklist);
186 /* Compute the earliest vector for edge based lcm. */
188 static void
189 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest)
190 struct edge_list *edge_list;
191 int n_exprs;
192 sbitmap *antin, *antout, *avout, *kill, *earliest;
194 sbitmap difference, temp_bitmap;
195 int x, num_edges;
196 basic_block pred, succ;
198 num_edges = NUM_EDGES (edge_list);
200 difference = sbitmap_alloc (n_exprs);
201 temp_bitmap = sbitmap_alloc (n_exprs);
203 for (x = 0; x < num_edges; x++)
205 pred = INDEX_EDGE_PRED_BB (edge_list, x);
206 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
207 if (pred == ENTRY_BLOCK_PTR)
208 sbitmap_copy (earliest[x], antin[succ->index]);
209 else
211 if (succ == EXIT_BLOCK_PTR)
212 sbitmap_zero (earliest[x]);
213 else
215 sbitmap_difference (difference, antin[succ->index],
216 avout[pred->index]);
217 sbitmap_not (temp_bitmap, antout[pred->index]);
218 sbitmap_a_and_b_or_c (earliest[x], difference,
219 kill[pred->index], temp_bitmap);
224 sbitmap_free (temp_bitmap);
225 sbitmap_free (difference);
228 /* later(p,s) is dependent on the calculation of laterin(p).
229 laterin(p) is dependent on the calculation of later(p2,p).
231 laterin(ENTRY) is defined as all 0's
232 later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
233 laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).
235 If we progress in this manner, starting with all basic blocks
236 in the work list, anytime we change later(bb), we need to add
237 succs(bb) to the worklist if they are not already on the worklist.
239 Boundary conditions:
241 We prime the worklist all the normal basic blocks. The ENTRY block can
242 never be added to the worklist since it is never the successor of any
243 block. We explicitly prevent the EXIT block from being added to the
244 worklist.
246 We optimistically initialize LATER. That is the only time this routine
247 will compute LATER for an edge out of the entry block since the entry
248 block is never on the worklist. Thus, LATERIN is neither used nor
249 computed for the ENTRY block.
251 Since the EXIT block is never added to the worklist, we will neither
252 use nor compute LATERIN for the exit block. Edges which reach the
253 EXIT block are handled in the normal fashion inside the loop. However,
254 the insertion/deletion computation needs LATERIN(EXIT), so we have
255 to compute it. */
257 static void
258 compute_laterin (edge_list, earliest, antloc, later, laterin)
259 struct edge_list *edge_list;
260 sbitmap *earliest, *antloc, *later, *laterin;
262 int num_edges, i;
263 edge e;
264 basic_block *worklist, *qin, *qout, *qend, bb;
265 unsigned int qlen;
267 num_edges = NUM_EDGES (edge_list);
269 /* Allocate a worklist array/queue. Entries are only added to the
270 list if they were not already on the list. So the size is
271 bounded by the number of basic blocks. */
272 qin = qout = worklist
273 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
275 /* Initialize a mapping from each edge to its index. */
276 for (i = 0; i < num_edges; i++)
277 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
279 /* We want a maximal solution, so initially consider LATER true for
280 all edges. This allows propagation through a loop since the incoming
281 loop edge will have LATER set, so if all the other incoming edges
282 to the loop are set, then LATERIN will be set for the head of the
283 loop.
285 If the optimistic setting of LATER on that edge was incorrect (for
286 example the expression is ANTLOC in a block within the loop) then
287 this algorithm will detect it when we process the block at the head
288 of the optimistic edge. That will requeue the affected blocks. */
289 sbitmap_vector_ones (later, num_edges);
291 /* Note that even though we want an optimistic setting of LATER, we
292 do not want to be overly optimistic. Consider an outgoing edge from
293 the entry block. That edge should always have a LATER value the
294 same as EARLIEST for that edge. */
295 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
296 sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);
298 /* Add all the blocks to the worklist. This prevents an early exit from
299 the loop given our optimistic initialization of LATER above. */
300 FOR_EACH_BB (bb)
302 *qin++ = bb;
303 bb->aux = bb;
305 qin = worklist;
306 /* Note that we do not use the last allocated element for our queue,
307 as EXIT_BLOCK is never inserted into it. In fact the above allocation
308 of n_basic_blocks + 1 elements is not encessary. */
309 qend = &worklist[n_basic_blocks];
310 qlen = n_basic_blocks;
312 /* Iterate until the worklist is empty. */
313 while (qlen)
315 /* Take the first entry off the worklist. */
316 bb = *qout++;
317 bb->aux = NULL;
318 qlen--;
319 if (qout >= qend)
320 qout = worklist;
322 /* Compute the intersection of LATERIN for each incoming edge to B. */
323 sbitmap_ones (laterin[bb->index]);
324 for (e = bb->pred; e != NULL; e = e->pred_next)
325 sbitmap_a_and_b (laterin[bb->index], laterin[bb->index], later[(size_t)e->aux]);
327 /* Calculate LATER for all outgoing edges. */
328 for (e = bb->succ; e != NULL; e = e->succ_next)
329 if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
330 earliest[(size_t) e->aux],
331 laterin[e->src->index],
332 antloc[e->src->index])
333 /* If LATER for an outgoing edge was changed, then we need
334 to add the target of the outgoing edge to the worklist. */
335 && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
337 *qin++ = e->dest;
338 e->dest->aux = e;
339 qlen++;
340 if (qin >= qend)
341 qin = worklist;
345 /* Computation of insertion and deletion points requires computing LATERIN
346 for the EXIT block. We allocated an extra entry in the LATERIN array
347 for just this purpose. */
348 sbitmap_ones (laterin[last_basic_block]);
349 for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
350 sbitmap_a_and_b (laterin[last_basic_block],
351 laterin[last_basic_block],
352 later[(size_t) e->aux]);
354 clear_aux_for_edges ();
355 free (worklist);
358 /* Compute the insertion and deletion points for edge based LCM. */
360 static void
361 compute_insert_delete (edge_list, antloc, later, laterin,
362 insert, delete)
363 struct edge_list *edge_list;
364 sbitmap *antloc, *later, *laterin, *insert, *delete;
366 int x;
367 basic_block bb;
369 FOR_EACH_BB (bb)
370 sbitmap_difference (delete[bb->index], antloc[bb->index], laterin[bb->index]);
372 for (x = 0; x < NUM_EDGES (edge_list); x++)
374 basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
376 if (b == EXIT_BLOCK_PTR)
377 sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
378 else
379 sbitmap_difference (insert[x], later[x], laterin[b->index]);
383 /* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the insert and
384 delete vectors for edge based LCM. Returns an edgelist which is used to
385 map the insert vector to what edge an expression should be inserted on. */
387 struct edge_list *
388 pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
389 FILE *file ATTRIBUTE_UNUSED;
390 int n_exprs;
391 sbitmap *transp;
392 sbitmap *avloc;
393 sbitmap *antloc;
394 sbitmap *kill;
395 sbitmap **insert;
396 sbitmap **delete;
398 sbitmap *antin, *antout, *earliest;
399 sbitmap *avin, *avout;
400 sbitmap *later, *laterin;
401 struct edge_list *edge_list;
402 int num_edges;
404 edge_list = create_edge_list ();
405 num_edges = NUM_EDGES (edge_list);
407 #ifdef LCM_DEBUG_INFO
408 if (file)
410 fprintf (file, "Edge List:\n");
411 verify_edge_list (file, edge_list);
412 print_edge_list (file, edge_list);
413 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
414 dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
415 dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
416 dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
418 #endif
420 /* Compute global availability. */
421 avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
422 avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
423 compute_available (avloc, kill, avout, avin);
424 sbitmap_vector_free (avin);
426 /* Compute global anticipatability. */
427 antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
428 antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
429 compute_antinout_edge (antloc, transp, antin, antout);
431 #ifdef LCM_DEBUG_INFO
432 if (file)
434 dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
435 dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
437 #endif
439 /* Compute earliestness. */
440 earliest = sbitmap_vector_alloc (num_edges, n_exprs);
441 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);
443 #ifdef LCM_DEBUG_INFO
444 if (file)
445 dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
446 #endif
448 sbitmap_vector_free (antout);
449 sbitmap_vector_free (antin);
450 sbitmap_vector_free (avout);
452 later = sbitmap_vector_alloc (num_edges, n_exprs);
454 /* Allocate an extra element for the exit block in the laterin vector. */
455 laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
456 compute_laterin (edge_list, earliest, antloc, later, laterin);
458 #ifdef LCM_DEBUG_INFO
459 if (file)
461 dump_sbitmap_vector (file, "laterin", "", laterin, last_basic_block + 1);
462 dump_sbitmap_vector (file, "later", "", later, num_edges);
464 #endif
466 sbitmap_vector_free (earliest);
468 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
469 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
470 compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);
472 sbitmap_vector_free (laterin);
473 sbitmap_vector_free (later);
475 #ifdef LCM_DEBUG_INFO
476 if (file)
478 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
479 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
480 last_basic_block);
482 #endif
484 return edge_list;
487 /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.
488 Return the number of passes we performed to iterate to a solution. */
490 void
491 compute_available (avloc, kill, avout, avin)
492 sbitmap *avloc, *kill, *avout, *avin;
494 edge e;
495 basic_block *worklist, *qin, *qout, *qend, bb;
496 unsigned int qlen;
498 /* Allocate a worklist array/queue. Entries are only added to the
499 list if they were not already on the list. So the size is
500 bounded by the number of basic blocks. */
501 qin = qout = worklist
502 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
504 /* We want a maximal solution. */
505 sbitmap_vector_ones (avout, last_basic_block);
507 /* Put every block on the worklist; this is necessary because of the
508 optimistic initialization of AVOUT above. */
509 FOR_EACH_BB (bb)
511 *qin++ = bb;
512 bb->aux = bb;
515 qin = worklist;
516 qend = &worklist[n_basic_blocks];
517 qlen = n_basic_blocks;
519 /* Mark blocks which are successors of the entry block so that we
520 can easily identify them below. */
521 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
522 e->dest->aux = ENTRY_BLOCK_PTR;
524 /* Iterate until the worklist is empty. */
525 while (qlen)
527 /* Take the first entry off the worklist. */
528 bb = *qout++;
529 qlen--;
531 if (qout >= qend)
532 qout = worklist;
534 /* If one of the predecessor blocks is the ENTRY block, then the
535 intersection of avouts is the null set. We can identify such blocks
536 by the special value in the AUX field in the block structure. */
537 if (bb->aux == ENTRY_BLOCK_PTR)
538 /* Do not clear the aux field for blocks which are successors of the
539 ENTRY block. That way we never add then to the worklist again. */
540 sbitmap_zero (avin[bb->index]);
541 else
543 /* Clear the aux field of this block so that it can be added to
544 the worklist again if necessary. */
545 bb->aux = NULL;
546 sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
549 if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index], avin[bb->index], kill[bb->index]))
550 /* If the out state of this block changed, then we need
551 to add the successors of this block to the worklist
552 if they are not already on the worklist. */
553 for (e = bb->succ; e; e = e->succ_next)
554 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
556 *qin++ = e->dest;
557 e->dest->aux = e;
558 qlen++;
560 if (qin >= qend)
561 qin = worklist;
565 clear_aux_for_edges ();
566 clear_aux_for_blocks ();
567 free (worklist);
570 /* Compute the farthest vector for edge based lcm. */
572 static void
573 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
574 kill, farthest)
575 struct edge_list *edge_list;
576 int n_exprs;
577 sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest;
579 sbitmap difference, temp_bitmap;
580 int x, num_edges;
581 basic_block pred, succ;
583 num_edges = NUM_EDGES (edge_list);
585 difference = sbitmap_alloc (n_exprs);
586 temp_bitmap = sbitmap_alloc (n_exprs);
588 for (x = 0; x < num_edges; x++)
590 pred = INDEX_EDGE_PRED_BB (edge_list, x);
591 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
592 if (succ == EXIT_BLOCK_PTR)
593 sbitmap_copy (farthest[x], st_avout[pred->index]);
594 else
596 if (pred == ENTRY_BLOCK_PTR)
597 sbitmap_zero (farthest[x]);
598 else
600 sbitmap_difference (difference, st_avout[pred->index],
601 st_antin[succ->index]);
602 sbitmap_not (temp_bitmap, st_avin[succ->index]);
603 sbitmap_a_and_b_or_c (farthest[x], difference,
604 kill[succ->index], temp_bitmap);
609 sbitmap_free (temp_bitmap);
610 sbitmap_free (difference);
613 /* Compute nearer and nearerout vectors for edge based lcm.
615 This is the mirror of compute_laterin, additional comments on the
616 implementation can be found before compute_laterin. */
618 static void
619 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout)
620 struct edge_list *edge_list;
621 sbitmap *farthest, *st_avloc, *nearer, *nearerout;
623 int num_edges, i;
624 edge e;
625 basic_block *worklist, *tos, bb;
627 num_edges = NUM_EDGES (edge_list);
629 /* Allocate a worklist array/queue. Entries are only added to the
630 list if they were not already on the list. So the size is
631 bounded by the number of basic blocks. */
632 tos = worklist
633 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
635 /* Initialize NEARER for each edge and build a mapping from an edge to
636 its index. */
637 for (i = 0; i < num_edges; i++)
638 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
640 /* We want a maximal solution. */
641 sbitmap_vector_ones (nearer, num_edges);
643 /* Note that even though we want an optimistic setting of NEARER, we
644 do not want to be overly optimistic. Consider an incoming edge to
645 the exit block. That edge should always have a NEARER value the
646 same as FARTHEST for that edge. */
647 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
648 sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);
650 /* Add all the blocks to the worklist. This prevents an early exit
651 from the loop given our optimistic initialization of NEARER. */
652 FOR_EACH_BB (bb)
654 *tos++ = bb;
655 bb->aux = bb;
658 /* Iterate until the worklist is empty. */
659 while (tos != worklist)
661 /* Take the first entry off the worklist. */
662 bb = *--tos;
663 bb->aux = NULL;
665 /* Compute the intersection of NEARER for each outgoing edge from B. */
666 sbitmap_ones (nearerout[bb->index]);
667 for (e = bb->succ; e != NULL; e = e->succ_next)
668 sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
669 nearer[(size_t) e->aux]);
671 /* Calculate NEARER for all incoming edges. */
672 for (e = bb->pred; e != NULL; e = e->pred_next)
673 if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
674 farthest[(size_t) e->aux],
675 nearerout[e->dest->index],
676 st_avloc[e->dest->index])
677 /* If NEARER for an incoming edge was changed, then we need
678 to add the source of the incoming edge to the worklist. */
679 && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
681 *tos++ = e->src;
682 e->src->aux = e;
686 /* Computation of insertion and deletion points requires computing NEAREROUT
687 for the ENTRY block. We allocated an extra entry in the NEAREROUT array
688 for just this purpose. */
689 sbitmap_ones (nearerout[last_basic_block]);
690 for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
691 sbitmap_a_and_b (nearerout[last_basic_block],
692 nearerout[last_basic_block],
693 nearer[(size_t) e->aux]);
695 clear_aux_for_edges ();
696 free (tos);
699 /* Compute the insertion and deletion points for edge based LCM. */
701 static void
702 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
703 insert, delete)
704 struct edge_list *edge_list;
705 sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete;
707 int x;
708 basic_block bb;
710 FOR_EACH_BB (bb)
711 sbitmap_difference (delete[bb->index], st_avloc[bb->index], nearerout[bb->index]);
713 for (x = 0; x < NUM_EDGES (edge_list); x++)
715 basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
716 if (b == ENTRY_BLOCK_PTR)
717 sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
718 else
719 sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
723 /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
724 insert and delete vectors for edge based reverse LCM. Returns an
725 edgelist which is used to map the insert vector to what edge
726 an expression should be inserted on. */
728 struct edge_list *
729 pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill,
730 insert, delete)
731 FILE *file ATTRIBUTE_UNUSED;
732 int n_exprs;
733 sbitmap *transp;
734 sbitmap *st_avloc;
735 sbitmap *st_antloc;
736 sbitmap *kill;
737 sbitmap **insert;
738 sbitmap **delete;
740 sbitmap *st_antin, *st_antout;
741 sbitmap *st_avout, *st_avin, *farthest;
742 sbitmap *nearer, *nearerout;
743 struct edge_list *edge_list;
744 int num_edges;
746 edge_list = create_edge_list ();
747 num_edges = NUM_EDGES (edge_list);
749 st_antin = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
750 st_antout = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
751 sbitmap_vector_zero (st_antin, last_basic_block);
752 sbitmap_vector_zero (st_antout, last_basic_block);
753 compute_antinout_edge (st_antloc, transp, st_antin, st_antout);
755 /* Compute global anticipatability. */
756 st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
757 st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
758 compute_available (st_avloc, kill, st_avout, st_avin);
760 #ifdef LCM_DEBUG_INFO
761 if (file)
763 fprintf (file, "Edge List:\n");
764 verify_edge_list (file, edge_list);
765 print_edge_list (file, edge_list);
766 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
767 dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
768 dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
769 dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
770 dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
771 dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
773 #endif
775 #ifdef LCM_DEBUG_INFO
776 if (file)
778 dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
779 dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
781 #endif
783 /* Compute farthestness. */
784 farthest = sbitmap_vector_alloc (num_edges, n_exprs);
785 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
786 kill, farthest);
788 #ifdef LCM_DEBUG_INFO
789 if (file)
790 dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
791 #endif
793 sbitmap_vector_free (st_antin);
794 sbitmap_vector_free (st_antout);
796 sbitmap_vector_free (st_avin);
797 sbitmap_vector_free (st_avout);
799 nearer = sbitmap_vector_alloc (num_edges, n_exprs);
801 /* Allocate an extra element for the entry block. */
802 nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
803 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);
805 #ifdef LCM_DEBUG_INFO
806 if (file)
808 dump_sbitmap_vector (file, "nearerout", "", nearerout,
809 last_basic_block + 1);
810 dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
812 #endif
814 sbitmap_vector_free (farthest);
816 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
817 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
818 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
819 *insert, *delete);
821 sbitmap_vector_free (nearerout);
822 sbitmap_vector_free (nearer);
824 #ifdef LCM_DEBUG_INFO
825 if (file)
827 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
828 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
829 last_basic_block);
831 #endif
832 return edge_list;
835 /* Mode switching:
837 The algorithm for setting the modes consists of scanning the insn list
838 and finding all the insns which require a specific mode. Each insn gets
839 a unique struct seginfo element. These structures are inserted into a list
840 for each basic block. For each entity, there is an array of bb_info over
841 the flow graph basic blocks (local var 'bb_info'), and contains a list
842 of all insns within that basic block, in the order they are encountered.
844 For each entity, any basic block WITHOUT any insns requiring a specific
845 mode are given a single entry, without a mode. (Each basic block
846 in the flow graph must have at least one entry in the segment table.)
848 The LCM algorithm is then run over the flow graph to determine where to
849 place the sets to the highest-priority value in respect of first the first
850 insn in any one block. Any adjustments required to the transparancy
851 vectors are made, then the next iteration starts for the next-lower
852 priority mode, till for each entity all modes are exhasted.
854 More details are located in the code for optimize_mode_switching(). */
856 /* This structure contains the information for each insn which requires
857 either single or double mode to be set.
858 MODE is the mode this insn must be executed in.
859 INSN_PTR is the insn to be executed (may be the note that marks the
860 beginning of a basic block).
861 BBNUM is the flow graph basic block this insn occurs in.
862 NEXT is the next insn in the same basic block. */
863 struct seginfo
865 int mode;
866 rtx insn_ptr;
867 int bbnum;
868 struct seginfo *next;
869 HARD_REG_SET regs_live;
872 struct bb_info
874 struct seginfo *seginfo;
875 int computing;
878 /* These bitmaps are used for the LCM algorithm. */
880 #ifdef OPTIMIZE_MODE_SWITCHING
881 static sbitmap *antic;
882 static sbitmap *transp;
883 static sbitmap *comp;
884 static sbitmap *delete;
885 static sbitmap *insert;
887 static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET));
888 static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *));
889 static void reg_dies PARAMS ((rtx, HARD_REG_SET));
890 static void reg_becomes_live PARAMS ((rtx, rtx, void *));
891 static void make_preds_opaque PARAMS ((basic_block, int));
892 #endif
894 #ifdef OPTIMIZE_MODE_SWITCHING
896 /* This function will allocate a new BBINFO structure, initialized
897 with the MODE, INSN, and basic block BB parameters. */
899 static struct seginfo *
900 new_seginfo (mode, insn, bb, regs_live)
901 int mode;
902 rtx insn;
903 int bb;
904 HARD_REG_SET regs_live;
906 struct seginfo *ptr;
907 ptr = xmalloc (sizeof (struct seginfo));
908 ptr->mode = mode;
909 ptr->insn_ptr = insn;
910 ptr->bbnum = bb;
911 ptr->next = NULL;
912 COPY_HARD_REG_SET (ptr->regs_live, regs_live);
913 return ptr;
916 /* Add a seginfo element to the end of a list.
917 HEAD is a pointer to the list beginning.
918 INFO is the structure to be linked in. */
920 static void
921 add_seginfo (head, info)
922 struct bb_info *head;
923 struct seginfo *info;
925 struct seginfo *ptr;
927 if (head->seginfo == NULL)
928 head->seginfo = info;
929 else
931 ptr = head->seginfo;
932 while (ptr->next != NULL)
933 ptr = ptr->next;
934 ptr->next = info;
938 /* Make all predecessors of basic block B opaque, recursively, till we hit
939 some that are already non-transparent, or an edge where aux is set; that
940 denotes that a mode set is to be done on that edge.
941 J is the bit number in the bitmaps that corresponds to the entity that
942 we are currently handling mode-switching for. */
944 static void
945 make_preds_opaque (b, j)
946 basic_block b;
947 int j;
949 edge e;
951 for (e = b->pred; e; e = e->pred_next)
953 basic_block pb = e->src;
955 if (e->aux || ! TEST_BIT (transp[pb->index], j))
956 continue;
958 RESET_BIT (transp[pb->index], j);
959 make_preds_opaque (pb, j);
963 /* Record in LIVE that register REG died. */
965 static void
966 reg_dies (reg, live)
967 rtx reg;
968 HARD_REG_SET live;
970 int regno, nregs;
972 if (GET_CODE (reg) != REG)
973 return;
975 regno = REGNO (reg);
976 if (regno < FIRST_PSEUDO_REGISTER)
977 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
978 nregs--)
979 CLEAR_HARD_REG_BIT (live, regno + nregs);
982 /* Record in LIVE that register REG became live.
983 This is called via note_stores. */
985 static void
986 reg_becomes_live (reg, setter, live)
987 rtx reg;
988 rtx setter ATTRIBUTE_UNUSED;
989 void *live;
991 int regno, nregs;
993 if (GET_CODE (reg) == SUBREG)
994 reg = SUBREG_REG (reg);
996 if (GET_CODE (reg) != REG)
997 return;
999 regno = REGNO (reg);
1000 if (regno < FIRST_PSEUDO_REGISTER)
1001 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
1002 nregs--)
1003 SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
1006 /* Find all insns that need a particular mode setting, and insert the
1007 necessary mode switches. Return true if we did work. */
1010 optimize_mode_switching (file)
1011 FILE *file;
1013 rtx insn;
1014 int e;
1015 basic_block bb;
1016 int need_commit = 0;
1017 sbitmap *kill;
1018 struct edge_list *edge_list;
1019 static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
1020 #define N_ENTITIES ARRAY_SIZE (num_modes)
1021 int entity_map[N_ENTITIES];
1022 struct bb_info *bb_info[N_ENTITIES];
1023 int i, j;
1024 int n_entities;
1025 int max_num_modes = 0;
1026 bool emited = false;
1027 basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;
1029 clear_bb_flags ();
1031 for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
1032 if (OPTIMIZE_MODE_SWITCHING (e))
1034 int entry_exit_extra = 0;
1036 /* Create the list of segments within each basic block.
1037 If NORMAL_MODE is defined, allow for two extra
1038 blocks split from the entry and exit block. */
1039 #ifdef NORMAL_MODE
1040 entry_exit_extra = 2;
1041 #endif
1042 bb_info[n_entities]
1043 = (struct bb_info *) xcalloc (last_basic_block + entry_exit_extra,
1044 sizeof **bb_info);
1045 entity_map[n_entities++] = e;
1046 if (num_modes[e] > max_num_modes)
1047 max_num_modes = num_modes[e];
1050 if (! n_entities)
1051 return 0;
1053 #ifdef NORMAL_MODE
1055 /* Split the edge from the entry block and the fallthrough edge to the
1056 exit block, so that we can note that there NORMAL_MODE is supplied /
1057 required. */
1058 edge eg;
1059 post_entry = split_edge (ENTRY_BLOCK_PTR->succ);
1060 /* The only non-call predecessor at this stage is a block with a
1061 fallthrough edge; there can be at most one, but there could be
1062 none at all, e.g. when exit is called. */
1063 for (pre_exit = 0, eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
1064 if (eg->flags & EDGE_FALLTHRU)
1066 regset live_at_end = eg->src->global_live_at_end;
1068 if (pre_exit)
1069 abort ();
1070 pre_exit = split_edge (eg);
1071 COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
1072 COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
1075 #endif
1077 /* Create the bitmap vectors. */
1079 antic = sbitmap_vector_alloc (last_basic_block, n_entities);
1080 transp = sbitmap_vector_alloc (last_basic_block, n_entities);
1081 comp = sbitmap_vector_alloc (last_basic_block, n_entities);
1083 sbitmap_vector_ones (transp, last_basic_block);
1085 for (j = n_entities - 1; j >= 0; j--)
1087 int e = entity_map[j];
1088 int no_mode = num_modes[e];
1089 struct bb_info *info = bb_info[j];
1091 /* Determine what the first use (if any) need for a mode of entity E is.
1092 This will be the mode that is anticipatable for this block.
1093 Also compute the initial transparency settings. */
1094 FOR_EACH_BB (bb)
1096 struct seginfo *ptr;
1097 int last_mode = no_mode;
1098 HARD_REG_SET live_now;
1100 REG_SET_TO_HARD_REG_SET (live_now,
1101 bb->global_live_at_start);
1102 for (insn = bb->head;
1103 insn != NULL && insn != NEXT_INSN (bb->end);
1104 insn = NEXT_INSN (insn))
1106 if (INSN_P (insn))
1108 int mode = MODE_NEEDED (e, insn);
1109 rtx link;
1111 if (mode != no_mode && mode != last_mode)
1113 last_mode = mode;
1114 ptr = new_seginfo (mode, insn, bb->index, live_now);
1115 add_seginfo (info + bb->index, ptr);
1116 RESET_BIT (transp[bb->index], j);
1119 /* Update LIVE_NOW. */
1120 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1121 if (REG_NOTE_KIND (link) == REG_DEAD)
1122 reg_dies (XEXP (link, 0), live_now);
1124 note_stores (PATTERN (insn), reg_becomes_live, &live_now);
1125 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1126 if (REG_NOTE_KIND (link) == REG_UNUSED)
1127 reg_dies (XEXP (link, 0), live_now);
1131 info[bb->index].computing = last_mode;
1132 /* Check for blocks without ANY mode requirements. */
1133 if (last_mode == no_mode)
1135 ptr = new_seginfo (no_mode, bb->end, bb->index, live_now);
1136 add_seginfo (info + bb->index, ptr);
1139 #ifdef NORMAL_MODE
1141 int mode = NORMAL_MODE (e);
1143 if (mode != no_mode)
1145 bb = post_entry;
1147 /* By always making this nontransparent, we save
1148 an extra check in make_preds_opaque. We also
1149 need this to avoid confusing pre_edge_lcm when
1150 antic is cleared but transp and comp are set. */
1151 RESET_BIT (transp[bb->index], j);
1153 /* Insert a fake computing definition of MODE into entry
1154 blocks which compute no mode. This represents the mode on
1155 entry. */
1156 info[bb->index].computing = mode;
1158 if (pre_exit)
1159 info[pre_exit->index].seginfo->mode = mode;
1162 #endif /* NORMAL_MODE */
1165 kill = sbitmap_vector_alloc (last_basic_block, n_entities);
1166 for (i = 0; i < max_num_modes; i++)
1168 int current_mode[N_ENTITIES];
1170 /* Set the anticipatable and computing arrays. */
1171 sbitmap_vector_zero (antic, last_basic_block);
1172 sbitmap_vector_zero (comp, last_basic_block);
1173 for (j = n_entities - 1; j >= 0; j--)
1175 int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
1176 struct bb_info *info = bb_info[j];
1178 FOR_EACH_BB (bb)
1180 if (info[bb->index].seginfo->mode == m)
1181 SET_BIT (antic[bb->index], j);
1183 if (info[bb->index].computing == m)
1184 SET_BIT (comp[bb->index], j);
1188 /* Calculate the optimal locations for the
1189 placement mode switches to modes with priority I. */
1191 FOR_EACH_BB (bb)
1192 sbitmap_not (kill[bb->index], transp[bb->index]);
1193 edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
1194 kill, &insert, &delete);
1196 for (j = n_entities - 1; j >= 0; j--)
1198 /* Insert all mode sets that have been inserted by lcm. */
1199 int no_mode = num_modes[entity_map[j]];
1201 /* Wherever we have moved a mode setting upwards in the flow graph,
1202 the blocks between the new setting site and the now redundant
1203 computation ceases to be transparent for any lower-priority
1204 mode of the same entity. First set the aux field of each
1205 insertion site edge non-transparent, then propagate the new
1206 non-transparency from the redundant computation upwards till
1207 we hit an insertion site or an already non-transparent block. */
1208 for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
1210 edge eg = INDEX_EDGE (edge_list, e);
1211 int mode;
1212 basic_block src_bb;
1213 HARD_REG_SET live_at_edge;
1214 rtx mode_set;
1216 eg->aux = 0;
1218 if (! TEST_BIT (insert[e], j))
1219 continue;
1221 eg->aux = (void *)1;
1223 mode = current_mode[j];
1224 src_bb = eg->src;
1226 REG_SET_TO_HARD_REG_SET (live_at_edge,
1227 src_bb->global_live_at_end);
1229 start_sequence ();
1230 EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
1231 mode_set = get_insns ();
1232 end_sequence ();
1234 /* Do not bother to insert empty sequence. */
1235 if (mode_set == NULL_RTX)
1236 continue;
1238 /* If this is an abnormal edge, we'll insert at the end
1239 of the previous block. */
1240 if (eg->flags & EDGE_ABNORMAL)
1242 emited = true;
1243 if (GET_CODE (src_bb->end) == JUMP_INSN)
1244 emit_insn_before (mode_set, src_bb->end);
1245 /* It doesn't make sense to switch to normal mode
1246 after a CALL_INSN, so we're going to abort if we
1247 find one. The cases in which a CALL_INSN may
1248 have an abnormal edge are sibcalls and EH edges.
1249 In the case of sibcalls, the dest basic-block is
1250 the EXIT_BLOCK, that runs in normal mode; it is
1251 assumed that a sibcall insn requires normal mode
1252 itself, so no mode switch would be required after
1253 the call (it wouldn't make sense, anyway). In
1254 the case of EH edges, EH entry points also start
1255 in normal mode, so a similar reasoning applies. */
1256 else if (GET_CODE (src_bb->end) == INSN)
1257 emit_insn_after (mode_set, src_bb->end);
1258 else
1259 abort ();
1260 bb_info[j][src_bb->index].computing = mode;
1261 RESET_BIT (transp[src_bb->index], j);
1263 else
1265 need_commit = 1;
1266 insert_insn_on_edge (mode_set, eg);
1270 FOR_EACH_BB_REVERSE (bb)
1271 if (TEST_BIT (delete[bb->index], j))
1273 make_preds_opaque (bb, j);
1274 /* Cancel the 'deleted' mode set. */
1275 bb_info[j][bb->index].seginfo->mode = no_mode;
1279 clear_aux_for_edges ();
1280 free_edge_list (edge_list);
1283 /* Now output the remaining mode sets in all the segments. */
1284 for (j = n_entities - 1; j >= 0; j--)
1286 int no_mode = num_modes[entity_map[j]];
1288 FOR_EACH_BB_REVERSE (bb)
1290 struct seginfo *ptr, *next;
1291 for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
1293 next = ptr->next;
1294 if (ptr->mode != no_mode)
1296 rtx mode_set;
1298 start_sequence ();
1299 EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
1300 mode_set = get_insns ();
1301 end_sequence ();
1303 /* Do not bother to insert empty sequence. */
1304 if (mode_set == NULL_RTX)
1305 continue;
1307 emited = true;
1308 if (GET_CODE (ptr->insn_ptr) == NOTE
1309 && (NOTE_LINE_NUMBER (ptr->insn_ptr)
1310 == NOTE_INSN_BASIC_BLOCK))
1311 emit_insn_after (mode_set, ptr->insn_ptr);
1312 else
1313 emit_insn_before (mode_set, ptr->insn_ptr);
1316 free (ptr);
1320 free (bb_info[j]);
1323 /* Finished. Free up all the things we've allocated. */
1325 sbitmap_vector_free (kill);
1326 sbitmap_vector_free (antic);
1327 sbitmap_vector_free (transp);
1328 sbitmap_vector_free (comp);
1329 sbitmap_vector_free (delete);
1330 sbitmap_vector_free (insert);
1332 if (need_commit)
1333 commit_edge_insertions ();
1335 #ifdef NORMAL_MODE
1336 cleanup_cfg (CLEANUP_NO_INSN_DEL);
1337 #else
1338 if (!need_commit && !emited)
1339 return 0;
1340 #endif
1342 max_regno = max_reg_num ();
1343 allocate_reg_info (max_regno, FALSE, FALSE);
1344 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
1345 (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
1346 | PROP_SCAN_DEAD_CODE));
1348 return 1;
1350 #endif /* OPTIMIZE_MODE_SWITCHING */