/* Generic partial redundancy elimination with lazy code motion support.
   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

/* These routines are meant to be used by various optimization
   passes which can be modeled as lazy code motion problems.
   Including, but not limited to:

	* Traditional partial redundancy elimination.

	* Placement of caller/callee register save/restores.

	* Load/store motion.

	* Copy motion.

	* Conversion of flat register files to a stacked register
	  model.

	* Dead load/store elimination.

   These routines accept as input:

	* Basic block information (number of blocks, lists of
	  predecessors and successors).  Note the granularity
	  does not need to be basic block; it could be statements
	  or functions.

	* Bitmaps of local properties (computed, transparent and
	  anticipatable expressions).

   The output of these routines is a bitmap of redundant computations
   and a bitmap of optimal placement points.  */
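
/* For orientation, here is an informal summary of the dataflow equations
   the forward routines below compute, distilled from the code itself (see
   Knoop, Ruething and Steffen's "Lazy Code Motion" for the underlying
   theory).  "&", "|" and "~" denote bitwise and, or and complement over
   the per-expression bitmaps; special cases at the ENTRY and EXIT blocks
   are noted in the individual routines:

	ANTOUT(b)     = intersection of ANTIN(s) over successors s of b
	ANTIN(b)      = ANTLOC(b) | (TRANSP(b) & ANTOUT(b))

	AVIN(b)       = intersection of AVOUT(p) over predecessors p of b
	AVOUT(b)      = AVLOC(b) | (AVIN(b) & ~KILL(b))

	EARLIEST(p,s) = ANTIN(s) & ~AVOUT(p) & (KILL(p) | ~ANTOUT(p))
	LATERIN(b)    = intersection of LATER(p,b) over incoming edges
	LATER(p,s)    = EARLIEST(p,s) | (LATERIN(p) & ~ANTLOC(p))

	INSERT(p,s)   = LATER(p,s) & ~LATERIN(s)
	DELETE(b)     = ANTLOC(b) & ~LATERIN(b)  */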
#include "config.h"
#include "system.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "tm_p.h"

/* We want target macros for the mode switching code to be able to refer
   to instruction attribute values.  */
#include "insn-attr.h"

/* Edge based LCM routines.  */
static void compute_antinout_edge PARAMS ((sbitmap *, sbitmap *,
					   sbitmap *, sbitmap *));
static void compute_earliest PARAMS ((struct edge_list *, int,
				      sbitmap *, sbitmap *,
				      sbitmap *, sbitmap *,
				      sbitmap *));
static void compute_laterin PARAMS ((struct edge_list *, sbitmap *,
				     sbitmap *, sbitmap *,
				     sbitmap *));
static void compute_insert_delete PARAMS ((struct edge_list *edge_list,
					   sbitmap *, sbitmap *,
					   sbitmap *, sbitmap *,
					   sbitmap *));

/* Edge based LCM routines on a reverse flowgraph.  */
static void compute_farthest PARAMS ((struct edge_list *, int,
				      sbitmap *, sbitmap *,
				      sbitmap *, sbitmap *,
				      sbitmap *));
static void compute_nearerout PARAMS ((struct edge_list *, sbitmap *,
				       sbitmap *, sbitmap *,
				       sbitmap *));
static void compute_rev_insert_delete PARAMS ((struct edge_list *edge_list,
					       sbitmap *, sbitmap *,
					       sbitmap *, sbitmap *,
					       sbitmap *));

/* Edge based LCM routines.  */

/* Compute expression anticipatability at entrance and exit of each block.
   This is done based on the flow graph, and not on the pred-succ lists.
   Other than that, it's pretty much identical to compute_antinout.  */
static void
compute_antinout_edge (antloc, transp, antin, antout)
     sbitmap *antloc;
     sbitmap *transp;
     sbitmap *antin;
     sbitmap *antout;
{
  int bb;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  sbitmap_vector_ones (antin, n_basic_blocks);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  */
  for (bb = n_basic_blocks - 1; bb >= 0; bb--)
    {
      *qin++ = BASIC_BLOCK (bb);
      BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    e->src->aux = EXIT_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      basic_block b = *qout++;
      bb = b->index;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      if (b->aux == EXIT_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are predecessors of
	   the EXIT block.  That way we never add them to the worklist
	   again.  */
	sbitmap_zero (antout[bb]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  b->aux = NULL;
	  sbitmap_intersection_of_succs (antout[bb], antin, bb);
	}

      if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb], transp[bb], antout[bb]))
	/* If the in state of this block changed, then we need
	   to add the predecessors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = b->pred; e; e = e->pred_next)
	  if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
	    {
	      *qin++ = e->src;
	      e->src->aux = e;
	      qlen++;
	      if (qin >= qend)
		qin = worklist;
	    }
    }

  free (worklist);
}
/* Compute the earliest vector for edge based LCM.  */

static void
compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest)
     struct edge_list *edge_list;
     int n_exprs;
     sbitmap *antin, *antout, *avout, *kill, *earliest;
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (pred == ENTRY_BLOCK_PTR)
	sbitmap_copy (earliest[x], antin[succ->index]);
      else
	{
	  /* We refer to the EXIT_BLOCK index, instead of testing for
	     EXIT_BLOCK_PTR, so that EXIT_BLOCK_PTR's index can be
	     changed so as to pretend it's a regular block, so that
	     its antin can be taken into account.  */
	  if (succ->index == EXIT_BLOCK)
	    sbitmap_zero (earliest[x]);
	  else
	    {
	      sbitmap_difference (difference, antin[succ->index],
				  avout[pred->index]);
	      sbitmap_not (temp_bitmap, antout[pred->index]);
	      sbitmap_a_and_b_or_c (earliest[x], difference,
				    kill[pred->index], temp_bitmap);
	    }
	}
    }

  free (temp_bitmap);
  free (difference);
}
/* later(p,s) is dependent on the calculation of laterin(p).
   laterin(p) is dependent on the calculation of later(p2,p).

     laterin(ENTRY) is defined as all 0's
     later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
     laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).

   If we progress in this manner, starting with all basic blocks
   in the work list, anytime we change later(bb), we need to add
   succs(bb) to the worklist if they are not already on the worklist.

   Boundary conditions:

     We prime the worklist with all the normal basic blocks.  The ENTRY
     block can never be added to the worklist since it is never the
     successor of any block.  We explicitly prevent the EXIT block from
     being added to the worklist.

     We optimistically initialize LATER.  That is the only time this
     routine will compute LATER for an edge out of the entry block since
     the entry block is never on the worklist.  Thus, LATERIN is neither
     used nor computed for the ENTRY block.

     Since the EXIT block is never added to the worklist, we will neither
     use nor compute LATERIN for the exit block.  Edges which reach the
     EXIT block are handled in the normal fashion inside the loop.
     However, the insertion/deletion computation needs LATERIN(EXIT), so
     we have to compute it.  */
static void
compute_laterin (edge_list, earliest, antloc, later, laterin)
     struct edge_list *edge_list;
     sbitmap *earliest, *antloc, *later, *laterin;
{
  int bb, num_edges, i;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  sbitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      basic_block b = BASIC_BLOCK (bb);
      *qin++ = b;
      b->aux = b;
    }
  qin = worklist;
  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it.  In fact the above allocation
     of n_basic_blocks + 1 elements is not necessary.  */
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      basic_block b = *qout++;
      b->aux = NULL;
      qlen--;
      if (qout >= qend)
	qout = worklist;

      /* Compute the intersection of LATERIN for each incoming edge to B.  */
      bb = b->index;
      sbitmap_ones (laterin[bb]);
      for (e = b->pred; e != NULL; e = e->pred_next)
	sbitmap_a_and_b (laterin[bb], laterin[bb], later[(size_t) e->aux]);

      /* Calculate LATER for all outgoing edges.  */
      for (e = b->succ; e != NULL; e = e->succ_next)
	if (sbitmap_union_of_diff (later[(size_t) e->aux],
				   earliest[(size_t) e->aux],
				   laterin[e->src->index],
				   antloc[e->src->index])
	    /* If LATER for an outgoing edge was changed, then we need
	       to add the target of the outgoing edge to the worklist.  */
	    && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
	  {
	    *qin++ = e->dest;
	    e->dest->aux = e;
	    qlen++;
	    if (qin >= qend)
	      qin = worklist;
	  }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  sbitmap_ones (laterin[n_basic_blocks]);
  for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
    sbitmap_a_and_b (laterin[n_basic_blocks],
		     laterin[n_basic_blocks],
		     later[(size_t) e->aux]);

  free (worklist);
}
/* Compute the insertion and deletion points for edge based LCM.  */

static void
compute_insert_delete (edge_list, antloc, later, laterin,
		       insert, delete)
     struct edge_list *edge_list;
     sbitmap *antloc, *later, *laterin, *insert, *delete;
{
  int x;

  for (x = 0; x < n_basic_blocks; x++)
    sbitmap_difference (delete[x], antloc[x], laterin[x]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);

      if (b == EXIT_BLOCK_PTR)
	sbitmap_difference (insert[x], later[x], laterin[n_basic_blocks]);
      else
	sbitmap_difference (insert[x], later[x], laterin[b->index]);
    }
}
/* Given local properties TRANSP, AVLOC, ANTLOC, KILL, return the insert and
   delete vectors for edge based LCM.  Returns an edgelist which is used to
   map the insert vector to what edge an expression should be inserted on.  */
struct edge_list *
pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
     FILE *file ATTRIBUTE_UNUSED;
     int n_exprs;
     sbitmap *transp;
     sbitmap *avloc;
     sbitmap *antloc;
     sbitmap *kill;
     sbitmap **insert;
     sbitmap **delete;
{
  sbitmap *antin, *antout, *earliest;
  sbitmap *avin, *avout;
  sbitmap *later, *laterin;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
      dump_sbitmap_vector (file, "antloc", "", antloc, n_basic_blocks);
      dump_sbitmap_vector (file, "avloc", "", avloc, n_basic_blocks);
      dump_sbitmap_vector (file, "kill", "", kill, n_basic_blocks);
    }
#endif

  /* Compute global availability.  */
  avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_available (avloc, kill, avout, avin);
  free (avin);

  /* Compute global anticipatability.  */
  antin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  antout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_antinout_edge (antloc, transp, antin, antout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "antin", "", antin, n_basic_blocks);
      dump_sbitmap_vector (file, "antout", "", antout, n_basic_blocks);
    }
#endif

  /* Compute earliestness.  */
  earliest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
#endif

  free (antout);
  free (antin);
  free (avout);

  later = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the exit block in the laterin vector.  */
  laterin = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
  compute_laterin (edge_list, earliest, antloc, later, laterin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "laterin", "", laterin, n_basic_blocks + 1);
      dump_sbitmap_vector (file, "later", "", later, num_edges);
    }
#endif

  free (earliest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);

  free (laterin);
  free (later);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   n_basic_blocks);
    }
#endif

  return edge_list;
}
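
/* A minimal sketch of how a client pass might drive pre_edge_lcm, kept
   under #if 0 because it is illustrative only: compute_local_props is a
   hypothetical helper standing in for the client's local-property
   computation, and real callers also free the vectors when done.  */
#if 0
static void
example_lcm_client (file, n_exprs)
     FILE *file;
     int n_exprs;
{
  sbitmap *transp, *avloc, *antloc, *kill, *insert, *delete;
  struct edge_list *edge_list;
  int x;

  transp = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  avloc = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  antloc = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  kill = sbitmap_vector_alloc (n_basic_blocks, n_exprs);

  /* The client fills in the local properties per block; a hypothetical
     helper stands in for that step here.  */
  compute_local_props (transp, avloc, antloc, kill);

  edge_list = pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill,
			    &insert, &delete);

  /* INSERT is indexed by edge: expression E should be computed on edge X
     when TEST_BIT (insert[x], e) holds.  DELETE is indexed by block: the
     computation of E in block B is redundant when TEST_BIT (delete[b], e)
     holds.  */
  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      edge eg = INDEX_EDGE (edge_list, x);
      /* ... emit insns on EG for each bit set in insert[x] ...  */
    }

  free_edge_list (edge_list);
}
#endif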
/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.  */
void
compute_available (avloc, kill, avout, avin)
     sbitmap *avloc, *kill, *avout, *avin;
{
  int bb;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution.  */
  sbitmap_vector_ones (avout, n_basic_blocks);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      *qin++ = BASIC_BLOCK (bb);
      BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    e->dest->aux = ENTRY_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      basic_block b = *qout++;
      bb = b->index;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
	 intersection of avouts is the null set.  We can identify such blocks
	 by the special value in the AUX field in the block structure.  */
      if (b->aux == ENTRY_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are successors of the
	   ENTRY block.  That way we never add them to the worklist again.  */
	sbitmap_zero (avin[bb]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  b->aux = NULL;
	  sbitmap_intersection_of_preds (avin[bb], avout, bb);
	}

      if (sbitmap_union_of_diff (avout[bb], avloc[bb], avin[bb], kill[bb]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = b->succ; e; e = e->succ_next)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  free (worklist);
}
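
/* The routines below solve the mirror image of the forward problem on
   the reverse flowgraph.  Informally, again distilled from the code and
   using the same notation as the summary at the top of the file:

	FARTHEST(p,s) = ST_AVOUT(p) & ~ST_ANTIN(s) & (KILL(s) | ~ST_AVIN(s))
	NEAREROUT(b)  = intersection of NEARER(b,s) over outgoing edges
	NEARER(p,s)   = FARTHEST(p,s) | (NEAREROUT(s) & ~ST_AVLOC(s))

	INSERT(p,s)   = NEARER(p,s) & ~NEAREROUT(p)
	DELETE(b)     = ST_AVLOC(b) & ~NEAREROUT(b)  */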
/* Compute the farthest vector for edge based LCM.  */

static void
compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		  kill, farthest)
     struct edge_list *edge_list;
     int n_exprs;
     sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest;
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (succ == EXIT_BLOCK_PTR)
	sbitmap_copy (farthest[x], st_avout[pred->index]);
      else
	{
	  if (pred == ENTRY_BLOCK_PTR)
	    sbitmap_zero (farthest[x]);
	  else
	    {
	      sbitmap_difference (difference, st_avout[pred->index],
				  st_antin[succ->index]);
	      sbitmap_not (temp_bitmap, st_avin[succ->index]);
	      sbitmap_a_and_b_or_c (farthest[x], difference,
				    kill[succ->index], temp_bitmap);
	    }
	}
    }

  free (temp_bitmap);
  free (difference);
}
/* Compute nearer and nearerout vectors for edge based LCM.

   This is the mirror of compute_laterin; additional comments on the
   implementation can be found before compute_laterin.  */
static void
compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout)
     struct edge_list *edge_list;
     sbitmap *farthest, *st_avloc, *nearer, *nearerout;
{
  int bb, num_edges, i;
  edge e;
  basic_block *worklist, *tos;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/stack.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  tos = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize NEARER for each edge and build a mapping from an edge to
     its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution.  */
  sbitmap_vector_ones (nearer, num_edges);

  /* Note that even though we want an optimistic setting of NEARER, we
     do not want to be overly optimistic.  Consider an incoming edge to
     the exit block.  That edge should always have a NEARER value the
     same as FARTHEST for that edge.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    sbitmap_copy (nearer[(size_t) e->aux], farthest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit
     from the loop given our optimistic initialization of NEARER.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      basic_block b = BASIC_BLOCK (bb);
      *tos++ = b;
      b->aux = b;
    }

  /* Iterate until the worklist is empty.  */
  while (tos != worklist)
    {
      /* Take the next entry off the worklist.  */
      basic_block b = *--tos;
      b->aux = NULL;

      /* Compute the intersection of NEARER for each outgoing edge from B.  */
      bb = b->index;
      sbitmap_ones (nearerout[bb]);
      for (e = b->succ; e != NULL; e = e->succ_next)
	sbitmap_a_and_b (nearerout[bb], nearerout[bb],
			 nearer[(size_t) e->aux]);

      /* Calculate NEARER for all incoming edges.  */
      for (e = b->pred; e != NULL; e = e->pred_next)
	if (sbitmap_union_of_diff (nearer[(size_t) e->aux],
				   farthest[(size_t) e->aux],
				   nearerout[e->dest->index],
				   st_avloc[e->dest->index])
	    /* If NEARER for an incoming edge was changed, then we need
	       to add the source of the incoming edge to the worklist.  */
	    && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
	  {
	    *tos++ = e->src;
	    e->src->aux = e;
	  }
    }

  /* Computation of insertion and deletion points requires computing NEAREROUT
     for the ENTRY block.  We allocated an extra entry in the NEAREROUT array
     for just this purpose.  */
  sbitmap_ones (nearerout[n_basic_blocks]);
  for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
    sbitmap_a_and_b (nearerout[n_basic_blocks],
		     nearerout[n_basic_blocks],
		     nearer[(size_t) e->aux]);

  free (tos);
}
/* Compute the insertion and deletion points for edge based reverse LCM.  */

static void
compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			   insert, delete)
     struct edge_list *edge_list;
     sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete;
{
  int x;

  for (x = 0; x < n_basic_blocks; x++)
    sbitmap_difference (delete[x], st_avloc[x], nearerout[x]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
      if (b == ENTRY_BLOCK_PTR)
	sbitmap_difference (insert[x], nearer[x], nearerout[n_basic_blocks]);
      else
	sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
    }
}
/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
   insert and delete vectors for edge based reverse LCM.  Returns an
   edgelist which is used to map the insert vector to what edge
   an expression should be inserted on.  */

struct edge_list *
pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill,
		  insert, delete)
     FILE *file ATTRIBUTE_UNUSED;
     int n_exprs;
     sbitmap *transp;
     sbitmap *st_avloc;
     sbitmap *st_antloc;
     sbitmap *kill;
     sbitmap **insert;
     sbitmap **delete;
{
  sbitmap *st_antin, *st_antout;
  sbitmap *st_avout, *st_avin, *farthest;
  sbitmap *nearer, *nearerout;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

  st_antin = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  st_antout = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  sbitmap_vector_zero (st_antin, n_basic_blocks);
  sbitmap_vector_zero (st_antout, n_basic_blocks);
  compute_antinout_edge (st_antloc, transp, st_antin, st_antout);

  /* Compute global availability.  */
  st_avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  st_avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_available (st_avloc, kill, st_avout, st_avin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
      dump_sbitmap_vector (file, "st_avloc", "", st_avloc, n_basic_blocks);
      dump_sbitmap_vector (file, "st_antloc", "", st_antloc, n_basic_blocks);
      dump_sbitmap_vector (file, "st_antin", "", st_antin, n_basic_blocks);
      dump_sbitmap_vector (file, "st_antout", "", st_antout, n_basic_blocks);
      dump_sbitmap_vector (file, "st_kill", "", kill, n_basic_blocks);
    }
#endif

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "st_avout", "", st_avout, n_basic_blocks);
      dump_sbitmap_vector (file, "st_avin", "", st_avin, n_basic_blocks);
    }
#endif

  /* Compute farthestness.  */
  farthest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		    kill, farthest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
#endif

  free (st_avin);
  free (st_avout);

  nearer = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the entry block.  */
  nearerout = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
  compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "nearerout", "", nearerout,
			   n_basic_blocks + 1);
      dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
    }
#endif

  free (farthest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			     *insert, *delete);

  free (nearerout);
  free (nearer);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   n_basic_blocks);
    }
#endif

  return edge_list;
}
/* Mode switching:

   The algorithm for setting the modes consists of scanning the insn list
   and finding all the insns which require a specific mode.  Each insn gets
   a unique struct seginfo element.  These structures are inserted into a
   list for each basic block.  For each entity, there is an array of
   bb_info over the flow graph basic blocks (local var 'bb_info'), which
   contains a list of all insns within that basic block, in the order they
   are encountered.

   For each entity, any basic block WITHOUT any insns requiring a specific
   mode is given a single entry, without a mode.  (Each basic block
   in the flow graph must have at least one entry in the segment table.)

   The LCM algorithm is then run over the flow graph to determine where to
   place the sets to the highest-priority value with respect to the first
   insn in any one block.  Any adjustments required to the transparency
   vectors are made, then the next iteration starts for the next-lower
   priority mode, until for each entity all modes are exhausted.

   More details are located in the code for optimize_mode_switching().  */
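
/* For reference, these are the target macros the pass relies on; the
   names are taken from their uses below, but the sketch of how a port
   might define them is purely illustrative (example_mode_needed and
   example_emit_mode_set are made-up helpers, not real target code):  */
#if 0
#define OPTIMIZE_MODE_SWITCHING(ENTITY) 1  /* enable for this entity */
#define NUM_MODES_FOR_MODE_SWITCHING { 2 } /* num_modes[e]; the value
					      num_modes[e] itself means
					      "no mode needed" */
#define MODE_NEEDED(ENTITY, INSN) example_mode_needed (INSN)
#define MODE_PRIORITY_TO_MODE(ENTITY, N) (N) /* priority N -> mode */
#define NORMAL_MODE(ENTITY) 0	/* mode in effect at function entry/exit */
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
  example_emit_mode_set (ENTITY, MODE, HARD_REGS_LIVE)
#endif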
/* This structure contains the information for each insn which requires
   either single or double mode to be set.
   MODE is the mode this insn must be executed in.
   INSN_PTR is the insn to be executed (may be the note that marks the
   beginning of a basic block).
   BBNUM is the flow graph basic block this insn occurs in.
   NEXT is the next insn in the same basic block.  */
struct seginfo
{
  int mode;
  rtx insn_ptr;
  int bbnum;
  struct seginfo *next;
  HARD_REG_SET regs_live;
};

struct bb_info
{
  struct seginfo *seginfo;
  int computing;
};

/* These bitmaps are used for the LCM algorithm.  */

#ifdef OPTIMIZE_MODE_SWITCHING
static sbitmap *antic;
static sbitmap *transp;
static sbitmap *comp;
static sbitmap *delete;
static sbitmap *insert;

static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET));
static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *));
static void reg_dies PARAMS ((rtx, HARD_REG_SET));
static void reg_becomes_live PARAMS ((rtx, rtx, void *));
static void make_preds_opaque PARAMS ((basic_block, int));
#endif
#ifdef OPTIMIZE_MODE_SWITCHING

/* This function will allocate a new SEGINFO structure, initialized
   with the MODE, INSN, and basic block BB parameters.  */

static struct seginfo *
new_seginfo (mode, insn, bb, regs_live)
     int mode;
     rtx insn;
     int bb;
     HARD_REG_SET regs_live;
{
  struct seginfo *ptr;
  ptr = xmalloc (sizeof (struct seginfo));
  ptr->mode = mode;
  ptr->insn_ptr = insn;
  ptr->bbnum = bb;
  ptr->next = NULL;
  COPY_HARD_REG_SET (ptr->regs_live, regs_live);
  return ptr;
}
/* Add a seginfo element to the end of a list.
   HEAD is a pointer to the list beginning.
   INFO is the structure to be linked in.  */

static void
add_seginfo (head, info)
     struct bb_info *head;
     struct seginfo *info;
{
  struct seginfo *ptr;

  if (head->seginfo == NULL)
    head->seginfo = info;
  else
    {
      ptr = head->seginfo;
      while (ptr->next != NULL)
	ptr = ptr->next;
      ptr->next = info;
    }
}
/* Make all predecessors of basic block B opaque, recursively, till we hit
   some that are already non-transparent, or an edge where aux is set; that
   denotes that a mode set is to be done on that edge.
   J is the bit number in the bitmaps that corresponds to the entity that
   we are currently handling mode-switching for.  */

static void
make_preds_opaque (b, j)
     basic_block b;
     int j;
{
  edge e;

  for (e = b->pred; e; e = e->pred_next)
    {
      basic_block pb = e->src;

      if (e->aux || ! TEST_BIT (transp[pb->index], j))
	continue;

      RESET_BIT (transp[pb->index], j);
      make_preds_opaque (pb, j);
    }
}
/* Record in LIVE that register REG died.  */

static void
reg_dies (reg, live)
     rtx reg;
     HARD_REG_SET live;
{
  int regno, nregs;

  if (GET_CODE (reg) != REG)
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
	 nregs--)
      CLEAR_HARD_REG_BIT (live, regno + nregs);
}
/* Record in LIVE that register REG became live.
   This is called via note_stores.  */

static void
reg_becomes_live (reg, setter, live)
     rtx reg;
     rtx setter ATTRIBUTE_UNUSED;
     void *live;
{
  int regno, nregs;

  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);

  if (GET_CODE (reg) != REG)
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
	 nregs--)
      SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
}
/* Find all insns that need a particular mode setting, and insert the
   necessary mode switches.  Return true if we did work.  */

int
optimize_mode_switching (file)
     FILE *file;
{
  rtx insn;
  int bb, e;
  edge eg;
  int need_commit = 0;
  sbitmap *kill;
  struct edge_list *edge_list;
  static int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
#define N_ENTITIES (sizeof num_modes / sizeof (int))
  int entity_map[N_ENTITIES];
  struct bb_info *bb_info[N_ENTITIES];
  int i, j;
  int n_entities;
  int max_num_modes = 0;

#ifdef NORMAL_MODE
  /* Increment n_basic_blocks before allocating bb_info.  */
  n_basic_blocks++;
#endif

  for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
    if (OPTIMIZE_MODE_SWITCHING (e))
      {
	/* Create the list of segments within each basic block.  */
	bb_info[n_entities]
	  = (struct bb_info *) xcalloc (n_basic_blocks, sizeof **bb_info);
	entity_map[n_entities++] = e;
	if (num_modes[e] > max_num_modes)
	  max_num_modes = num_modes[e];
      }

#ifdef NORMAL_MODE
  /* Decrement it back in case we return below.  */
  n_basic_blocks--;
#endif

  if (! n_entities)
    return 0;
#ifdef NORMAL_MODE
  /* We're going to pretend the EXIT_BLOCK is a regular basic block,
     so that switching back to normal mode when entering the
     EXIT_BLOCK isn't optimized away.  We do this by incrementing the
     basic block count, growing the VARRAY of basic_block_info and
     appending the EXIT_BLOCK_PTR to it.  */
  n_basic_blocks++;
  if (VARRAY_SIZE (basic_block_info) < n_basic_blocks)
    VARRAY_GROW (basic_block_info, n_basic_blocks);
  BASIC_BLOCK (n_basic_blocks - 1) = EXIT_BLOCK_PTR;
  EXIT_BLOCK_PTR->index = n_basic_blocks - 1;
#endif

  /* Create the bitmap vectors.  */

  antic = sbitmap_vector_alloc (n_basic_blocks, n_entities);
  transp = sbitmap_vector_alloc (n_basic_blocks, n_entities);
  comp = sbitmap_vector_alloc (n_basic_blocks, n_entities);

  sbitmap_vector_ones (transp, n_basic_blocks);
  for (j = n_entities - 1; j >= 0; j--)
    {
      int e = entity_map[j];
      int no_mode = num_modes[e];
      struct bb_info *info = bb_info[j];

      /* Determine the mode (if any) that the first use of entity E in
	 each block needs.  This will be the mode that is anticipatable
	 for this block.  Also compute the initial transparency settings.  */
      for (bb = 0 ; bb < n_basic_blocks; bb++)
	{
	  struct seginfo *ptr;
	  int last_mode = no_mode;
	  HARD_REG_SET live_now;

	  REG_SET_TO_HARD_REG_SET (live_now,
				   BASIC_BLOCK (bb)->global_live_at_start);
	  for (insn = BLOCK_HEAD (bb);
	       insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
	       insn = NEXT_INSN (insn))
	    if (INSN_P (insn))
	      {
		int mode = MODE_NEEDED (e, insn);
		rtx link;

		if (mode != no_mode && mode != last_mode)
		  {
		    last_mode = mode;
		    ptr = new_seginfo (mode, insn, bb, live_now);
		    add_seginfo (info + bb, ptr);
		    RESET_BIT (transp[bb], j);
		  }

		/* Update LIVE_NOW.  */
		for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		  if (REG_NOTE_KIND (link) == REG_DEAD)
		    reg_dies (XEXP (link, 0), live_now);

		note_stores (PATTERN (insn), reg_becomes_live, &live_now);
		for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		  if (REG_NOTE_KIND (link) == REG_UNUSED)
		    reg_dies (XEXP (link, 0), live_now);
	      }

	  info[bb].computing = last_mode;
	  /* Check for blocks without ANY mode requirements.  */
	  if (last_mode == no_mode)
	    {
	      ptr = new_seginfo (no_mode, insn, bb, live_now);
	      add_seginfo (info + bb, ptr);
	    }
	}
#ifdef NORMAL_MODE
      {
	int mode = NORMAL_MODE (e);

	if (mode != no_mode)
	  {
	    for (eg = ENTRY_BLOCK_PTR->succ; eg; eg = eg->succ_next)
	      {
		bb = eg->dest->index;

		/* By always making this nontransparent, we save
		   an extra check in make_preds_opaque.  We also
		   need this to avoid confusing pre_edge_lcm when
		   antic is cleared but transp and comp are set.  */
		RESET_BIT (transp[bb], j);

		/* If the block already has MODE, pretend it
		   has none (because we don't need to set it),
		   but retain whatever mode it computes.  */
		if (info[bb].seginfo->mode == mode)
		  info[bb].seginfo->mode = no_mode;

		/* Insert a fake computing definition of MODE into entry
		   blocks which compute no mode.  This represents the mode on
		   entry.  */
		else if (info[bb].computing == no_mode)
		  {
		    info[bb].computing = mode;
		    info[bb].seginfo->mode = no_mode;
		  }
	      }

	    bb = n_basic_blocks - 1;
	    info[bb].seginfo->mode = mode;
	  }
      }
#endif /* NORMAL_MODE */
    }
  kill = sbitmap_vector_alloc (n_basic_blocks, n_entities);
  for (i = 0; i < max_num_modes; i++)
    {
      int current_mode[N_ENTITIES];

      /* Set the anticipatable and computing arrays.  */
      sbitmap_vector_zero (antic, n_basic_blocks);
      sbitmap_vector_zero (comp, n_basic_blocks);
      for (j = n_entities - 1; j >= 0; j--)
	{
	  int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
	  struct bb_info *info = bb_info[j];

	  for (bb = 0 ; bb < n_basic_blocks; bb++)
	    {
	      if (info[bb].seginfo->mode == m)
		SET_BIT (antic[bb], j);

	      if (info[bb].computing == m)
		SET_BIT (comp[bb], j);
	    }
	}

      /* Calculate the optimal locations for placing mode switches to
	 modes with priority I.  */

      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	sbitmap_not (kill[bb], transp[bb]);
      edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
				kill, &insert, &delete);
      for (j = n_entities - 1; j >= 0; j--)
	{
	  /* Insert all mode sets that have been inserted by lcm.  */
	  int no_mode = num_modes[entity_map[j]];

	  /* Wherever we have moved a mode setting upwards in the flow graph,
	     the blocks between the new setting site and the now redundant
	     computation cease to be transparent for any lower-priority
	     mode of the same entity.  First set the aux field of each
	     insertion site edge non-transparent, then propagate the new
	     non-transparency from the redundant computation upwards till
	     we hit an insertion site or an already non-transparent block.  */
	  for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
	    {
	      edge eg = INDEX_EDGE (edge_list, e);
	      int mode;
	      basic_block src_bb;
	      HARD_REG_SET live_at_edge;
	      rtx mode_set;

	      eg->aux = 0;

	      if (! TEST_BIT (insert[e], j))
		continue;

	      eg->aux = (void *) 1;

	      mode = current_mode[j];
	      src_bb = eg->src;

	      REG_SET_TO_HARD_REG_SET (live_at_edge,
				       src_bb->global_live_at_end);

	      start_sequence ();
	      EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
	      mode_set = gen_sequence ();
	      end_sequence ();

	      /* If this is an abnormal edge, we'll insert at the end
		 of the previous block.  */
	      if (eg->flags & EDGE_ABNORMAL)
		{
		  if (GET_CODE (src_bb->end) == JUMP_INSN)
		    emit_insn_before (mode_set, src_bb->end);
		  /* It doesn't make sense to switch to normal mode
		     after a CALL_INSN, so we're going to abort if we
		     find one.  The cases in which a CALL_INSN may
		     have an abnormal edge are sibcalls and EH edges.
		     In the case of sibcalls, the dest basic-block is
		     the EXIT_BLOCK, that runs in normal mode; it is
		     assumed that a sibcall insn requires normal mode
		     itself, so no mode switch would be required after
		     the call (it wouldn't make sense, anyway).  In
		     the case of EH edges, EH entry points also start
		     in normal mode, so a similar reasoning applies.  */
		  else if (GET_CODE (src_bb->end) == INSN)
		    src_bb->end = emit_insn_after (mode_set, src_bb->end);
		  else
		    abort ();

		  bb_info[j][src_bb->index].computing = mode;
		  RESET_BIT (transp[src_bb->index], j);
		}
	      else
		{
		  need_commit = 1;
		  insert_insn_on_edge (mode_set, eg);
		}
	    }

	  for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	    if (TEST_BIT (delete[bb], j))
	      {
		make_preds_opaque (BASIC_BLOCK (bb), j);
		/* Cancel the 'deleted' mode set.  */
		bb_info[j][bb].seginfo->mode = no_mode;
	      }
	}

      free_edge_list (edge_list);
    }
#ifdef NORMAL_MODE
  /* Restore the special status of EXIT_BLOCK.  */
  n_basic_blocks--;
  VARRAY_POP (basic_block_info);
  EXIT_BLOCK_PTR->index = EXIT_BLOCK;
#endif

  /* Now output the remaining mode sets in all the segments.  */
  for (j = n_entities - 1; j >= 0; j--)
    {
      int no_mode = num_modes[entity_map[j]];

#ifdef NORMAL_MODE
      if (bb_info[j][n_basic_blocks].seginfo->mode != no_mode)
	{
	  edge eg;
	  struct seginfo *ptr = bb_info[j][n_basic_blocks].seginfo;

	  for (eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
	    {
	      rtx mode_set;

	      if (bb_info[j][eg->src->index].computing == ptr->mode)
		continue;

	      start_sequence ();
	      EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
	      mode_set = gen_sequence ();
	      end_sequence ();

	      /* If this is an abnormal edge, we'll insert at the end of the
		 previous block.  */
	      if (eg->flags & EDGE_ABNORMAL)
		{
		  if (GET_CODE (eg->src->end) == JUMP_INSN)
		    emit_insn_before (mode_set, eg->src->end);
		  else if (GET_CODE (eg->src->end) == INSN)
		    eg->src->end = emit_insn_after (mode_set, eg->src->end);
		  else
		    abort ();
		}
	      else
		{
		  need_commit = 1;
		  insert_insn_on_edge (mode_set, eg);
		}
	    }
	}
#endif
      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
	{
	  struct seginfo *ptr, *next;
	  for (ptr = bb_info[j][bb].seginfo; ptr; ptr = next)
	    {
	      next = ptr->next;
	      if (ptr->mode != no_mode)
		{
		  rtx mode_set;

		  start_sequence ();
		  EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
		  mode_set = gen_sequence ();
		  end_sequence ();

		  if (GET_CODE (ptr->insn_ptr) == NOTE
		      && (NOTE_LINE_NUMBER (ptr->insn_ptr)
			  == NOTE_INSN_BASIC_BLOCK))
		    emit_block_insn_after (mode_set, ptr->insn_ptr,
					   BASIC_BLOCK (ptr->bbnum));
		  else
		    emit_block_insn_before (mode_set, ptr->insn_ptr,
					    BASIC_BLOCK (ptr->bbnum));
		}

	      free (ptr);
	    }
	}

      free (bb_info[j]);
    }
  /* Finished.  Free up all the things we've allocated.  */
  sbitmap_vector_free (kill);
  sbitmap_vector_free (antic);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
  sbitmap_vector_free (delete);
  sbitmap_vector_free (insert);

  if (need_commit)
    commit_edge_insertions ();

  /* Ideally we'd figure out what blocks were affected and start from
     there, but this is enormously complicated by commit_edge_insertions,
     which would screw up any indices we'd collected, and also need to
     be involved in the update.  Bail and recompute global life info for
     everything.  */

  allocate_reg_life_data ();
  update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
		    (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
		     | PROP_SCAN_DEAD_CODE | PROP_REG_INFO));

  return 1;
}
#endif /* OPTIMIZE_MODE_SWITCHING */