[official-gcc.git] / gcc / lcm.c
1 /* Generic partial redundancy elimination with lazy code motion support.
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
19 02111-1307, USA. */
21 /* These routines are meant to be used by various optimization
22 passes which can be modeled as lazy code motion problems,
23 including, but not limited to:
25 * Traditional partial redundancy elimination.
27 * Placement of caller/callee register saves/restores.
29 * Load/store motion.
31 * Copy motion.
33 * Conversion of flat register files to a stacked register
34 model.
36 * Dead load/store elimination.
38 These routines accept as input:
40 * Basic block information (number of blocks, lists of
41 predecessors and successors). Note the granularity
42 does not need to be basic blocks; they could be statements
43 or functions.
45 * Bitmaps of local properties (computed, transparent and
46 anticipatable expressions).
48 The output of these routines is a bitmap of redundant computations
49 and a bitmap of optimal placement points. */
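/* A minimal usage sketch (not part of GCC): how a client pass might feed
   its per-block local properties to pre_edge_lcm.  The function name
   example_lcm_client and the elided property computation are hypothetical;
   only the calls shown (sbitmap_vector_alloc, pre_edge_lcm, free_edge_list)
   come from this file and its headers.  */
#if 0
static void
example_lcm_client (file, n_exprs)
     FILE *file;
     int n_exprs;
{
  sbitmap *transp, *avloc, *antloc, *kill;
  sbitmap *insert, *delete;
  struct edge_list *edge_list;

  /* One bit per candidate expression, one bitmap per basic block.  */
  transp = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  avloc = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  antloc = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
  kill = sbitmap_vector_alloc (n_basic_blocks, n_exprs);

  /* ... the pass fills in its local properties here ...  */

  /* Solve the LCM equations; INSERT is indexed by edge number in
     EDGE_LIST, DELETE by basic block index.  */
  edge_list = pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill,
			    &insert, &delete);

  /* ... walk NUM_EDGES (edge_list) / INDEX_EDGE to apply the results ...  */

  free_edge_list (edge_list);
}
#endif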
52 #include "config.h"
53 #include "system.h"
54 #include "rtl.h"
55 #include "regs.h"
56 #include "hard-reg-set.h"
57 #include "flags.h"
58 #include "real.h"
59 #include "insn-config.h"
60 #include "recog.h"
61 #include "basic-block.h"
62 #include "tm_p.h"
64 /* We want target macros for the mode switching code to be able to refer
65 to instruction attribute values. */
66 #include "insn-attr.h"
68 /* Edge based LCM routines. */
69 static void compute_antinout_edge PARAMS ((sbitmap *, sbitmap *,
70 sbitmap *, sbitmap *));
71 static void compute_earliest PARAMS ((struct edge_list *, int,
72 sbitmap *, sbitmap *,
73 sbitmap *, sbitmap *,
74 sbitmap *));
75 static void compute_laterin PARAMS ((struct edge_list *, sbitmap *,
76 sbitmap *, sbitmap *,
77 sbitmap *));
78 static void compute_insert_delete PARAMS ((struct edge_list *edge_list,
79 sbitmap *, sbitmap *,
80 sbitmap *, sbitmap *,
81 sbitmap *));
83 /* Edge based LCM routines on a reverse flowgraph. */
84 static void compute_farthest PARAMS ((struct edge_list *, int,
85 sbitmap *, sbitmap *,
86 sbitmap*, sbitmap *,
87 sbitmap *));
88 static void compute_nearerout PARAMS ((struct edge_list *, sbitmap *,
89 sbitmap *, sbitmap *,
90 sbitmap *));
91 static void compute_rev_insert_delete PARAMS ((struct edge_list *edge_list,
92 sbitmap *, sbitmap *,
93 sbitmap *, sbitmap *,
94 sbitmap *));
96 /* Edge based lcm routines. */
98 /* Compute expression anticipatability at entrance and exit of each block.
99 This is done based on the flow graph, and not on the pred-succ lists.
100 Other than that, it's pretty much identical to compute_antinout. */
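/* Restated as dataflow equations, the fixed-point loop below computes
   (ANTOUT is the empty set for predecessors of the EXIT block):

     ANTOUT(b) = intersection over successors s of b of ANTIN(s)
     ANTIN(b)  = ANTLOC(b) union (TRANSP(b) intersect ANTOUT(b))  */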
102 static void
103 compute_antinout_edge (antloc, transp, antin, antout)
104 sbitmap *antloc;
105 sbitmap *transp;
106 sbitmap *antin;
107 sbitmap *antout;
109 int bb;
110 edge e;
111 basic_block *worklist, *qin, *qout, *qend;
112 unsigned int qlen;
114 /* Allocate a worklist array/queue. Entries are only added to the
115 list if they were not already on the list. So the size is
116 bounded by the number of basic blocks. */
117 qin = qout = worklist
118 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
120 /* We want a maximal solution, so make an optimistic initialization of
121 ANTIN. */
122 sbitmap_vector_ones (antin, n_basic_blocks);
124 /* Put every block on the worklist; this is necessary because of the
125 optimistic initialization of ANTIN above. */
126 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
128 *qin++ = BASIC_BLOCK (bb);
129 BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
132 qin = worklist;
133 qend = &worklist[n_basic_blocks];
134 qlen = n_basic_blocks;
136 /* Mark blocks which are predecessors of the exit block so that we
137 can easily identify them below. */
138 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
139 e->src->aux = EXIT_BLOCK_PTR;
141 /* Iterate until the worklist is empty. */
142 while (qlen)
144 /* Take the first entry off the worklist. */
145 basic_block b = *qout++;
146 bb = b->index;
147 qlen--;
149 if (qout >= qend)
150 qout = worklist;
152 if (b->aux == EXIT_BLOCK_PTR)
153 /* Do not clear the aux field for blocks which are predecessors of
154 the EXIT block. That way we never add them to the worklist
155 again. */
156 sbitmap_zero (antout[bb]);
157 else
159 /* Clear the aux field of this block so that it can be added to
160 the worklist again if necessary. */
161 b->aux = NULL;
162 sbitmap_intersection_of_succs (antout[bb], antin, bb);
165 if (sbitmap_a_or_b_and_c (antin[bb], antloc[bb], transp[bb], antout[bb]))
166 /* If the in state of this block changed, then we need
167 to add the predecessors of this block to the worklist
168 if they are not already on the worklist. */
169 for (e = b->pred; e; e = e->pred_next)
170 if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
172 *qin++ = e->src;
173 e->src->aux = e;
174 qlen++;
175 if (qin >= qend)
176 qin = worklist;
180 free (worklist);
183 /* Compute the earliest vector for edge based lcm. */
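/* For an edge (p,s) the loop below reduces to

     EARLIEST(p,s) = ANTIN(s)                                   if p == ENTRY
                   = empty                                      if s == EXIT
                   = (ANTIN(s) - AVOUT(p))
                       intersect (KILL(p) union not ANTOUT(p))  otherwise  */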
185 static void
186 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest)
187 struct edge_list *edge_list;
188 int n_exprs;
189 sbitmap *antin, *antout, *avout, *kill, *earliest;
191 sbitmap difference, temp_bitmap;
192 int x, num_edges;
193 basic_block pred, succ;
195 num_edges = NUM_EDGES (edge_list);
197 difference = sbitmap_alloc (n_exprs);
198 temp_bitmap = sbitmap_alloc (n_exprs);
200 for (x = 0; x < num_edges; x++)
202 pred = INDEX_EDGE_PRED_BB (edge_list, x);
203 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
204 if (pred == ENTRY_BLOCK_PTR)
205 sbitmap_copy (earliest[x], antin[succ->index]);
206 else
208 /* We refer to the EXIT_BLOCK index, instead of testing for
209 EXIT_BLOCK_PTR, so that EXIT_BLOCK_PTR's index can be
210 changed so as to pretend it's a regular block, so that
211 its antin can be taken into account. */
212 if (succ->index == EXIT_BLOCK)
213 sbitmap_zero (earliest[x]);
214 else
216 sbitmap_difference (difference, antin[succ->index],
217 avout[pred->index]);
218 sbitmap_not (temp_bitmap, antout[pred->index]);
219 sbitmap_a_and_b_or_c (earliest[x], difference,
220 kill[pred->index], temp_bitmap);
225 free (temp_bitmap);
226 free (difference);
229 /* later(p,s) is dependent on the calculation of laterin(p).
230 laterin(p) is dependent on the calculation of later(p2,p).
232 laterin(ENTRY) is defined as all 0's
233 later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
234 laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).
236 If we progress in this manner, starting with all basic blocks
237 in the work list, anytime we change later(bb), we need to add
238 succs(bb) to the worklist if they are not already on the worklist.
240 Boundary conditions:
242 We prime the worklist with all the normal basic blocks. The ENTRY block can
243 never be added to the worklist since it is never the successor of any
244 block. We explicitly prevent the EXIT block from being added to the
245 worklist.
247 We optimistically initialize LATER. That is the only time this routine
248 will compute LATER for an edge out of the entry block since the entry
249 block is never on the worklist. Thus, LATERIN is neither used nor
250 computed for the ENTRY block.
252 Since the EXIT block is never added to the worklist, we will neither
253 use nor compute LATERIN for the exit block. Edges which reach the
254 EXIT block are handled in the normal fashion inside the loop. However,
255 the insertion/deletion computation needs LATERIN(EXIT), so we have
256 to compute it. */
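/* Restated as dataflow equations, for a block b and an edge (p,s):

     LATERIN(b) = intersection over incoming edges e of b of LATER(e)
     LATER(p,s) = EARLIEST(p,s) union (LATERIN(p) - ANTLOC(p))

   with LATER on edges out of the ENTRY block pinned to EARLIEST,
   as described above.  */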
258 static void
259 compute_laterin (edge_list, earliest, antloc, later, laterin)
260 struct edge_list *edge_list;
261 sbitmap *earliest, *antloc, *later, *laterin;
263 int bb, num_edges, i;
264 edge e;
265 basic_block *worklist, *qin, *qout, *qend;
266 unsigned int qlen;
268 num_edges = NUM_EDGES (edge_list);
270 /* Allocate a worklist array/queue. Entries are only added to the
271 list if they were not already on the list. So the size is
272 bounded by the number of basic blocks. */
273 qin = qout = worklist
274 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
276 /* Initialize a mapping from each edge to its index. */
277 for (i = 0; i < num_edges; i++)
278 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
280 /* We want a maximal solution, so initially consider LATER true for
281 all edges. This allows propagation through a loop since the incoming
282 loop edge will have LATER set, so if all the other incoming edges
283 to the loop are set, then LATERIN will be set for the head of the
284 loop.
286 If the optimistic setting of LATER on that edge was incorrect (for
287 example the expression is ANTLOC in a block within the loop) then
288 this algorithm will detect it when we process the block at the head
289 of the optimistic edge. That will requeue the affected blocks. */
290 sbitmap_vector_ones (later, num_edges);
292 /* Note that even though we want an optimistic setting of LATER, we
293 do not want to be overly optimistic. Consider an outgoing edge from
294 the entry block. That edge should always have a LATER value the
295 same as EARLIEST for that edge. */
296 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
297 sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);
299 /* Add all the blocks to the worklist. This prevents an early exit from
300 the loop given our optimistic initialization of LATER above. */
301 for (bb = 0; bb < n_basic_blocks; bb++)
303 basic_block b = BASIC_BLOCK (bb);
304 *qin++ = b;
305 b->aux = b;
307 qin = worklist;
308 /* Note that we do not use the last allocated element for our queue,
309 as EXIT_BLOCK is never inserted into it. In fact the above allocation
310 of n_basic_blocks + 1 elements is not necessary. */
311 qend = &worklist[n_basic_blocks];
312 qlen = n_basic_blocks;
314 /* Iterate until the worklist is empty. */
315 while (qlen)
317 /* Take the first entry off the worklist. */
318 basic_block b = *qout++;
319 b->aux = NULL;
320 qlen--;
321 if (qout >= qend)
322 qout = worklist;
324 /* Compute the intersection of LATERIN for each incoming edge to B. */
325 bb = b->index;
326 sbitmap_ones (laterin[bb]);
327 for (e = b->pred; e != NULL; e = e->pred_next)
328 sbitmap_a_and_b (laterin[bb], laterin[bb], later[(size_t)e->aux]);
330 /* Calculate LATER for all outgoing edges. */
331 for (e = b->succ; e != NULL; e = e->succ_next)
332 if (sbitmap_union_of_diff (later[(size_t) e->aux],
333 earliest[(size_t) e->aux],
334 laterin[e->src->index],
335 antloc[e->src->index])
336 /* If LATER for an outgoing edge was changed, then we need
337 to add the target of the outgoing edge to the worklist. */
338 && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
340 *qin++ = e->dest;
341 e->dest->aux = e;
342 qlen++;
343 if (qin >= qend)
344 qin = worklist;
348 /* Computation of insertion and deletion points requires computing LATERIN
349 for the EXIT block. We allocated an extra entry in the LATERIN array
350 for just this purpose. */
351 sbitmap_ones (laterin[n_basic_blocks]);
352 for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
353 sbitmap_a_and_b (laterin[n_basic_blocks],
354 laterin[n_basic_blocks],
355 later[(size_t) e->aux]);
357 free (worklist);
360 /* Compute the insertion and deletion points for edge based LCM. */
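/* Restated as equations, for a block b and an edge (p,s):

     DELETE(b)   = ANTLOC(b) - LATERIN(b)
     INSERT(p,s) = LATER(p,s) - LATERIN(s)

   where LATERIN(EXIT) is the extra laterin[n_basic_blocks] entry.  */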
362 static void
363 compute_insert_delete (edge_list, antloc, later, laterin,
364 insert, delete)
365 struct edge_list *edge_list;
366 sbitmap *antloc, *later, *laterin, *insert, *delete;
368 int x;
370 for (x = 0; x < n_basic_blocks; x++)
371 sbitmap_difference (delete[x], antloc[x], laterin[x]);
373 for (x = 0; x < NUM_EDGES (edge_list); x++)
375 basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
377 if (b == EXIT_BLOCK_PTR)
378 sbitmap_difference (insert[x], later[x], laterin[n_basic_blocks]);
379 else
380 sbitmap_difference (insert[x], later[x], laterin[b->index]);
384 /* Given local properties TRANSP, AVLOC, ANTLOC, KILL return the insert and
385 delete vectors for edge based LCM. Returns an edgelist which is used to
386 map the insert vector to what edge an expression should be inserted on. */
388 struct edge_list *
389 pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
390 FILE *file ATTRIBUTE_UNUSED;
391 int n_exprs;
392 sbitmap *transp;
393 sbitmap *avloc;
394 sbitmap *antloc;
395 sbitmap *kill;
396 sbitmap **insert;
397 sbitmap **delete;
399 sbitmap *antin, *antout, *earliest;
400 sbitmap *avin, *avout;
401 sbitmap *later, *laterin;
402 struct edge_list *edge_list;
403 int num_edges;
405 edge_list = create_edge_list ();
406 num_edges = NUM_EDGES (edge_list);
408 #ifdef LCM_DEBUG_INFO
409 if (file)
411 fprintf (file, "Edge List:\n");
412 verify_edge_list (file, edge_list);
413 print_edge_list (file, edge_list);
414 dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
415 dump_sbitmap_vector (file, "antloc", "", antloc, n_basic_blocks);
416 dump_sbitmap_vector (file, "avloc", "", avloc, n_basic_blocks);
417 dump_sbitmap_vector (file, "kill", "", kill, n_basic_blocks);
419 #endif
421 /* Compute global availability. */
422 avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
423 avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
424 compute_available (avloc, kill, avout, avin);
425 sbitmap_vector_free (avin);
427 /* Compute global anticipatability. */
428 antin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
429 antout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
430 compute_antinout_edge (antloc, transp, antin, antout);
432 #ifdef LCM_DEBUG_INFO
433 if (file)
435 dump_sbitmap_vector (file, "antin", "", antin, n_basic_blocks);
436 dump_sbitmap_vector (file, "antout", "", antout, n_basic_blocks);
438 #endif
440 /* Compute earliestness. */
441 earliest = sbitmap_vector_alloc (num_edges, n_exprs);
442 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);
444 #ifdef LCM_DEBUG_INFO
445 if (file)
446 dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
447 #endif
449 sbitmap_vector_free (antout);
450 sbitmap_vector_free (antin);
451 sbitmap_vector_free (avout);
453 later = sbitmap_vector_alloc (num_edges, n_exprs);
455 /* Allocate an extra element for the exit block in the laterin vector. */
456 laterin = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
457 compute_laterin (edge_list, earliest, antloc, later, laterin);
459 #ifdef LCM_DEBUG_INFO
460 if (file)
462 dump_sbitmap_vector (file, "laterin", "", laterin, n_basic_blocks + 1);
463 dump_sbitmap_vector (file, "later", "", later, num_edges);
465 #endif
467 sbitmap_vector_free (earliest);
469 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
470 *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
471 compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);
473 sbitmap_vector_free (laterin);
474 sbitmap_vector_free (later);
476 #ifdef LCM_DEBUG_INFO
477 if (file)
479 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
480 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
481 n_basic_blocks);
483 #endif
485 return edge_list;
488 /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors,
489 iterating until the values reach a fixed point. */
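/* Restated as dataflow equations, the fixed-point loop below computes
   (AVIN is the empty set for successors of the ENTRY block):

     AVIN(b)  = intersection over predecessors p of b of AVOUT(p)
     AVOUT(b) = AVLOC(b) union (AVIN(b) - KILL(b))  */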
491 void
492 compute_available (avloc, kill, avout, avin)
493 sbitmap *avloc, *kill, *avout, *avin;
495 int bb;
496 edge e;
497 basic_block *worklist, *qin, *qout, *qend;
498 unsigned int qlen;
500 /* Allocate a worklist array/queue. Entries are only added to the
501 list if they were not already on the list. So the size is
502 bounded by the number of basic blocks. */
503 qin = qout = worklist
504 = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);
506 /* We want a maximal solution. */
507 sbitmap_vector_ones (avout, n_basic_blocks);
509 /* Put every block on the worklist; this is necessary because of the
510 optimistic initialization of AVOUT above. */
511 for (bb = 0; bb < n_basic_blocks; bb++)
513 *qin++ = BASIC_BLOCK (bb);
514 BASIC_BLOCK (bb)->aux = BASIC_BLOCK (bb);
517 qin = worklist;
518 qend = &worklist[n_basic_blocks];
519 qlen = n_basic_blocks;
521 /* Mark blocks which are successors of the entry block so that we
522 can easily identify them below. */
523 for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
524 e->dest->aux = ENTRY_BLOCK_PTR;
526 /* Iterate until the worklist is empty. */
527 while (qlen)
529 /* Take the first entry off the worklist. */
530 basic_block b = *qout++;
531 bb = b->index;
532 qlen--;
534 if (qout >= qend)
535 qout = worklist;
537 /* If one of the predecessor blocks is the ENTRY block, then the
538 intersection of avouts is the null set. We can identify such blocks
539 by the special value in the AUX field in the block structure. */
540 if (b->aux == ENTRY_BLOCK_PTR)
541 /* Do not clear the aux field for blocks which are successors of the
542 ENTRY block. That way we never add them to the worklist again. */
543 sbitmap_zero (avin[bb]);
544 else
546 /* Clear the aux field of this block so that it can be added to
547 the worklist again if necessary. */
548 b->aux = NULL;
549 sbitmap_intersection_of_preds (avin[bb], avout, bb);
552 if (sbitmap_union_of_diff (avout[bb], avloc[bb], avin[bb], kill[bb]))
553 /* If the out state of this block changed, then we need
554 to add the successors of this block to the worklist
555 if they are not already on the worklist. */
556 for (e = b->succ; e; e = e->succ_next)
557 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
559 *qin++ = e->dest;
560 e->dest->aux = e;
561 qlen++;
563 if (qin >= qend)
564 qin = worklist;
568 free (worklist);
571 /* Compute the farthest vector for edge based lcm. */
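/* For an edge (p,s), mirroring compute_earliest on the reverse graph:

     FARTHEST(p,s) = ST_AVOUT(p)                                  if s == EXIT
                   = empty                                        if p == ENTRY
                   = (ST_AVOUT(p) - ST_ANTIN(s))
                       intersect (KILL(s) union not ST_AVIN(s))   otherwise  */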
573 static void
574 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
575 kill, farthest)
576 struct edge_list *edge_list;
577 int n_exprs;
578 sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest;
580 sbitmap difference, temp_bitmap;
581 int x, num_edges;
582 basic_block pred, succ;
584 num_edges = NUM_EDGES (edge_list);
586 difference = sbitmap_alloc (n_exprs);
587 temp_bitmap = sbitmap_alloc (n_exprs);
589 for (x = 0; x < num_edges; x++)
591 pred = INDEX_EDGE_PRED_BB (edge_list, x);
592 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
593 if (succ == EXIT_BLOCK_PTR)
594 sbitmap_copy (farthest[x], st_avout[pred->index]);
595 else
597 if (pred == ENTRY_BLOCK_PTR)
598 sbitmap_zero (farthest[x]);
599 else
601 sbitmap_difference (difference, st_avout[pred->index],
602 st_antin[succ->index]);
603 sbitmap_not (temp_bitmap, st_avin[succ->index]);
604 sbitmap_a_and_b_or_c (farthest[x], difference,
605 kill[succ->index], temp_bitmap);
610 free (temp_bitmap);
611 free (difference);
614 /* Compute nearer and nearerout vectors for edge based lcm.
616 This is the mirror of compute_laterin; additional comments on the
617 implementation can be found before compute_laterin. */
619 static void
620 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout)
621 struct edge_list *edge_list;
622 sbitmap *farthest, *st_avloc, *nearer, *nearerout;
624 int bb, num_edges, i;
625 edge e;
626 basic_block *worklist, *tos;
628 num_edges = NUM_EDGES (edge_list);
630 /* Allocate a worklist array/queue. Entries are only added to the
631 list if they were not already on the list. So the size is
632 bounded by the number of basic blocks. */
633 tos = worklist
634 = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
636 /* Build a mapping from each edge to its index; NEARER itself is
637 initialized just below. */
638 for (i = 0; i < num_edges; i++)
639 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
641 /* We want a maximal solution. */
642 sbitmap_vector_ones (nearer, num_edges);
644 /* Note that even though we want an optimistic setting of NEARER, we
645 do not want to be overly optimistic. Consider an incoming edge to
646 the exit block. That edge should always have a NEARER value the
647 same as FARTHEST for that edge. */
648 for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
649 sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);
651 /* Add all the blocks to the worklist. This prevents an early exit
652 from the loop given our optimistic initialization of NEARER. */
653 for (bb = 0; bb < n_basic_blocks; bb++)
655 basic_block b = BASIC_BLOCK (bb);
656 *tos++ = b;
657 b->aux = b;
660 /* Iterate until the worklist is empty. */
661 while (tos != worklist)
663 /* Take the first entry off the worklist. */
664 basic_block b = *--tos;
665 b->aux = NULL;
667 /* Compute the intersection of NEARER for each outgoing edge from B. */
668 bb = b->index;
669 sbitmap_ones (nearerout[bb]);
670 for (e = b->succ; e != NULL; e = e->succ_next)
671 sbitmap_a_and_b (nearerout[bb], nearerout[bb],
672 nearer[(size_t) e->aux]);
674 /* Calculate NEARER for all incoming edges. */
675 for (e = b->pred; e != NULL; e = e->pred_next)
676 if (sbitmap_union_of_diff (nearer[(size_t) e->aux],
677 farthest[(size_t) e->aux],
678 nearerout[e->dest->index],
679 st_avloc[e->dest->index])
680 /* If NEARER for an incoming edge was changed, then we need
681 to add the source of the incoming edge to the worklist. */
682 && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
684 *tos++ = e->src;
685 e->src->aux = e;
689 /* Computation of insertion and deletion points requires computing NEAREROUT
690 for the ENTRY block. We allocated an extra entry in the NEAREROUT array
691 for just this purpose. */
692 sbitmap_ones (nearerout[n_basic_blocks]);
693 for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
694 sbitmap_a_and_b (nearerout[n_basic_blocks],
695 nearerout[n_basic_blocks],
696 nearer[(size_t) e->aux]);
698 free (tos);
701 /* Compute the insertion and deletion points for edge based LCM. */
703 static void
704 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
705 insert, delete)
706 struct edge_list *edge_list;
707 sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete;
709 int x;
711 for (x = 0; x < n_basic_blocks; x++)
712 sbitmap_difference (delete[x], st_avloc[x], nearerout[x]);
714 for (x = 0; x < NUM_EDGES (edge_list); x++)
716 basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
717 if (b == ENTRY_BLOCK_PTR)
718 sbitmap_difference (insert[x], nearer[x], nearerout[n_basic_blocks]);
719 else
720 sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
724 /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
725 insert and delete vectors for edge based reverse LCM. Returns an
726 edgelist which is used to map the insert vector to what edge
727 an expression should be inserted on. */
729 struct edge_list *
730 pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill,
731 insert, delete)
732 FILE *file ATTRIBUTE_UNUSED;
733 int n_exprs;
734 sbitmap *transp;
735 sbitmap *st_avloc;
736 sbitmap *st_antloc;
737 sbitmap *kill;
738 sbitmap **insert;
739 sbitmap **delete;
741 sbitmap *st_antin, *st_antout;
742 sbitmap *st_avout, *st_avin, *farthest;
743 sbitmap *nearer, *nearerout;
744 struct edge_list *edge_list;
745 int num_edges;
747 edge_list = create_edge_list ();
748 num_edges = NUM_EDGES (edge_list);
750 st_antin = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
751 st_antout = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, n_exprs);
752 sbitmap_vector_zero (st_antin, n_basic_blocks);
753 sbitmap_vector_zero (st_antout, n_basic_blocks);
754 compute_antinout_edge (st_antloc, transp, st_antin, st_antout);
756 /* Compute global availability. */
757 st_avout = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
758 st_avin = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
759 compute_available (st_avloc, kill, st_avout, st_avin);
761 #ifdef LCM_DEBUG_INFO
762 if (file)
764 fprintf (file, "Edge List:\n");
765 verify_edge_list (file, edge_list);
766 print_edge_list (file, edge_list);
767 dump_sbitmap_vector (file, "transp", "", transp, n_basic_blocks);
768 dump_sbitmap_vector (file, "st_avloc", "", st_avloc, n_basic_blocks);
769 dump_sbitmap_vector (file, "st_antloc", "", st_antloc, n_basic_blocks);
770 dump_sbitmap_vector (file, "st_antin", "", st_antin, n_basic_blocks);
771 dump_sbitmap_vector (file, "st_antout", "", st_antout, n_basic_blocks);
772 dump_sbitmap_vector (file, "st_kill", "", kill, n_basic_blocks);
774 #endif
776 #ifdef LCM_DEBUG_INFO
777 if (file)
779 dump_sbitmap_vector (file, "st_avout", "", st_avout, n_basic_blocks);
780 dump_sbitmap_vector (file, "st_avin", "", st_avin, n_basic_blocks);
782 #endif
784 /* Compute farthestness. */
785 farthest = sbitmap_vector_alloc (num_edges, n_exprs);
786 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
787 kill, farthest);
789 #ifdef LCM_DEBUG_INFO
790 if (file)
791 dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
792 #endif
794 sbitmap_vector_free (st_antin);
795 sbitmap_vector_free (st_antout);
797 sbitmap_vector_free (st_avin);
798 sbitmap_vector_free (st_avout);
800 nearer = sbitmap_vector_alloc (num_edges, n_exprs);
802 /* Allocate an extra element for the entry block. */
803 nearerout = sbitmap_vector_alloc (n_basic_blocks + 1, n_exprs);
804 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);
806 #ifdef LCM_DEBUG_INFO
807 if (file)
809 dump_sbitmap_vector (file, "nearerout", "", nearerout,
810 n_basic_blocks + 1);
811 dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
813 #endif
815 sbitmap_vector_free (farthest);
817 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
818 *delete = sbitmap_vector_alloc (n_basic_blocks, n_exprs);
819 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
820 *insert, *delete);
822 sbitmap_vector_free (nearerout);
823 sbitmap_vector_free (nearer);
825 #ifdef LCM_DEBUG_INFO
826 if (file)
828 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
829 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
830 n_basic_blocks);
832 #endif
833 return edge_list;
836 /* Mode switching:
838 The algorithm for setting the modes consists of scanning the insn list
839 and finding all the insns which require a specific mode. Each insn gets
840 a unique struct seginfo element. These structures are inserted into a list
841 for each basic block. For each entity, there is an array of bb_info over
842 the flow graph basic blocks (local var 'bb_info'); each element contains
843 a list of all insns within that basic block, in the order they are encountered.
845 For each entity, any basic block WITHOUT any insns requiring a specific
846 mode is given a single entry, without a mode. (Each basic block
847 in the flow graph must have at least one entry in the segment table.)
849 The LCM algorithm is then run over the flow graph to determine where to
850 place the sets for the highest-priority mode with respect to the first
851 insn in any one block. Any adjustments required to the transparency
852 vectors are made, then the next iteration starts for the next-lower
853 priority mode, until all modes for each entity are exhausted.
855 More details are located in the code for optimize_mode_switching(). */
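/* A hypothetical illustration (no real target uses these exact definitions):
   the target macros this code expects, shown for a single entity with two
   real modes; the value 2 (== NUM_MODES_FOR_MODE_SWITCHING for the entity)
   means "no mode needed".  hypothetical_mode_needed and
   hypothetical_emit_mode_set stand in for target-specific code; the macro
   names themselves are the ones used by optimize_mode_switching below.  */
#if 0
#define OPTIMIZE_MODE_SWITCHING(ENTITY) (1)
#define NUM_MODES_FOR_MODE_SWITCHING { 2 }
#define MODE_NEEDED(ENTITY, INSN) hypothetical_mode_needed (INSN)
#define MODE_PRIORITY_TO_MODE(ENTITY, N) (N)
#define EMIT_MODE_SET(ENTITY, MODE, HARD_REGS_LIVE) \
  hypothetical_emit_mode_set (MODE)
#define NORMAL_MODE(ENTITY) (0)
#endif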
857 /* This structure contains the information for each insn which requires
858 either single or double mode to be set.
859 MODE is the mode this insn must be executed in.
860 INSN_PTR is the insn to be executed (may be the note that marks the
861 beginning of a basic block).
862 BBNUM is the flow graph basic block this insn occurs in.
863 NEXT is the next insn in the same basic block. */
864 struct seginfo
866 int mode;
867 rtx insn_ptr;
868 int bbnum;
869 struct seginfo *next;
870 HARD_REG_SET regs_live;
873 struct bb_info
875 struct seginfo *seginfo;
876 int computing;
879 /* These bitmaps are used for the LCM algorithm. */
881 #ifdef OPTIMIZE_MODE_SWITCHING
882 static sbitmap *antic;
883 static sbitmap *transp;
884 static sbitmap *comp;
885 static sbitmap *delete;
886 static sbitmap *insert;
888 static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET));
889 static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *));
890 static void reg_dies PARAMS ((rtx, HARD_REG_SET));
891 static void reg_becomes_live PARAMS ((rtx, rtx, void *));
892 static void make_preds_opaque PARAMS ((basic_block, int));
893 #endif
895 #ifdef OPTIMIZE_MODE_SWITCHING
897 /* This function will allocate a new SEGINFO structure, initialized
898 with the MODE, INSN, and basic block BB parameters. */
900 static struct seginfo *
901 new_seginfo (mode, insn, bb, regs_live)
902 int mode;
903 rtx insn;
904 int bb;
905 HARD_REG_SET regs_live;
907 struct seginfo *ptr;
908 ptr = xmalloc (sizeof (struct seginfo));
909 ptr->mode = mode;
910 ptr->insn_ptr = insn;
911 ptr->bbnum = bb;
912 ptr->next = NULL;
913 COPY_HARD_REG_SET (ptr->regs_live, regs_live);
914 return ptr;
917 /* Add a seginfo element to the end of a list.
918 HEAD is a pointer to the list beginning.
919 INFO is the structure to be linked in. */
921 static void
922 add_seginfo (head, info)
923 struct bb_info *head;
924 struct seginfo *info;
926 struct seginfo *ptr;
928 if (head->seginfo == NULL)
929 head->seginfo = info;
930 else
932 ptr = head->seginfo;
933 while (ptr->next != NULL)
934 ptr = ptr->next;
935 ptr->next = info;
939 /* Make all predecessors of basic block B opaque, recursively, till we hit
940 some that are already non-transparent, or an edge where aux is set; that
941 denotes that a mode set is to be done on that edge.
942 J is the bit number in the bitmaps that corresponds to the entity that
943 we are currently handling mode-switching for. */
945 static void
946 make_preds_opaque (b, j)
947 basic_block b;
948 int j;
950 edge e;
952 for (e = b->pred; e; e = e->pred_next)
954 basic_block pb = e->src;
956 if (e->aux || ! TEST_BIT (transp[pb->index], j))
957 continue;
959 RESET_BIT (transp[pb->index], j);
960 make_preds_opaque (pb, j);
964 /* Record in LIVE that register REG died. */
966 static void
967 reg_dies (reg, live)
968 rtx reg;
969 HARD_REG_SET live;
971 int regno, nregs;
973 if (GET_CODE (reg) != REG)
974 return;
976 regno = REGNO (reg);
977 if (regno < FIRST_PSEUDO_REGISTER)
978 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
979 nregs--)
980 CLEAR_HARD_REG_BIT (live, regno + nregs);
983 /* Record in LIVE that register REG became live.
984 This is called via note_stores. */
986 static void
987 reg_becomes_live (reg, setter, live)
988 rtx reg;
989 rtx setter ATTRIBUTE_UNUSED;
990 void *live;
992 int regno, nregs;
994 if (GET_CODE (reg) == SUBREG)
995 reg = SUBREG_REG (reg);
997 if (GET_CODE (reg) != REG)
998 return;
1000 regno = REGNO (reg);
1001 if (regno < FIRST_PSEUDO_REGISTER)
1002 for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
1003 nregs--)
1004 SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
1007 /* Find all insns that need a particular mode setting, and insert the
1008 necessary mode switches. Return true if we did work. */
1010 int
1011 optimize_mode_switching (file)
1012 FILE *file;
1014 rtx insn;
1015 int bb, e;
1016 int need_commit = 0;
1017 sbitmap *kill;
1018 struct edge_list *edge_list;
1019 static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
1020 #define N_ENTITIES (sizeof num_modes / sizeof (int))
1021 int entity_map[N_ENTITIES];
1022 struct bb_info *bb_info[N_ENTITIES];
1023 int i, j;
1024 int n_entities;
1025 int max_num_modes = 0;
1027 #ifdef NORMAL_MODE
1028 /* Increment n_basic_blocks before allocating bb_info. */
1029 n_basic_blocks++;
1030 #endif
1032 for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
1033 if (OPTIMIZE_MODE_SWITCHING (e))
1035 /* Create the list of segments within each basic block. */
1036 bb_info[n_entities]
1037 = (struct bb_info *) xcalloc (n_basic_blocks, sizeof **bb_info);
1038 entity_map[n_entities++] = e;
1039 if (num_modes[e] > max_num_modes)
1040 max_num_modes = num_modes[e];
1043 #ifdef NORMAL_MODE
1044 /* Decrement it back in case we return below. */
1045 n_basic_blocks--;
1046 #endif
1048 if (! n_entities)
1049 return 0;
1051 #ifdef NORMAL_MODE
1052 /* We're going to pretend the EXIT_BLOCK is a regular basic block,
1053 so that switching back to normal mode when entering the
1054 EXIT_BLOCK isn't optimized away. We do this by incrementing the
1055 basic block count, growing the VARRAY of basic_block_info and
1056 appending the EXIT_BLOCK_PTR to it. */
1057 n_basic_blocks++;
1058 if (VARRAY_SIZE (basic_block_info) < n_basic_blocks)
1059 VARRAY_GROW (basic_block_info, n_basic_blocks);
1060 BASIC_BLOCK (n_basic_blocks - 1) = EXIT_BLOCK_PTR;
1061 EXIT_BLOCK_PTR->index = n_basic_blocks - 1;
1062 #endif
1064 /* Create the bitmap vectors. */
1066 antic = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1067 transp = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1068 comp = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1070 sbitmap_vector_ones (transp, n_basic_blocks);
1072 for (j = n_entities - 1; j >= 0; j--)
1074 int e = entity_map[j];
1075 int no_mode = num_modes[e];
1076 struct bb_info *info = bb_info[j];
1078 /* Determine the mode (if any) that the first use of entity E needs.
1079 This will be the mode that is anticipatable for this block.
1080 Also compute the initial transparency settings. */
1081 for (bb = 0 ; bb < n_basic_blocks; bb++)
1083 struct seginfo *ptr;
1084 int last_mode = no_mode;
1085 HARD_REG_SET live_now;
1087 REG_SET_TO_HARD_REG_SET (live_now,
1088 BASIC_BLOCK (bb)->global_live_at_start);
1089 for (insn = BLOCK_HEAD (bb);
1090 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
1091 insn = NEXT_INSN (insn))
1093 if (INSN_P (insn))
1095 int mode = MODE_NEEDED (e, insn);
1096 rtx link;
1098 if (mode != no_mode && mode != last_mode)
1100 last_mode = mode;
1101 ptr = new_seginfo (mode, insn, bb, live_now);
1102 add_seginfo (info + bb, ptr);
1103 RESET_BIT (transp[bb], j);
1106 /* Update LIVE_NOW. */
1107 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1108 if (REG_NOTE_KIND (link) == REG_DEAD)
1109 reg_dies (XEXP (link, 0), live_now);
1111 note_stores (PATTERN (insn), reg_becomes_live, &live_now);
1112 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1113 if (REG_NOTE_KIND (link) == REG_UNUSED)
1114 reg_dies (XEXP (link, 0), live_now);
1118 info[bb].computing = last_mode;
1119 /* Check for blocks without ANY mode requirements. */
1120 if (last_mode == no_mode)
1122 ptr = new_seginfo (no_mode, insn, bb, live_now);
1123 add_seginfo (info + bb, ptr);
1126 #ifdef NORMAL_MODE
1128 int mode = NORMAL_MODE (e);
1130 if (mode != no_mode)
1132 edge eg;
1134 for (eg = ENTRY_BLOCK_PTR->succ; eg; eg = eg->succ_next)
1136 bb = eg->dest->index;
1138 /* By always making this nontransparent, we save
1139 an extra check in make_preds_opaque. We also
1140 need this to avoid confusing pre_edge_lcm when
1141 antic is cleared but transp and comp are set. */
1142 RESET_BIT (transp[bb], j);
1144 /* If the block already has MODE, pretend it
1145 has none (because we don't need to set it),
1146 but retain whatever mode it computes. */
1147 if (info[bb].seginfo->mode == mode)
1148 info[bb].seginfo->mode = no_mode;
1150 /* Insert a fake computing definition of MODE into entry
1151 blocks which compute no mode. This represents the mode on
1152 entry. */
1153 else if (info[bb].computing == no_mode)
1155 info[bb].computing = mode;
1156 info[bb].seginfo->mode = no_mode;
1160 bb = n_basic_blocks - 1;
1161 info[bb].seginfo->mode = mode;
1164 #endif /* NORMAL_MODE */
1167 kill = sbitmap_vector_alloc (n_basic_blocks, n_entities);
1168 for (i = 0; i < max_num_modes; i++)
1170 int current_mode[N_ENTITIES];
1172 /* Set the anticipatable and computing arrays. */
1173 sbitmap_vector_zero (antic, n_basic_blocks);
1174 sbitmap_vector_zero (comp, n_basic_blocks);
1175 for (j = n_entities - 1; j >= 0; j--)
1177 int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
1178 struct bb_info *info = bb_info[j];
1180 for (bb = 0 ; bb < n_basic_blocks; bb++)
1182 if (info[bb].seginfo->mode == m)
1183 SET_BIT (antic[bb], j);
1185 if (info[bb].computing == m)
1186 SET_BIT (comp[bb], j);
1190 /* Calculate the optimal locations for the placement of
1191 mode switches to modes with priority I. */
1193 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
1194 sbitmap_not (kill[bb], transp[bb]);
1195 edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
1196 kill, &insert, &delete);
1198 for (j = n_entities - 1; j >= 0; j--)
1200 /* Insert all mode sets that have been inserted by lcm. */
1201 int no_mode = num_modes[entity_map[j]];
1203 /* Wherever we have moved a mode setting upwards in the flow graph,
1204 the blocks between the new setting site and the now redundant
1205 computation cease to be transparent for any lower-priority
1206 mode of the same entity. First set the aux field of each
1207 insertion site edge non-transparent, then propagate the new
1208 non-transparency from the redundant computation upwards till
1209 we hit an insertion site or an already non-transparent block. */
1210 for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
1212 edge eg = INDEX_EDGE (edge_list, e);
1213 int mode;
1214 basic_block src_bb;
1215 HARD_REG_SET live_at_edge;
1216 rtx mode_set;
1218 eg->aux = 0;
1220 if (! TEST_BIT (insert[e], j))
1221 continue;
1223 eg->aux = (void *)1;
1225 mode = current_mode[j];
1226 src_bb = eg->src;
1228 REG_SET_TO_HARD_REG_SET (live_at_edge,
1229 src_bb->global_live_at_end);
1231 start_sequence ();
1232 EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
1233 mode_set = gen_sequence ();
1234 end_sequence ();
1236 /* If this is an abnormal edge, we'll insert at the end
1237 of the previous block. */
1238 if (eg->flags & EDGE_ABNORMAL)
1240 if (GET_CODE (src_bb->end) == JUMP_INSN)
1241 emit_insn_before (mode_set, src_bb->end);
1242 /* It doesn't make sense to switch to normal mode
1243 after a CALL_INSN, so we're going to abort if we
1244 find one. The cases in which a CALL_INSN may
1245 have an abnormal edge are sibcalls and EH edges.
1246 In the case of sibcalls, the dest basic-block is
1247 the EXIT_BLOCK, that runs in normal mode; it is
1248 assumed that a sibcall insn requires normal mode
1249 itself, so no mode switch would be required after
1250 the call (it wouldn't make sense, anyway). In
1251 the case of EH edges, EH entry points also start
1252 in normal mode, so a similar reasoning applies. */
1253 else if (GET_CODE (src_bb->end) == INSN)
1254 emit_insn_after (mode_set, src_bb->end);
1255 else
1256 abort ();
1257 bb_info[j][src_bb->index].computing = mode;
1258 RESET_BIT (transp[src_bb->index], j);
1260 else
1262 need_commit = 1;
1263 insert_insn_on_edge (mode_set, eg);
1267 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
1268 if (TEST_BIT (delete[bb], j))
1270 make_preds_opaque (BASIC_BLOCK (bb), j);
1271 /* Cancel the 'deleted' mode set. */
1272 bb_info[j][bb].seginfo->mode = no_mode;
1276 free_edge_list (edge_list);
1279 #ifdef NORMAL_MODE
1280 /* Restore the special status of EXIT_BLOCK. */
1281 n_basic_blocks--;
1282 VARRAY_POP (basic_block_info);
1283 EXIT_BLOCK_PTR->index = EXIT_BLOCK;
1284 #endif
1286 /* Now output the remaining mode sets in all the segments. */
1287 for (j = n_entities - 1; j >= 0; j--)
1289 int no_mode = num_modes[entity_map[j]];
1291 #ifdef NORMAL_MODE
1292 if (bb_info[j][n_basic_blocks].seginfo->mode != no_mode)
1294 edge eg;
1295 struct seginfo *ptr = bb_info[j][n_basic_blocks].seginfo;
1297 for (eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
1299 rtx mode_set;
1301 if (bb_info[j][eg->src->index].computing == ptr->mode)
1302 continue;
1304 start_sequence ();
1305 EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
1306 mode_set = gen_sequence ();
1307 end_sequence ();
1309 /* If this is an abnormal edge, we'll insert at the end of the
1310 previous block. */
1311 if (eg->flags & EDGE_ABNORMAL)
1313 if (GET_CODE (eg->src->end) == JUMP_INSN)
1314 emit_insn_before (mode_set, eg->src->end);
1315 else if (GET_CODE (eg->src->end) == INSN)
1316 emit_insn_after (mode_set, eg->src->end);
1317 else
1318 abort ();
1320 else
1322 need_commit = 1;
1323 insert_insn_on_edge (mode_set, eg);
1328 #endif
1330 for (bb = n_basic_blocks - 1; bb >= 0; bb--)
1332 struct seginfo *ptr, *next;
1333 for (ptr = bb_info[j][bb].seginfo; ptr; ptr = next)
1335 next = ptr->next;
1336 if (ptr->mode != no_mode)
1338 rtx mode_set;
1340 start_sequence ();
1341 EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
1342 mode_set = gen_sequence ();
1343 end_sequence ();
1345 if (GET_CODE (ptr->insn_ptr) == NOTE
1346 && (NOTE_LINE_NUMBER (ptr->insn_ptr)
1347 == NOTE_INSN_BASIC_BLOCK))
1348 emit_insn_after (mode_set, ptr->insn_ptr);
1349 else
1350 emit_insn_before (mode_set, ptr->insn_ptr);
1353 free (ptr);
1357 free (bb_info[j]);
1360 /* Finished. Free up all the things we've allocated. */
1362 sbitmap_vector_free (kill);
1363 sbitmap_vector_free (antic);
1364 sbitmap_vector_free (transp);
1365 sbitmap_vector_free (comp);
1366 sbitmap_vector_free (delete);
1367 sbitmap_vector_free (insert);
1369 if (need_commit)
1370 commit_edge_insertions ();
1372 /* Ideally we'd figure out what blocks were affected and start from
1373 there, but this is enormously complicated by commit_edge_insertions,
1374 which would screw up any indices we'd collected, and also need to
1375 be involved in the update. Bail and recompute global life info for
1376 everything. */
1378 allocate_reg_life_data ();
1379 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES,
1380 (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
1381 | PROP_SCAN_DEAD_CODE | PROP_REG_INFO));
1383 return 1;
1385 #endif /* OPTIMIZE_MODE_SWITCHING */