/* Generic partial redundancy elimination with lazy code motion support.
   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* These routines are meant to be used by various optimization
   passes which can be modeled as lazy code motion problems,
   including, but not limited to:

	* Traditional partial redundancy elimination.

	* Placement of caller/callee register save/restores.

	* Load/store motion.

	* Copy motion.

	* Conversion of flat register files to a stacked register
	model.

	* Dead load/store elimination.

   These routines accept as input:

	* Basic block information (number of blocks, lists of
	predecessors and successors).  Note the granularity does
	not need to be basic blocks; the nodes could instead be
	statements or functions.

	* Bitmaps of local properties (computed, transparent and
	anticipatable expressions).

   The output of these routines is a bitmap of redundant computations
   and a bitmap of optimal placement points.  */

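/* As a hedged usage sketch (the local property vectors and N_EXPRS are
   assumed to have been computed by the client pass; only the entry
   points defined below are real):

     sbitmap *insert, *delete;
     struct edge_list *edge_list;

     edge_list = pre_edge_lcm (file, n_exprs, transp, avloc, antloc,
                               kill, &insert, &delete);
     ... add expressions on the edges flagged in INSERT and remove the
         computations flagged in DELETE ...
     sbitmap_vector_free (insert);
     sbitmap_vector_free (delete);
     free_edge_list (edge_list);  */
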
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "tm_p.h"

/* We want target macros for the mode switching code to be able to refer
   to instruction attribute values.  */
#include "insn-attr.h"

/* Edge based LCM routines.  */
static void compute_antinout_edge	PARAMS ((sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *));
static void compute_earliest		PARAMS ((struct edge_list *, int,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_laterin		PARAMS ((struct edge_list *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_insert_delete	PARAMS ((struct edge_list *edge_list,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));

/* Edge based LCM routines on a reverse flowgraph.  */
static void compute_farthest		PARAMS ((struct edge_list *, int,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_nearerout		PARAMS ((struct edge_list *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_rev_insert_delete	PARAMS ((struct edge_list *edge_list,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));

/* Edge based LCM routines.  */

/* Compute expression anticipatability at entrance and exit of each block.
   This is done based on the flow graph, and not on the pred-succ lists.
   Other than that, it's pretty much identical to compute_antinout.  */

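/* In dataflow-equation form, the fixed point computed below is (with the
   intersection taken over the successors S of block B, and ANTOUT of a
   predecessor of the EXIT block being the empty set):

     ANTOUT(B) = intersect ANTIN(S)
     ANTIN(B)  = ANTLOC(B) | (ANTOUT(B) & TRANSP(B))  */
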
static void
compute_antinout_edge (antloc, transp, antin, antout)
     sbitmap *antloc;
     sbitmap *transp;
     sbitmap *antin;
     sbitmap *antout;
{
  basic_block bb;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  sbitmap_vector_ones (antin, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    e->src->aux = EXIT_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      if (bb->aux == EXIT_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are predecessors of
	   the EXIT block.  That way we never add them to the worklist
	   again.  */
	sbitmap_zero (antout[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
	}

      if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
				   transp[bb->index], antout[bb->index]))
	/* If the in state of this block changed, then we need
	   to add the predecessors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = bb->pred; e; e = e->pred_next)
	  if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
	    {
	      *qin++ = e->src;
	      e->src->aux = e;
	      qlen++;
	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the earliest vector for edge based LCM.  */

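/* For an edge (P,S) between two ordinary blocks, the loop below computes

     EARLIEST(P,S) = (ANTIN(S) & ~AVOUT(P)) & (KILL(P) | ~ANTOUT(P))

   with the ENTRY and EXIT boundary cases handled explicitly.  */
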
static void
compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest)
     struct edge_list *edge_list;
     int n_exprs;
     sbitmap *antin, *antout, *avout, *kill, *earliest;
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (pred == ENTRY_BLOCK_PTR)
	sbitmap_copy (earliest[x], antin[succ->index]);
      else
	{
	  if (succ == EXIT_BLOCK_PTR)
	    sbitmap_zero (earliest[x]);
	  else
	    {
	      sbitmap_difference (difference, antin[succ->index],
				  avout[pred->index]);
	      sbitmap_not (temp_bitmap, antout[pred->index]);
	      sbitmap_a_and_b_or_c (earliest[x], difference,
				    kill[pred->index], temp_bitmap);
	    }
	}
    }

  sbitmap_free (temp_bitmap);
  sbitmap_free (difference);
}

/* later(p,s) is dependent on the calculation of laterin(p).
   laterin(p) is dependent on the calculation of later(p2,p).

     laterin(ENTRY) is defined as all 0's
     later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
     laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).

   If we progress in this manner, starting with all basic blocks
   in the work list, anytime we change later(bb), we need to add
   succs(bb) to the worklist if they are not already on the worklist.

   Boundary conditions:

     We prime the worklist with all the normal basic blocks.  The ENTRY
     block can never be added to the worklist since it is never the
     successor of any block.  We explicitly prevent the EXIT block from
     being added to the worklist.

     We optimistically initialize LATER.  That is the only time this
     routine will compute LATER for an edge out of the entry block since
     the entry block is never on the worklist.  Thus, LATERIN is neither
     used nor computed for the ENTRY block.

     Since the EXIT block is never added to the worklist, we will neither
     use nor compute LATERIN for the exit block.  Edges which reach the
     EXIT block are handled in the normal fashion inside the loop.
     However, the insertion/deletion computation needs LATERIN(EXIT), so
     we have to compute it.  */

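/* Stated as dataflow equations (intersection over the incoming edges E
   of block B):

     LATERIN(B) = intersect LATER(E)
     LATER(P,S) = EARLIEST(P,S) | (LATERIN(P) & ~ANTLOC(P))  */
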
static void
compute_laterin (edge_list, earliest, antloc, later, laterin)
     struct edge_list *edge_list;
     sbitmap *earliest, *antloc, *later, *laterin;
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  sbitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it.  In fact the above allocation
     of n_basic_blocks + 1 elements is not necessary.  */
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      bb->aux = NULL;
      qlen--;
      if (qout >= qend)
	qout = worklist;

      /* Compute LATERIN as the intersection of LATER for each incoming
	 edge to BB.  */
      sbitmap_ones (laterin[bb->index]);
      for (e = bb->pred; e != NULL; e = e->pred_next)
	sbitmap_a_and_b (laterin[bb->index], laterin[bb->index],
			 later[(size_t) e->aux]);

      /* Calculate LATER for all outgoing edges.  */
      for (e = bb->succ; e != NULL; e = e->succ_next)
	if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
				      earliest[(size_t) e->aux],
				      laterin[e->src->index],
				      antloc[e->src->index])
	    /* If LATER for an outgoing edge was changed, then we need
	       to add the target of the outgoing edge to the worklist.  */
	    && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
	  {
	    *qin++ = e->dest;
	    e->dest->aux = e;
	    qlen++;
	    if (qin >= qend)
	      qin = worklist;
	  }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  sbitmap_ones (laterin[last_basic_block]);
  for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
    sbitmap_a_and_b (laterin[last_basic_block],
		     laterin[last_basic_block],
		     later[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (worklist);
}

/* Compute the insertion and deletion points for edge based LCM.  */

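/* The two difference computations below implement

     DELETE(B)   = ANTLOC(B) & ~LATERIN(B)
     INSERT(P,S) = LATER(P,S) & ~LATERIN(S)

   using the extra LATERIN slot for edges into the EXIT block.  */
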
static void
compute_insert_delete (edge_list, antloc, later, laterin,
		       insert, delete)
     struct edge_list *edge_list;
     sbitmap *antloc, *later, *laterin, *insert, *delete;
{
  int x;
  basic_block bb;

  FOR_EACH_BB (bb)
    sbitmap_difference (delete[bb->index], antloc[bb->index],
			laterin[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);

      if (b == EXIT_BLOCK_PTR)
	sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
      else
	sbitmap_difference (insert[x], later[x], laterin[b->index]);
    }
}

/* Given local properties TRANSP, AVLOC, ANTLOC, KILL return the insert and
   delete vectors for edge based LCM.  Returns an edge list which is used to
   map the insert vector to what edge an expression should be inserted on.  */

struct edge_list *
pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
     FILE *file ATTRIBUTE_UNUSED;
     int n_exprs;
     sbitmap *transp;
     sbitmap *avloc;
     sbitmap *antloc;
     sbitmap *kill;
     sbitmap **insert;
     sbitmap **delete;
{
  sbitmap *antin, *antout, *earliest;
  sbitmap *avin, *avout;
  sbitmap *later, *laterin;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
      dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
      dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
    }
#endif

  /* Compute global availability.  */
  avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (avloc, kill, avout, avin);
  sbitmap_vector_free (avin);

  /* Compute global anticipatability.  */
  antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_antinout_edge (antloc, transp, antin, antout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
      dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
    }
#endif

  /* Compute earliestness.  */
  earliest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
#endif

  sbitmap_vector_free (antout);
  sbitmap_vector_free (antin);
  sbitmap_vector_free (avout);

  later = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the exit block in the laterin vector.  */
  laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_laterin (edge_list, earliest, antloc, later, laterin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "laterin", "", laterin,
			   last_basic_block + 1);
      dump_sbitmap_vector (file, "later", "", later, num_edges);
    }
#endif

  sbitmap_vector_free (earliest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);

  sbitmap_vector_free (laterin);
  sbitmap_vector_free (later);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif

  return edge_list;
}

/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors,
   iterating over a worklist until a fixed point is reached.  */

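/* In equation form (intersection over the predecessors P of block B,
   with AVIN of a successor of the ENTRY block being the empty set):

     AVIN(B)  = intersect AVOUT(P)
     AVOUT(B) = AVLOC(B) | (AVIN(B) & ~KILL(B))  */
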
void
compute_available (avloc, kill, avout, avin)
     sbitmap *avloc, *kill, *avout, *avin;
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution.  */
  sbitmap_vector_ones (avout, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    e->dest->aux = ENTRY_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
	 intersection of avouts is the null set.  We can identify such blocks
	 by the special value in the AUX field in the block structure.  */
      if (bb->aux == ENTRY_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are successors of the
	   ENTRY block.  That way we never add them to the worklist again.  */
	sbitmap_zero (avin[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
	}

      if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index],
				    avin[bb->index], kill[bb->index]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = bb->succ; e; e = e->succ_next)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the farthest vector for edge based LCM.  */

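/* This mirrors compute_earliest on the reverse flowgraph; for an edge
   (P,S) between two ordinary blocks the loop below computes

     FARTHEST(P,S) = (ST_AVOUT(P) & ~ST_ANTIN(S)) & (KILL(S) | ~ST_AVIN(S))

   with the ENTRY and EXIT boundary cases handled explicitly.  */
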
static void
compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		  kill, farthest)
     struct edge_list *edge_list;
     int n_exprs;
     sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest;
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (succ == EXIT_BLOCK_PTR)
	sbitmap_copy (farthest[x], st_avout[pred->index]);
      else
	{
	  if (pred == ENTRY_BLOCK_PTR)
	    sbitmap_zero (farthest[x]);
	  else
	    {
	      sbitmap_difference (difference, st_avout[pred->index],
				  st_antin[succ->index]);
	      sbitmap_not (temp_bitmap, st_avin[succ->index]);
	      sbitmap_a_and_b_or_c (farthest[x], difference,
				    kill[succ->index], temp_bitmap);
	    }
	}
    }

  sbitmap_free (temp_bitmap);
  sbitmap_free (difference);
}

/* Compute nearer and nearerout vectors for edge based LCM.

   This is the mirror of compute_laterin; additional comments on the
   implementation can be found before compute_laterin.  */

static void
compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout)
     struct edge_list *edge_list;
     sbitmap *farthest, *st_avloc, *nearer, *nearerout;
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *tos, bb;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/stack.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  tos = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution.  */
  sbitmap_vector_ones (nearer, num_edges);

  /* Note that even though we want an optimistic setting of NEARER, we
     do not want to be overly optimistic.  Consider an incoming edge to
     the exit block.  That edge should always have a NEARER value the
     same as FARTHEST for that edge.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    sbitmap_copy (nearer[(size_t) e->aux], farthest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit
     from the loop given our optimistic initialization of NEARER.  */
  FOR_EACH_BB (bb)
    {
      *tos++ = bb;
      bb->aux = bb;
    }

  /* Iterate until the worklist is empty.  */
  while (tos != worklist)
    {
      /* Take the first entry off the worklist.  */
      bb = *--tos;
      bb->aux = NULL;

      /* Compute NEAREROUT as the intersection of NEARER for each outgoing
	 edge from BB.  */
      sbitmap_ones (nearerout[bb->index]);
      for (e = bb->succ; e != NULL; e = e->succ_next)
	sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
			 nearer[(size_t) e->aux]);

      /* Calculate NEARER for all incoming edges.  */
      for (e = bb->pred; e != NULL; e = e->pred_next)
	if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
				      farthest[(size_t) e->aux],
				      nearerout[e->dest->index],
				      st_avloc[e->dest->index])
	    /* If NEARER for an incoming edge was changed, then we need
	       to add the source of the incoming edge to the worklist.  */
	    && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
	  {
	    *tos++ = e->src;
	    e->src->aux = e;
	  }
    }

  /* Computation of insertion and deletion points requires computing NEAREROUT
     for the ENTRY block.  We allocated an extra entry in the NEAREROUT array
     for just this purpose.  */
  sbitmap_ones (nearerout[last_basic_block]);
  for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
    sbitmap_a_and_b (nearerout[last_basic_block],
		     nearerout[last_basic_block],
		     nearer[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (tos);
}

/* Compute the insertion and deletion points for edge based reverse LCM.  */

static void
compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			   insert, delete)
     struct edge_list *edge_list;
     sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete;
{
  int x;
  basic_block bb;

  FOR_EACH_BB (bb)
    sbitmap_difference (delete[bb->index], st_avloc[bb->index],
			nearerout[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
      if (b == ENTRY_BLOCK_PTR)
	sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
      else
	sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
    }
}

/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
   insert and delete vectors for edge based reverse LCM.  Returns an
   edge list which is used to map the insert vector to what edge
   an expression should be inserted on.  */

struct edge_list *
pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill,
		  insert, delete)
     FILE *file ATTRIBUTE_UNUSED;
     int n_exprs;
     sbitmap *transp;
     sbitmap *st_avloc;
     sbitmap *st_antloc;
     sbitmap *kill;
     sbitmap **insert;
     sbitmap **delete;
{
  sbitmap *st_antin, *st_antout;
  sbitmap *st_avout, *st_avin, *farthest;
  sbitmap *nearer, *nearerout;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

  st_antin = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_antout = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
  sbitmap_vector_zero (st_antin, last_basic_block);
  sbitmap_vector_zero (st_antout, last_basic_block);
  compute_antinout_edge (st_antloc, transp, st_antin, st_antout);

  /* Compute global availability.  */
  st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (st_avloc, kill, st_avout, st_avin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
      dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
      dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
    }
#endif

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
      dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
    }
#endif

  /* Compute farthestness.  */
  farthest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		    kill, farthest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
#endif

  sbitmap_vector_free (st_antin);
  sbitmap_vector_free (st_antout);

  sbitmap_vector_free (st_avin);
  sbitmap_vector_free (st_avout);

  nearer = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the entry block.  */
  nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "nearerout", "", nearerout,
			   last_basic_block + 1);
      dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
    }
#endif

  sbitmap_vector_free (farthest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			     *insert, *delete);

  sbitmap_vector_free (nearerout);
  sbitmap_vector_free (nearer);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif
  return edge_list;
}

/* Mode switching:

   The algorithm for setting the modes consists of scanning the insn list
   and finding all the insns which require a specific mode.  Each insn gets
   a unique struct seginfo element.  These structures are inserted into a list
   for each basic block.  For each entity, there is an array of bb_info over
   the flow graph basic blocks (local var 'bb_info'); each element contains a
   list of all insns within that basic block, in the order they are
   encountered.

   For each entity, any basic block WITHOUT any insns requiring a specific
   mode is given a single entry, without a mode.  (Each basic block
   in the flow graph must have at least one entry in the segment table.)

   The LCM algorithm is then run over the flow graph to determine where to
   place the sets to the highest-priority value with respect to the first
   insn in any one block.  Any adjustments required to the transparency
   vectors are made, then the next iteration starts for the next-lower
   priority mode, till for each entity all modes are exhausted.

   More details are located in the code for optimize_mode_switching().  */

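/* As a hedged sketch, inferred from the uses below rather than from any
   target documentation, a target enabling this pass is expected to
   provide roughly these macros:

     OPTIMIZE_MODE_SWITCHING (ENTITY)   nonzero to run the pass for ENTITY
     NUM_MODES_FOR_MODE_SWITCHING       array initializer; one mode count
                                        per entity
     MODE_NEEDED (ENTITY, INSN)         mode INSN requires, or the no-mode
                                        value
     MODE_PRIORITY_TO_MODE (ENTITY, N)  the mode with priority N
     EMIT_MODE_SET (ENTITY, MODE, HARD_REGS_LIVE)
                                        emit insns that set MODE
     NORMAL_MODE (ENTITY)               optional; the mode in effect at
                                        function entry and exit  */
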
/* This structure contains the information for each insn which requires
   either single or double mode to be set.
   MODE is the mode this insn must be executed in.
   INSN_PTR is the insn to be executed (may be the note that marks the
   beginning of a basic block).
   BBNUM is the flow graph basic block this insn occurs in.
   NEXT is the next insn in the same basic block.  */
struct seginfo
{
  int mode;
  rtx insn_ptr;
  int bbnum;
  struct seginfo *next;
  HARD_REG_SET regs_live;	/* Hard registers live at this point.  */
};

/* Per-basic-block list of segments, plus the mode this block computes
   (the last mode needed within the block).  */
struct bb_info
{
  struct seginfo *seginfo;
  int computing;
};

/* These bitmaps are used for the LCM algorithm.  */

#ifdef OPTIMIZE_MODE_SWITCHING
static sbitmap *antic;
static sbitmap *transp;
static sbitmap *comp;
static sbitmap *delete;
static sbitmap *insert;

static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET));
static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *));
static void reg_dies PARAMS ((rtx, HARD_REG_SET));
static void reg_becomes_live PARAMS ((rtx, rtx, void *));
static void make_preds_opaque PARAMS ((basic_block, int));
#endif

#ifdef OPTIMIZE_MODE_SWITCHING

/* This function will allocate a new SEGINFO structure, initialized
   with the MODE, INSN, and basic block BB parameters.  */

static struct seginfo *
new_seginfo (mode, insn, bb, regs_live)
     int mode;
     rtx insn;
     int bb;
     HARD_REG_SET regs_live;
{
  struct seginfo *ptr;
  ptr = xmalloc (sizeof (struct seginfo));
  ptr->mode = mode;
  ptr->insn_ptr = insn;
  ptr->bbnum = bb;
  ptr->next = NULL;
  COPY_HARD_REG_SET (ptr->regs_live, regs_live);
  return ptr;
}

/* Add a seginfo element to the end of a list.
   HEAD is a pointer to the list beginning.
   INFO is the structure to be linked in.  */

static void
add_seginfo (head, info)
     struct bb_info *head;
     struct seginfo *info;
{
  struct seginfo *ptr;

  if (head->seginfo == NULL)
    head->seginfo = info;
  else
    {
      ptr = head->seginfo;
      while (ptr->next != NULL)
	ptr = ptr->next;
      ptr->next = info;
    }
}

/* Make all predecessors of basic block B opaque, recursively, till we hit
   some that are already non-transparent, or an edge where aux is set; that
   denotes that a mode set is to be done on that edge.
   J is the bit number in the bitmaps that corresponds to the entity that
   we are currently handling mode-switching for.  */

static void
make_preds_opaque (b, j)
     basic_block b;
     int j;
{
  edge e;

  for (e = b->pred; e; e = e->pred_next)
    {
      basic_block pb = e->src;

      if (e->aux || ! TEST_BIT (transp[pb->index], j))
	continue;

      RESET_BIT (transp[pb->index], j);
      make_preds_opaque (pb, j);
    }
}

/* Record in LIVE that register REG died.  */

static void
reg_dies (reg, live)
     rtx reg;
     HARD_REG_SET live;
{
  int regno, nregs;

  if (GET_CODE (reg) != REG)
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
	 nregs--)
      CLEAR_HARD_REG_BIT (live, regno + nregs);
}

/* Record in LIVE that register REG became live.
   This is called via note_stores.  */

static void
reg_becomes_live (reg, setter, live)
     rtx reg;
     rtx setter ATTRIBUTE_UNUSED;
     void *live;
{
  int regno, nregs;

  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);

  if (GET_CODE (reg) != REG)
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
	 nregs--)
      SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
}

/* Find all insns that need a particular mode setting, and insert the
   necessary mode switches.  Return true if we did work.  */

int
optimize_mode_switching (file)
     FILE *file;
{
  rtx insn;
  int e;
  basic_block bb;
  int need_commit = 0;
  sbitmap *kill;
  struct edge_list *edge_list;
  static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
#define N_ENTITIES ARRAY_SIZE (num_modes)
  int entity_map[N_ENTITIES];
  struct bb_info *bb_info[N_ENTITIES];
  int i, j;
  int n_entities;
  int max_num_modes = 0;
  bool emited = false;
  basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;

  clear_bb_flags ();

  for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
    if (OPTIMIZE_MODE_SWITCHING (e))
      {
	int entry_exit_extra = 0;

	/* Create the list of segments within each basic block.
	   If NORMAL_MODE is defined, allow for two extra
	   blocks split from the entry and exit block.  */
#ifdef NORMAL_MODE
	entry_exit_extra = 2;
#endif
	bb_info[n_entities]
	  = (struct bb_info *) xcalloc (last_basic_block + entry_exit_extra,
					sizeof **bb_info);
	entity_map[n_entities++] = e;
	if (num_modes[e] > max_num_modes)
	  max_num_modes = num_modes[e];
      }

  if (! n_entities)
    return 0;

#ifdef NORMAL_MODE
  {
    /* Split the edge from the entry block and the fallthrough edge to the
       exit block, so that we can note that NORMAL_MODE is supplied /
       required there.  */
    edge eg;
    post_entry = split_edge (ENTRY_BLOCK_PTR->succ);
    /* The only non-call predecessor at this stage is a block with a
       fallthrough edge; there can be at most one, but there could be
       none at all, e.g. when exit is called.  */
    for (pre_exit = 0, eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
      if (eg->flags & EDGE_FALLTHRU)
	{
	  regset live_at_end = eg->src->global_live_at_end;

	  if (pre_exit)
	    abort ();
	  pre_exit = split_edge (eg);
	  COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
	  COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
	}
  }
#endif

  /* Create the bitmap vectors.  */

  antic = sbitmap_vector_alloc (last_basic_block, n_entities);
  transp = sbitmap_vector_alloc (last_basic_block, n_entities);
  comp = sbitmap_vector_alloc (last_basic_block, n_entities);

  sbitmap_vector_ones (transp, last_basic_block);

  for (j = n_entities - 1; j >= 0; j--)
    {
      int e = entity_map[j];
      int no_mode = num_modes[e];
      struct bb_info *info = bb_info[j];

      /* Determine what mode (if any) the first use of entity E needs.
	 This will be the mode that is anticipatable for this block.
	 Also compute the initial transparency settings.  */
      FOR_EACH_BB (bb)
	{
	  struct seginfo *ptr;
	  int last_mode = no_mode;
	  HARD_REG_SET live_now;

	  REG_SET_TO_HARD_REG_SET (live_now,
				   bb->global_live_at_start);
	  for (insn = bb->head;
	       insn != NULL && insn != NEXT_INSN (bb->end);
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn))
		{
		  int mode = MODE_NEEDED (e, insn);
		  rtx link;

		  if (mode != no_mode && mode != last_mode)
		    {
		      last_mode = mode;
		      ptr = new_seginfo (mode, insn, bb->index, live_now);
		      add_seginfo (info + bb->index, ptr);
		      RESET_BIT (transp[bb->index], j);
		    }

		  /* Update LIVE_NOW.  */
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_DEAD)
		      reg_dies (XEXP (link, 0), live_now);

		  note_stores (PATTERN (insn), reg_becomes_live, &live_now);
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_UNUSED)
		      reg_dies (XEXP (link, 0), live_now);
		}
	    }

	  info[bb->index].computing = last_mode;
	  /* Check for blocks without ANY mode requirements.  */
	  if (last_mode == no_mode)
	    {
	      ptr = new_seginfo (no_mode, bb->end, bb->index, live_now);
	      add_seginfo (info + bb->index, ptr);
	    }
	}
#ifdef NORMAL_MODE
      {
	int mode = NORMAL_MODE (e);

	if (mode != no_mode)
	  {
	    bb = post_entry;

	    /* By always making this nontransparent, we save
	       an extra check in make_preds_opaque.  We also
	       need this to avoid confusing pre_edge_lcm when
	       antic is cleared but transp and comp are set.  */
	    RESET_BIT (transp[bb->index], j);

	    /* Insert a fake computing definition of MODE into entry
	       blocks which compute no mode.  This represents the mode on
	       entry.  */
	    info[bb->index].computing = mode;

	    if (pre_exit)
	      info[pre_exit->index].seginfo->mode = mode;
	  }
      }
#endif /* NORMAL_MODE */
    }

  kill = sbitmap_vector_alloc (last_basic_block, n_entities);
  for (i = 0; i < max_num_modes; i++)
    {
      int current_mode[N_ENTITIES];

      /* Set the anticipatable and computing arrays.  */
      sbitmap_vector_zero (antic, last_basic_block);
      sbitmap_vector_zero (comp, last_basic_block);
      for (j = n_entities - 1; j >= 0; j--)
	{
	  int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
	  struct bb_info *info = bb_info[j];

	  FOR_EACH_BB (bb)
	    {
	      if (info[bb->index].seginfo->mode == m)
		SET_BIT (antic[bb->index], j);

	      if (info[bb->index].computing == m)
		SET_BIT (comp[bb->index], j);
	    }
	}

      /* Calculate the optimal locations for the placement of mode
	 switches to modes with priority I.  */

      FOR_EACH_BB (bb)
	sbitmap_not (kill[bb->index], transp[bb->index]);
      edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
				kill, &insert, &delete);

      for (j = n_entities - 1; j >= 0; j--)
	{
	  /* Insert all mode sets that have been inserted by LCM.  */
	  int no_mode = num_modes[entity_map[j]];

	  /* Wherever we have moved a mode setting upwards in the flow graph,
	     the blocks between the new setting site and the now redundant
	     computation cease to be transparent for any lower-priority
	     mode of the same entity.  First flag each insertion site edge
	     via its aux field, then propagate the new non-transparency
	     from the redundant computation upwards till we hit an insertion
	     site or an already non-transparent block.  */
	  for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
	    {
	      edge eg = INDEX_EDGE (edge_list, e);
	      int mode;
	      basic_block src_bb;
	      HARD_REG_SET live_at_edge;
	      rtx mode_set;

	      eg->aux = 0;

	      if (! TEST_BIT (insert[e], j))
		continue;

	      eg->aux = (void *) 1;

	      mode = current_mode[j];
	      src_bb = eg->src;

	      REG_SET_TO_HARD_REG_SET (live_at_edge,
				       src_bb->global_live_at_end);

	      start_sequence ();
	      EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
	      mode_set = get_insns ();
	      end_sequence ();

	      /* Do not bother to insert an empty sequence.  */
	      if (mode_set == NULL_RTX)
		continue;

	      /* If this is an abnormal edge, we'll insert at the end
		 of the previous block.  */
	      if (eg->flags & EDGE_ABNORMAL)
		{
		  emited = true;
		  if (GET_CODE (src_bb->end) == JUMP_INSN)
		    emit_insn_before (mode_set, src_bb->end);
		  /* It doesn't make sense to switch to normal mode
		     after a CALL_INSN, so we're going to abort if we
		     find one.  The cases in which a CALL_INSN may
		     have an abnormal edge are sibcalls and EH edges.
		     In the case of sibcalls, the dest basic-block is
		     the EXIT_BLOCK, that runs in normal mode; it is
		     assumed that a sibcall insn requires normal mode
		     itself, so no mode switch would be required after
		     the call (it wouldn't make sense, anyway).  In
		     the case of EH edges, EH entry points also start
		     in normal mode, so a similar reasoning applies.  */
		  else if (GET_CODE (src_bb->end) == INSN)
		    emit_insn_after (mode_set, src_bb->end);
		  else
		    abort ();
		  bb_info[j][src_bb->index].computing = mode;
		  RESET_BIT (transp[src_bb->index], j);
		}
	      else
		{
		  need_commit = 1;
		  insert_insn_on_edge (mode_set, eg);
		}
	    }

	  FOR_EACH_BB_REVERSE (bb)
	    if (TEST_BIT (delete[bb->index], j))
	      {
		make_preds_opaque (bb, j);
		/* Cancel the 'deleted' mode set.  */
		bb_info[j][bb->index].seginfo->mode = no_mode;
	      }
	}

      clear_aux_for_edges ();
      free_edge_list (edge_list);
    }

  /* Now output the remaining mode sets in all the segments.  */
  for (j = n_entities - 1; j >= 0; j--)
    {
      int no_mode = num_modes[entity_map[j]];

      FOR_EACH_BB_REVERSE (bb)
	{
	  struct seginfo *ptr, *next;
	  for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
	    {
	      next = ptr->next;
	      if (ptr->mode != no_mode)
		{
		  rtx mode_set;

		  start_sequence ();
		  EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
		  mode_set = get_insns ();
		  end_sequence ();

		  /* Do not bother to insert an empty sequence; but free
		     the seginfo either way.  */
		  if (mode_set != NULL_RTX)
		    {
		      emited = true;
		      if (GET_CODE (ptr->insn_ptr) == NOTE
			  && (NOTE_LINE_NUMBER (ptr->insn_ptr)
			      == NOTE_INSN_BASIC_BLOCK))
			emit_insn_after (mode_set, ptr->insn_ptr);
		      else
			emit_insn_before (mode_set, ptr->insn_ptr);
		    }
		}

	      free (ptr);
	    }
	}

      free (bb_info[j]);
    }

  /* Finished.  Free up all the things we've allocated.  */
  sbitmap_vector_free (kill);
  sbitmap_vector_free (antic);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
  sbitmap_vector_free (delete);
  sbitmap_vector_free (insert);

  if (need_commit)
    commit_edge_insertions ();

#ifdef NORMAL_MODE
  cleanup_cfg (CLEANUP_NO_INSN_DEL);
#else
  if (!need_commit && !emited)
    return 0;
#endif

  max_regno = max_reg_num ();
  allocate_reg_info (max_regno, FALSE, FALSE);
  update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
				    (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
				     | PROP_SCAN_DEAD_CODE));

  return 1;
}
#endif /* OPTIMIZE_MODE_SWITCHING */