/* Generic partial redundancy elimination with lazy code motion support.
   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* These routines are meant to be used by various optimization
   passes which can be modeled as lazy code motion problems.
   Including, but not limited to:

	* Traditional partial redundancy elimination.

	* Placement of caller/callee register save/restores.

	* Load/store motion.

	* Copy motion.

	* Conversion of flat register files to a stacked register
	  model.

	* Dead load/store elimination.

   These routines accept as input:

	* Basic block information (number of blocks, lists of
	  predecessors and successors).  Note the granularity
	  does not need to be basic block; it could be statements
	  or functions.

	* Bitmaps of local properties (computed, transparent and
	  anticipatable expressions).

   The output of these routines is a bitmap of redundant computations
   and a bitmap of optimal placement points.  */
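
/* For orientation: the edge-based LCM pipeline below solves a sequence of
   dataflow problems (an editorial summary; the equations are restated
   next to each routine):

     1. AVIN/AVOUT    - global availability (compute_available)
     2. ANTIN/ANTOUT  - global anticipatability (compute_antinout_edge)
     3. EARLIEST      - earliest legal placement points (compute_earliest)
     4. LATER/LATERIN - latest profitable placement (compute_laterin)
     5. INSERT/DELETE - final edge insertions and block deletions
                        (compute_insert_delete)

   pre_edge_lcm drives these in order; pre_edge_rev_lcm runs the mirror
   problems (FARTHEST, NEARER/NEAREROUT) on the reverse flowgraph.  */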

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "tm_p.h"
#include "function.h"

/* We want target macros for the mode switching code to be able to refer
   to instruction attribute values.  */
#include "insn-attr.h"

/* Edge based LCM routines.  */
static void compute_antinout_edge	PARAMS ((sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *));
static void compute_earliest		PARAMS ((struct edge_list *, int,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_laterin		PARAMS ((struct edge_list *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_insert_delete	PARAMS ((struct edge_list *edge_list,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));

/* Edge based LCM routines on a reverse flowgraph.  */
static void compute_farthest		PARAMS ((struct edge_list *, int,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_nearerout		PARAMS ((struct edge_list *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));
static void compute_rev_insert_delete	PARAMS ((struct edge_list *edge_list,
						 sbitmap *, sbitmap *,
						 sbitmap *, sbitmap *,
						 sbitmap *));

/* Edge based LCM routines.  */

/* Compute expression anticipatability at entrance and exit of each block.
   This is done based on the flow graph, and not on the pred-succ lists.
   Other than that, it's pretty much identical to compute_antinout.  */
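
/* In equation form (a restatement of what the sbitmap calls below
   compute, not an addition to the algorithm):

     ANTOUT(b) = intersection over successors s of b of ANTIN(s)
                 (empty for predecessors of EXIT)
     ANTIN(b)  = ANTLOC(b) | (ANTOUT(b) & TRANSP(b))

   iterated to a maximal fixed point from an all-ones ANTIN.  */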

static void
compute_antinout_edge (antloc, transp, antin, antout)
     sbitmap *antloc;
     sbitmap *transp;
     sbitmap *antin;
     sbitmap *antout;
{
  basic_block bb;
  edge e;
  basic_block *worklist, *qin, *qout, *qend;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution, so make an optimistic initialization of
     ANTIN.  */
  sbitmap_vector_ones (antin, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of ANTIN above.  */
  FOR_EACH_BB_REVERSE (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are predecessors of the exit block so that we
     can easily identify them below.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    e->src->aux = EXIT_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      if (bb->aux == EXIT_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are predecessors of
	   the EXIT block.  That way we never add them to the worklist
	   again.  */
	sbitmap_zero (antout[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
	}

      if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
				   transp[bb->index], antout[bb->index]))
	/* If the in state of this block changed, then we need
	   to add the predecessors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = bb->pred; e; e = e->pred_next)
	  if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
	    {
	      *qin++ = e->src;
	      e->src->aux = e;
	      qlen++;
	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the earliest vector for edge based LCM.  */
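
/* For an edge (p,s), the bitmap computed below is

     EARLIEST(p,s) = ANTIN(s) & ~AVOUT(p) & (KILL(p) | ~ANTOUT(p))

   with the boundary cases EARLIEST(ENTRY,s) = ANTIN(s) and
   EARLIEST(p,EXIT) = 0, matching the code.  */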

static void
compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest)
     struct edge_list *edge_list;
     int n_exprs;
     sbitmap *antin, *antout, *avout, *kill, *earliest;
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (pred == ENTRY_BLOCK_PTR)
	sbitmap_copy (earliest[x], antin[succ->index]);
      else
	{
	  if (succ == EXIT_BLOCK_PTR)
	    sbitmap_zero (earliest[x]);
	  else
	    {
	      sbitmap_difference (difference, antin[succ->index],
				  avout[pred->index]);
	      sbitmap_not (temp_bitmap, antout[pred->index]);
	      sbitmap_a_and_b_or_c (earliest[x], difference,
				    kill[pred->index], temp_bitmap);
	    }
	}
    }

  sbitmap_free (temp_bitmap);
  sbitmap_free (difference);
}

/* later(p,s) is dependent on the calculation of laterin(p).
   laterin(p) is dependent on the calculation of later(p2,p).

     laterin(ENTRY) is defined as all 0's
     later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
     laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).

   If we progress in this manner, starting with all basic blocks
   in the work list, anytime we change later(bb), we need to add
   succs(bb) to the worklist if they are not already on the worklist.

   Boundary conditions:

     We prime the worklist with all the normal basic blocks.  The ENTRY
     block can never be added to the worklist since it is never the
     successor of any block.  We explicitly prevent the EXIT block from
     being added to the worklist.

     We optimistically initialize LATER.  That is the only time this routine
     will compute LATER for an edge out of the entry block since the entry
     block is never on the worklist.  Thus, LATERIN is neither used nor
     computed for the ENTRY block.

     Since the EXIT block is never added to the worklist, we will neither
     use nor compute LATERIN for the exit block.  Edges which reach the
     EXIT block are handled in the normal fashion inside the loop.  However,
     the insertion/deletion computation needs LATERIN(EXIT), so we have
     to compute it.  */
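
/* Restated as equations (read directly off the code below):

     LATERIN(b) = intersection over incoming edges (p,b) of LATER(p,b)
     LATER(p,s) = EARLIEST(p,s) | (LATERIN(p) & ~ANTLOC(p))

   iterated to a maximal fixed point from all-ones LATER, with
   LATER(ENTRY,s) pinned to EARLIEST(ENTRY,s).  */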

static void
compute_laterin (edge_list, earliest, antloc, later, laterin)
     struct edge_list *edge_list;
     sbitmap *earliest, *antloc, *later, *laterin;
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize a mapping from each edge to its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution, so initially consider LATER true for
     all edges.  This allows propagation through a loop since the incoming
     loop edge will have LATER set, so if all the other incoming edges
     to the loop are set, then LATERIN will be set for the head of the
     loop.

     If the optimistic setting of LATER on that edge was incorrect (for
     example the expression is ANTLOC in a block within the loop) then
     this algorithm will detect it when we process the block at the head
     of the optimistic edge.  That will requeue the affected blocks.  */
  sbitmap_vector_ones (later, num_edges);

  /* Note that even though we want an optimistic setting of LATER, we
     do not want to be overly optimistic.  Consider an outgoing edge from
     the entry block.  That edge should always have a LATER value the
     same as EARLIEST for that edge.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit from
     the loop given our optimistic initialization of LATER above.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }
  qin = worklist;
  /* Note that we do not use the last allocated element for our queue,
     as EXIT_BLOCK is never inserted into it.  In fact the above allocation
     of n_basic_blocks + 1 elements is not necessary.  */
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      bb->aux = NULL;
      qlen--;
      if (qout >= qend)
	qout = worklist;

      /* Compute the intersection of LATERIN for each incoming edge to B.  */
      sbitmap_ones (laterin[bb->index]);
      for (e = bb->pred; e != NULL; e = e->pred_next)
	sbitmap_a_and_b (laterin[bb->index], laterin[bb->index],
			 later[(size_t) e->aux]);

      /* Calculate LATER for all outgoing edges.  */
      for (e = bb->succ; e != NULL; e = e->succ_next)
	if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
				      earliest[(size_t) e->aux],
				      laterin[e->src->index],
				      antloc[e->src->index])
	    /* If LATER for an outgoing edge was changed, then we need
	       to add the target of the outgoing edge to the worklist.  */
	    && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
	  {
	    *qin++ = e->dest;
	    e->dest->aux = e;
	    qlen++;
	    if (qin >= qend)
	      qin = worklist;
	  }
    }

  /* Computation of insertion and deletion points requires computing LATERIN
     for the EXIT block.  We allocated an extra entry in the LATERIN array
     for just this purpose.  */
  sbitmap_ones (laterin[last_basic_block]);
  for (e = EXIT_BLOCK_PTR->pred; e != NULL; e = e->pred_next)
    sbitmap_a_and_b (laterin[last_basic_block],
		     laterin[last_basic_block],
		     later[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (worklist);
}

/* Compute the insertion and deletion points for edge based LCM.  */
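
/* The final vectors follow directly from LATER/LATERIN:

     DELETE(b)   = ANTLOC(b) & ~LATERIN(b)
     INSERT(p,s) = LATER(p,s) & ~LATERIN(s)

   using the extra LATERIN slot for the EXIT block when s is EXIT.  */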

static void
compute_insert_delete (edge_list, antloc, later, laterin,
		       insert, delete)
     struct edge_list *edge_list;
     sbitmap *antloc, *later, *laterin, *insert, *delete;
{
  int x;
  basic_block bb;

  FOR_EACH_BB (bb)
    sbitmap_difference (delete[bb->index], antloc[bb->index],
			laterin[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);

      if (b == EXIT_BLOCK_PTR)
	sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
      else
	sbitmap_difference (insert[x], later[x], laterin[b->index]);
    }
}

/* Given local properties TRANSP, ANTLOC, AVLOC, KILL return the insert and
   delete vectors for edge based LCM.  Returns an edge list which is used to
   map the insert vector to what edge an expression should be inserted on.  */
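
/* A minimal usage sketch (editorial; the names EXPR_INDEX and the local
   analysis step are hypothetical, and gcse.c is one real caller):

     sbitmap *insert, *delete;
     struct edge_list *edges;

     ... fill the per-block vectors TRANSP, AVLOC, ANTLOC and KILL,
	 each LAST_BASIC_BLOCK entries of N_EXPRS bits, from a local
	 analysis pass ...

     edges = pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill,
			   &insert, &delete);
     for (i = 0; i < NUM_EDGES (edges); i++)
       if (TEST_BIT (insert[i], expr_index))
	 ... insert a computation of the expression numbered EXPR_INDEX
	     on INDEX_EDGE (edges, i) ...  */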

struct edge_list *
pre_edge_lcm (file, n_exprs, transp, avloc, antloc, kill, insert, delete)
     FILE *file ATTRIBUTE_UNUSED;
     int n_exprs;
     sbitmap *transp;
     sbitmap *avloc;
     sbitmap *antloc;
     sbitmap *kill;
     sbitmap **insert;
     sbitmap **delete;
{
  sbitmap *antin, *antout, *earliest;
  sbitmap *avin, *avout;
  sbitmap *later, *laterin;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
      dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
      dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
    }
#endif

  /* Compute global availability.  */
  avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (avloc, kill, avout, avin);
  sbitmap_vector_free (avin);

  /* Compute global anticipatability.  */
  antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_antinout_edge (antloc, transp, antin, antout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
      dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
    }
#endif

  /* Compute earliestness.  */
  earliest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
#endif

  sbitmap_vector_free (antout);
  sbitmap_vector_free (antin);
  sbitmap_vector_free (avout);

  later = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the exit block in the laterin vector.  */
  laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_laterin (edge_list, earliest, antloc, later, laterin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "laterin", "", laterin, last_basic_block + 1);
      dump_sbitmap_vector (file, "later", "", later, num_edges);
    }
#endif

  sbitmap_vector_free (earliest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);

  sbitmap_vector_free (laterin);
  sbitmap_vector_free (later);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif

  return edge_list;
}

/* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.
   Iterates until a maximal solution is reached.  */
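
/* The equations, as implemented by the calls below:

     AVIN(b)  = intersection over predecessors p of b of AVOUT(p)
                (empty for successors of ENTRY)
     AVOUT(b) = AVLOC(b) | (AVIN(b) & ~KILL(b))

   iterated to a maximal fixed point from an all-ones AVOUT.  */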

void
compute_available (avloc, kill, avout, avin)
     sbitmap *avloc, *kill, *avout, *avin;
{
  edge e;
  basic_block *worklist, *qin, *qout, *qend, bb;
  unsigned int qlen;

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  qin = qout = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * n_basic_blocks);

  /* We want a maximal solution.  */
  sbitmap_vector_ones (avout, last_basic_block);

  /* Put every block on the worklist; this is necessary because of the
     optimistic initialization of AVOUT above.  */
  FOR_EACH_BB (bb)
    {
      *qin++ = bb;
      bb->aux = bb;
    }

  qin = worklist;
  qend = &worklist[n_basic_blocks];
  qlen = n_basic_blocks;

  /* Mark blocks which are successors of the entry block so that we
     can easily identify them below.  */
  for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next)
    e->dest->aux = ENTRY_BLOCK_PTR;

  /* Iterate until the worklist is empty.  */
  while (qlen)
    {
      /* Take the first entry off the worklist.  */
      bb = *qout++;
      qlen--;

      if (qout >= qend)
	qout = worklist;

      /* If one of the predecessor blocks is the ENTRY block, then the
	 intersection of avouts is the null set.  We can identify such blocks
	 by the special value in the AUX field in the block structure.  */
      if (bb->aux == ENTRY_BLOCK_PTR)
	/* Do not clear the aux field for blocks which are successors of the
	   ENTRY block.  That way we never add them to the worklist again.  */
	sbitmap_zero (avin[bb->index]);
      else
	{
	  /* Clear the aux field of this block so that it can be added to
	     the worklist again if necessary.  */
	  bb->aux = NULL;
	  sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
	}

      if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index],
				    avin[bb->index], kill[bb->index]))
	/* If the out state of this block changed, then we need
	   to add the successors of this block to the worklist
	   if they are not already on the worklist.  */
	for (e = bb->succ; e; e = e->succ_next)
	  if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
	    {
	      *qin++ = e->dest;
	      e->dest->aux = e;
	      qlen++;

	      if (qin >= qend)
		qin = worklist;
	    }
    }

  clear_aux_for_edges ();
  clear_aux_for_blocks ();
  free (worklist);
}

/* Compute the farthest vector for edge based LCM.  */
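
/* For an edge (p,s) on the reverse flowgraph, the code below computes

     FARTHEST(p,s) = ST_AVOUT(p) & ~ST_ANTIN(s) & (KILL(s) | ~ST_AVIN(s))

   with boundary cases FARTHEST(p,EXIT) = ST_AVOUT(p) and
   FARTHEST(ENTRY,s) = 0; it is the mirror of compute_earliest.  */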

static void
compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		  kill, farthest)
     struct edge_list *edge_list;
     int n_exprs;
     sbitmap *st_avout, *st_avin, *st_antin, *kill, *farthest;
{
  sbitmap difference, temp_bitmap;
  int x, num_edges;
  basic_block pred, succ;

  num_edges = NUM_EDGES (edge_list);

  difference = sbitmap_alloc (n_exprs);
  temp_bitmap = sbitmap_alloc (n_exprs);

  for (x = 0; x < num_edges; x++)
    {
      pred = INDEX_EDGE_PRED_BB (edge_list, x);
      succ = INDEX_EDGE_SUCC_BB (edge_list, x);
      if (succ == EXIT_BLOCK_PTR)
	sbitmap_copy (farthest[x], st_avout[pred->index]);
      else
	{
	  if (pred == ENTRY_BLOCK_PTR)
	    sbitmap_zero (farthest[x]);
	  else
	    {
	      sbitmap_difference (difference, st_avout[pred->index],
				  st_antin[succ->index]);
	      sbitmap_not (temp_bitmap, st_avin[succ->index]);
	      sbitmap_a_and_b_or_c (farthest[x], difference,
				    kill[succ->index], temp_bitmap);
	    }
	}
    }

  sbitmap_free (temp_bitmap);
  sbitmap_free (difference);
}

/* Compute nearer and nearerout vectors for edge based LCM.

   This is the mirror of compute_laterin; additional comments on the
   implementation can be found before compute_laterin.  */
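
/* As equations (the mirror of LATER/LATERIN, read off the code below):

     NEAREROUT(b) = intersection over outgoing edges (b,s) of NEARER(b,s)
     NEARER(p,s)  = FARTHEST(p,s) | (NEAREROUT(s) & ~ST_AVLOC(s))

   iterated from all-ones NEARER, with NEARER(p,EXIT) pinned to
   FARTHEST(p,EXIT).  */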

static void
compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout)
     struct edge_list *edge_list;
     sbitmap *farthest, *st_avloc, *nearer, *nearerout;
{
  int num_edges, i;
  edge e;
  basic_block *worklist, *tos, bb;

  num_edges = NUM_EDGES (edge_list);

  /* Allocate a worklist array/queue.  Entries are only added to the
     list if they were not already on the list.  So the size is
     bounded by the number of basic blocks.  */
  tos = worklist
    = (basic_block *) xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));

  /* Initialize NEARER for each edge and build a mapping from an edge to
     its index.  */
  for (i = 0; i < num_edges; i++)
    INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;

  /* We want a maximal solution.  */
  sbitmap_vector_ones (nearer, num_edges);

  /* Note that even though we want an optimistic setting of NEARER, we
     do not want to be overly optimistic.  Consider an incoming edge to
     the exit block.  That edge should always have a NEARER value the
     same as FARTHEST for that edge.  */
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    sbitmap_copy (nearer[(size_t) e->aux], farthest[(size_t) e->aux]);

  /* Add all the blocks to the worklist.  This prevents an early exit
     from the loop given our optimistic initialization of NEARER.  */
  FOR_EACH_BB (bb)
    {
      *tos++ = bb;
      bb->aux = bb;
    }

  /* Iterate until the worklist is empty.  */
  while (tos != worklist)
    {
      /* Take the first entry off the worklist.  */
      bb = *--tos;
      bb->aux = NULL;

      /* Compute the intersection of NEARER for each outgoing edge from B.  */
      sbitmap_ones (nearerout[bb->index]);
      for (e = bb->succ; e != NULL; e = e->succ_next)
	sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
			 nearer[(size_t) e->aux]);

      /* Calculate NEARER for all incoming edges.  */
      for (e = bb->pred; e != NULL; e = e->pred_next)
	if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
				      farthest[(size_t) e->aux],
				      nearerout[e->dest->index],
				      st_avloc[e->dest->index])
	    /* If NEARER for an incoming edge was changed, then we need
	       to add the source of the incoming edge to the worklist.  */
	    && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
	  {
	    *tos++ = e->src;
	    e->src->aux = e;
	  }
    }

  /* Computation of insertion and deletion points requires computing NEAREROUT
     for the ENTRY block.  We allocated an extra entry in the NEAREROUT array
     for just this purpose.  */
  sbitmap_ones (nearerout[last_basic_block]);
  for (e = ENTRY_BLOCK_PTR->succ; e != NULL; e = e->succ_next)
    sbitmap_a_and_b (nearerout[last_basic_block],
		     nearerout[last_basic_block],
		     nearer[(size_t) e->aux]);

  clear_aux_for_edges ();
  free (tos);
}

/* Compute the insertion and deletion points for edge based reverse LCM.  */
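
/* Mirroring compute_insert_delete:

     DELETE(b)   = ST_AVLOC(b) & ~NEAREROUT(b)
     INSERT(p,s) = NEARER(p,s) & ~NEAREROUT(p)

   using the extra NEAREROUT slot for the ENTRY block when p is ENTRY.  */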

static void
compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			   insert, delete)
     struct edge_list *edge_list;
     sbitmap *st_avloc, *nearer, *nearerout, *insert, *delete;
{
  int x;
  basic_block bb;

  FOR_EACH_BB (bb)
    sbitmap_difference (delete[bb->index], st_avloc[bb->index],
			nearerout[bb->index]);

  for (x = 0; x < NUM_EDGES (edge_list); x++)
    {
      basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
      if (b == ENTRY_BLOCK_PTR)
	sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
      else
	sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
    }
}

/* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
   insert and delete vectors for edge based reverse LCM.  Returns an
   edge list which is used to map the insert vector to what edge
   an expression should be inserted on.  */

struct edge_list *
pre_edge_rev_lcm (file, n_exprs, transp, st_avloc, st_antloc, kill,
		  insert, delete)
     FILE *file ATTRIBUTE_UNUSED;
     int n_exprs;
     sbitmap *transp;
     sbitmap *st_avloc;
     sbitmap *st_antloc;
     sbitmap *kill;
     sbitmap **insert;
     sbitmap **delete;
{
  sbitmap *st_antin, *st_antout;
  sbitmap *st_avout, *st_avin, *farthest;
  sbitmap *nearer, *nearerout;
  struct edge_list *edge_list;
  int num_edges;

  edge_list = create_edge_list ();
  num_edges = NUM_EDGES (edge_list);

  st_antin = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_antout = (sbitmap *) sbitmap_vector_alloc (last_basic_block, n_exprs);
  sbitmap_vector_zero (st_antin, last_basic_block);
  sbitmap_vector_zero (st_antout, last_basic_block);
  compute_antinout_edge (st_antloc, transp, st_antin, st_antout);

  /* Compute global availability.  */
  st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
  st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_available (st_avloc, kill, st_avout, st_avin);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      fprintf (file, "Edge List:\n");
      verify_edge_list (file, edge_list);
      print_edge_list (file, edge_list);
      dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
      dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
      dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
      dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
      dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
    }
#endif

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
      dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
    }
#endif

  /* Compute farthestness.  */
  farthest = sbitmap_vector_alloc (num_edges, n_exprs);
  compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
		    kill, farthest);

#ifdef LCM_DEBUG_INFO
  if (file)
    dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
#endif

  sbitmap_vector_free (st_antin);
  sbitmap_vector_free (st_antout);

  sbitmap_vector_free (st_avin);
  sbitmap_vector_free (st_avout);

  nearer = sbitmap_vector_alloc (num_edges, n_exprs);

  /* Allocate an extra element for the entry block.  */
  nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
  compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "nearerout", "", nearerout,
			   last_basic_block + 1);
      dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
    }
#endif

  sbitmap_vector_free (farthest);

  *insert = sbitmap_vector_alloc (num_edges, n_exprs);
  *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
  compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
			     *insert, *delete);

  sbitmap_vector_free (nearerout);
  sbitmap_vector_free (nearer);

#ifdef LCM_DEBUG_INFO
  if (file)
    {
      dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
      dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
			   last_basic_block);
    }
#endif

  return edge_list;
}

/* Mode switching:

   The algorithm for setting the modes consists of scanning the insn list
   and finding all the insns which require a specific mode.  Each insn gets
   a unique struct seginfo element.  These structures are inserted into a list
   for each basic block.  For each entity, there is an array of bb_info over
   the flow graph basic blocks (local var 'bb_info'), which contains a list
   of all insns within that basic block, in the order they are encountered.

   For each entity, any basic block WITHOUT any insns requiring a specific
   mode is given a single entry, without a mode.  (Each basic block
   in the flow graph must have at least one entry in the segment table.)

   The LCM algorithm is then run over the flow graph to determine where to
   place the sets to the highest-priority value with respect to the first
   insn in any one block.  Any adjustments required to the transparency
   vectors are made, then the next iteration starts for the next-lower
   priority mode, until for each entity all modes are exhausted.

   More details are located in the code for optimize_mode_switching().  */
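
/* The target interface consumed by this code (an editorial summary; each
   macro below is referenced in optimize_mode_switching, and its exact
   expectations follow from those uses):

     OPTIMIZE_MODE_SWITCHING (ENTITY)  - nonzero to optimize this entity
     NUM_MODES_FOR_MODE_SWITCHING      - initializer: mode count per entity
     MODE_NEEDED (ENTITY, INSN)        - mode INSN requires, or the
					 entity's no-mode value
     MODE_PRIORITY_TO_MODE (ENTITY, N) - Nth highest priority mode
     EMIT_MODE_SET (ENTITY, MODE, HARD_REGS_LIVE) - emit a mode switch
     NORMAL_MODE (ENTITY)              - optional mode at function
					 entry/exit  */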

/* This structure contains the information for each insn which requires
   either single or double mode to be set.
   MODE is the mode this insn must be executed in.
   INSN_PTR is the insn to be executed (may be the note that marks the
   beginning of a basic block).
   BBNUM is the flow graph basic block this insn occurs in.
   NEXT is the next insn in the same basic block.
   REGS_LIVE is the set of hard registers live at this point; it is
   passed to EMIT_MODE_SET when the mode switch is finally emitted.  */
struct seginfo
{
  int mode;
  rtx insn_ptr;
  int bbnum;
  struct seginfo *next;
  HARD_REG_SET regs_live;
};

struct bb_info
{
  struct seginfo *seginfo;
  int computing;
};

/* These bitmaps are used for the LCM algorithm.  */

#ifdef OPTIMIZE_MODE_SWITCHING
static sbitmap *antic;
static sbitmap *transp;
static sbitmap *comp;
static sbitmap *delete;
static sbitmap *insert;

static struct seginfo * new_seginfo PARAMS ((int, rtx, int, HARD_REG_SET));
static void add_seginfo PARAMS ((struct bb_info *, struct seginfo *));
static void reg_dies PARAMS ((rtx, HARD_REG_SET));
static void reg_becomes_live PARAMS ((rtx, rtx, void *));
static void make_preds_opaque PARAMS ((basic_block, int));
#endif

#ifdef OPTIMIZE_MODE_SWITCHING

/* This function will allocate a new SEGINFO structure, initialized
   with the MODE, INSN, and basic block BB parameters.  */

static struct seginfo *
new_seginfo (mode, insn, bb, regs_live)
     int mode;
     rtx insn;
     int bb;
     HARD_REG_SET regs_live;
{
  struct seginfo *ptr;
  ptr = xmalloc (sizeof (struct seginfo));
  ptr->mode = mode;
  ptr->insn_ptr = insn;
  ptr->bbnum = bb;
  ptr->next = NULL;
  COPY_HARD_REG_SET (ptr->regs_live, regs_live);
  return ptr;
}

/* Add a seginfo element to the end of a list.
   HEAD is a pointer to the list beginning.
   INFO is the structure to be linked in.  */

static void
add_seginfo (head, info)
     struct bb_info *head;
     struct seginfo *info;
{
  struct seginfo *ptr;

  if (head->seginfo == NULL)
    head->seginfo = info;
  else
    {
      ptr = head->seginfo;
      while (ptr->next != NULL)
	ptr = ptr->next;
      ptr->next = info;
    }
}

/* Make all predecessors of basic block B opaque, recursively, till we hit
   some that are already non-transparent, or an edge where aux is set; that
   denotes that a mode set is to be done on that edge.
   J is the bit number in the bitmaps that corresponds to the entity that
   we are currently handling mode-switching for.  */

static void
make_preds_opaque (b, j)
     basic_block b;
     int j;
{
  edge e;

  for (e = b->pred; e; e = e->pred_next)
    {
      basic_block pb = e->src;

      if (e->aux || ! TEST_BIT (transp[pb->index], j))
	continue;

      RESET_BIT (transp[pb->index], j);
      make_preds_opaque (pb, j);
    }
}

/* Record in LIVE that register REG died.  */

static void
reg_dies (reg, live)
     rtx reg;
     HARD_REG_SET live;
{
  int regno, nregs;

  if (GET_CODE (reg) != REG)
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
	 nregs--)
      CLEAR_HARD_REG_BIT (live, regno + nregs);
}

/* Record in LIVE that register REG became live.
   This is called via note_stores.  */

static void
reg_becomes_live (reg, setter, live)
     rtx reg;
     rtx setter ATTRIBUTE_UNUSED;
     void *live;
{
  int regno, nregs;

  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);

  if (GET_CODE (reg) != REG)
    return;

  regno = REGNO (reg);
  if (regno < FIRST_PSEUDO_REGISTER)
    for (nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg)) - 1; nregs >= 0;
	 nregs--)
      SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
}

/* Find all insns that need a particular mode setting, and insert the
   necessary mode switches.  Return true if we did work.  */

int
optimize_mode_switching (file)
     FILE *file;
{
  rtx insn;
  int e;
  basic_block bb;
  int need_commit = 0;
  sbitmap *kill;
  struct edge_list *edge_list;
  static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
#define N_ENTITIES ARRAY_SIZE (num_modes)
  int entity_map[N_ENTITIES];
  struct bb_info *bb_info[N_ENTITIES];
  int i, j;
  int n_entities;
  int max_num_modes = 0;
  bool emited = false;
  basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;

  clear_bb_flags ();

  for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
    if (OPTIMIZE_MODE_SWITCHING (e))
      {
	int entry_exit_extra = 0;

	/* Create the list of segments within each basic block.
	   If NORMAL_MODE is defined, allow for two extra
	   blocks split from the entry and exit block.  */
#ifdef NORMAL_MODE
	entry_exit_extra = 2;
#endif
	bb_info[n_entities]
	  = (struct bb_info *) xcalloc (last_basic_block + entry_exit_extra,
					sizeof **bb_info);
	entity_map[n_entities++] = e;
	if (num_modes[e] > max_num_modes)
	  max_num_modes = num_modes[e];
      }

  if (! n_entities)
    return 0;

#ifdef NORMAL_MODE
  {
    /* Split the edge from the entry block and the fallthrough edge to the
       exit block, so that we can note that NORMAL_MODE is supplied /
       required there.  */
    edge eg;
    post_entry = split_edge (ENTRY_BLOCK_PTR->succ);
    /* The only non-call predecessor at this stage is a block with a
       fallthrough edge; there can be at most one, but there could be
       none at all, e.g. when exit is called.  */
    for (pre_exit = 0, eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
      if (eg->flags & EDGE_FALLTHRU)
	{
	  regset live_at_end = eg->src->global_live_at_end;

	  if (pre_exit)
	    abort ();
	  pre_exit = split_edge (eg);
	  COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
	  COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
	}
  }
#endif

  /* Create the bitmap vectors.  */

  antic = sbitmap_vector_alloc (last_basic_block, n_entities);
  transp = sbitmap_vector_alloc (last_basic_block, n_entities);
  comp = sbitmap_vector_alloc (last_basic_block, n_entities);

  sbitmap_vector_ones (transp, last_basic_block);

  for (j = n_entities - 1; j >= 0; j--)
    {
      int e = entity_map[j];
      int no_mode = num_modes[e];
      struct bb_info *info = bb_info[j];

      /* Determine what mode (if any) the first use of entity E in each
	 basic block needs.  This will be the mode that is anticipatable
	 for this block.  Also compute the initial transparency settings.  */
      FOR_EACH_BB (bb)
	{
	  struct seginfo *ptr;
	  int last_mode = no_mode;
	  HARD_REG_SET live_now;

	  REG_SET_TO_HARD_REG_SET (live_now,
				   bb->global_live_at_start);
	  for (insn = bb->head;
	       insn != NULL && insn != NEXT_INSN (bb->end);
	       insn = NEXT_INSN (insn))
	    {
	      if (INSN_P (insn))
		{
		  int mode = MODE_NEEDED (e, insn);
		  rtx link;

		  if (mode != no_mode && mode != last_mode)
		    {
		      last_mode = mode;
		      ptr = new_seginfo (mode, insn, bb->index, live_now);
		      add_seginfo (info + bb->index, ptr);
		      RESET_BIT (transp[bb->index], j);
		    }

		  /* Update LIVE_NOW.  */
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_DEAD)
		      reg_dies (XEXP (link, 0), live_now);

		  note_stores (PATTERN (insn), reg_becomes_live, &live_now);
		  for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
		    if (REG_NOTE_KIND (link) == REG_UNUSED)
		      reg_dies (XEXP (link, 0), live_now);
		}
	    }

	  info[bb->index].computing = last_mode;
	  /* Check for blocks without ANY mode requirements.  */
	  if (last_mode == no_mode)
	    {
	      ptr = new_seginfo (no_mode, bb->end, bb->index, live_now);
	      add_seginfo (info + bb->index, ptr);
	    }
	}
#ifdef NORMAL_MODE
      {
	int mode = NORMAL_MODE (e);

	if (mode != no_mode)
	  {
	    bb = post_entry;

	    /* By always making this nontransparent, we save
	       an extra check in make_preds_opaque.  We also
	       need this to avoid confusing pre_edge_lcm when
	       antic is cleared but transp and comp are set.  */
	    RESET_BIT (transp[bb->index], j);

	    /* Insert a fake computing definition of MODE into entry
	       blocks which compute no mode.  This represents the mode on
	       entry.  */
	    info[bb->index].computing = mode;

	    if (pre_exit)
	      info[pre_exit->index].seginfo->mode = mode;
	  }
      }
#endif /* NORMAL_MODE */
    }

  kill = sbitmap_vector_alloc (last_basic_block, n_entities);
  for (i = 0; i < max_num_modes; i++)
    {
      int current_mode[N_ENTITIES];

      /* Set the anticipatable and computing arrays.  */
      sbitmap_vector_zero (antic, last_basic_block);
      sbitmap_vector_zero (comp, last_basic_block);
      for (j = n_entities - 1; j >= 0; j--)
	{
	  int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
	  struct bb_info *info = bb_info[j];

	  FOR_EACH_BB (bb)
	    {
	      if (info[bb->index].seginfo->mode == m)
		SET_BIT (antic[bb->index], j);

	      if (info[bb->index].computing == m)
		SET_BIT (comp[bb->index], j);
	    }
	}

      /* Calculate the optimal locations for the
	 placement of mode switches to modes with priority I.  */

      FOR_EACH_BB (bb)
	sbitmap_not (kill[bb->index], transp[bb->index]);
      edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
				kill, &insert, &delete);

      for (j = n_entities - 1; j >= 0; j--)
	{
	  /* Insert all mode sets that LCM has chosen to insert.  */
	  int no_mode = num_modes[entity_map[j]];

	  /* Wherever we have moved a mode setting upwards in the flow graph,
	     the blocks between the new setting site and the now redundant
	     computation cease to be transparent for any lower-priority
	     mode of the same entity.  First set the aux field of each
	     insertion site edge non-transparent, then propagate the new
	     non-transparency from the redundant computation upwards till
	     we hit an insertion site or an already non-transparent block.  */
	  for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
	    {
	      edge eg = INDEX_EDGE (edge_list, e);
	      int mode;
	      basic_block src_bb;
	      HARD_REG_SET live_at_edge;
	      rtx mode_set;

	      eg->aux = 0;

	      if (! TEST_BIT (insert[e], j))
		continue;

	      eg->aux = (void *) 1;

	      mode = current_mode[j];
	      src_bb = eg->src;

	      REG_SET_TO_HARD_REG_SET (live_at_edge,
				       src_bb->global_live_at_end);

	      start_sequence ();
	      EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
	      mode_set = get_insns ();
	      end_sequence ();

	      /* Do not bother to insert an empty sequence.  */
	      if (mode_set == NULL_RTX)
		continue;

	      /* If this is an abnormal edge, we'll insert at the end
		 of the previous block.  */
	      if (eg->flags & EDGE_ABNORMAL)
		{
		  emited = true;
		  if (GET_CODE (src_bb->end) == JUMP_INSN)
		    emit_insn_before (mode_set, src_bb->end);
		  /* It doesn't make sense to switch to normal mode
		     after a CALL_INSN, so we're going to abort if we
		     find one.  The cases in which a CALL_INSN may
		     have an abnormal edge are sibcalls and EH edges.
		     In the case of sibcalls, the dest basic-block is
		     the EXIT_BLOCK, that runs in normal mode; it is
		     assumed that a sibcall insn requires normal mode
		     itself, so no mode switch would be required after
		     the call (it wouldn't make sense, anyway).  In
		     the case of EH edges, EH entry points also start
		     in normal mode, so a similar reasoning applies.  */
		  else if (GET_CODE (src_bb->end) == INSN)
		    emit_insn_after (mode_set, src_bb->end);
		  else
		    abort ();
		  bb_info[j][src_bb->index].computing = mode;
		  RESET_BIT (transp[src_bb->index], j);
		}
	      else
		{
		  need_commit = 1;
		  insert_insn_on_edge (mode_set, eg);
		}
	    }

	  FOR_EACH_BB_REVERSE (bb)
	    if (TEST_BIT (delete[bb->index], j))
	      {
		make_preds_opaque (bb, j);
		/* Cancel the 'deleted' mode set.  */
		bb_info[j][bb->index].seginfo->mode = no_mode;
	      }
	}

      clear_aux_for_edges ();
      free_edge_list (edge_list);
    }

  /* Now output the remaining mode sets in all the segments.  */
  for (j = n_entities - 1; j >= 0; j--)
    {
      int no_mode = num_modes[entity_map[j]];

      FOR_EACH_BB_REVERSE (bb)
	{
	  struct seginfo *ptr, *next;
	  for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
	    {
	      next = ptr->next;
	      if (ptr->mode != no_mode)
		{
		  rtx mode_set;

		  start_sequence ();
		  EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
		  mode_set = get_insns ();
		  end_sequence ();

		  /* Do not bother to insert an empty sequence; note that
		     we still fall through so that PTR is freed below.  */
		  if (mode_set != NULL_RTX)
		    {
		      emited = true;
		      if (GET_CODE (ptr->insn_ptr) == NOTE
			  && (NOTE_LINE_NUMBER (ptr->insn_ptr)
			      == NOTE_INSN_BASIC_BLOCK))
			emit_insn_after (mode_set, ptr->insn_ptr);
		      else
			emit_insn_before (mode_set, ptr->insn_ptr);
		    }
		}

	      free (ptr);
	    }
	}

      free (bb_info[j]);
    }

  /* Finished.  Free up all the things we've allocated.  */

  sbitmap_vector_free (kill);
  sbitmap_vector_free (antic);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);
  sbitmap_vector_free (delete);
  sbitmap_vector_free (insert);

  if (need_commit)
    commit_edge_insertions ();

#ifdef NORMAL_MODE
  cleanup_cfg (CLEANUP_NO_INSN_DEL);
#else
  if (!need_commit && !emited)
    return 0;
#endif

  max_regno = max_reg_num ();
  allocate_reg_info (max_regno, FALSE, FALSE);
  update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
				    (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
				     | PROP_SCAN_DEAD_CODE));

  return 1;
}
#endif /* OPTIMIZE_MODE_SWITCHING */