* basic-block.h (FOR_EACH_EDGE): Record initial edge count.
[official-gcc.git] / gcc / lcm.c
blobbdd83cfdd0f104e1323f7db87c84c7c9b35f9974
1 /* Generic partial redundancy elimination with lazy code motion support.
2 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* These routines are meant to be used by various optimization
23 passes which can be modeled as lazy code motion problems.
24 Including, but not limited to:
26 * Traditional partial redundancy elimination.
28 * Placement of caller/caller register save/restores.
30 * Load/store motion.
32 * Copy motion.
34 * Conversion of flat register files to a stacked register
35 model.
37 * Dead load/store elimination.
39 These routines accept as input:
41 * Basic block information (number of blocks, lists of
42 predecessors and successors). Note the granularity
43 does not need to be basic block, they could be statements
44 or functions.
46 * Bitmaps of local properties (computed, transparent and
47 anticipatable expressions).
49 The output of these routines is bitmap of redundant computations
50 and a bitmap of optimal placement points. */
53 #include "config.h"
54 #include "system.h"
55 #include "coretypes.h"
56 #include "tm.h"
57 #include "rtl.h"
58 #include "regs.h"
59 #include "hard-reg-set.h"
60 #include "flags.h"
61 #include "real.h"
62 #include "insn-config.h"
63 #include "recog.h"
64 #include "basic-block.h"
65 #include "output.h"
66 #include "tm_p.h"
67 #include "function.h"
69 /* We want target macros for the mode switching code to be able to refer
70 to instruction attribute values. */
71 #include "insn-attr.h"
73 /* Edge based LCM routines. */
74 static void compute_antinout_edge (sbitmap *, sbitmap *, sbitmap *, sbitmap *);
75 static void compute_earliest (struct edge_list *, int, sbitmap *, sbitmap *,
76 sbitmap *, sbitmap *, sbitmap *);
77 static void compute_laterin (struct edge_list *, sbitmap *, sbitmap *,
78 sbitmap *, sbitmap *);
79 static void compute_insert_delete (struct edge_list *edge_list, sbitmap *,
80 sbitmap *, sbitmap *, sbitmap *, sbitmap *);
82 /* Edge based LCM routines on a reverse flowgraph. */
83 static void compute_farthest (struct edge_list *, int, sbitmap *, sbitmap *,
84 sbitmap*, sbitmap *, sbitmap *);
85 static void compute_nearerout (struct edge_list *, sbitmap *, sbitmap *,
86 sbitmap *, sbitmap *);
87 static void compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *,
88 sbitmap *, sbitmap *, sbitmap *,
89 sbitmap *);
91 /* Edge based lcm routines. */
93 /* Compute expression anticipatability at entrance and exit of each block.
94 This is done based on the flow graph, and not on the pred-succ lists.
95 Other than that, its pretty much identical to compute_antinout. */
97 static void
98 compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin,
99 sbitmap *antout)
101 basic_block bb;
102 edge e;
103 basic_block *worklist, *qin, *qout, *qend;
104 unsigned int qlen;
106 /* Allocate a worklist array/queue. Entries are only added to the
107 list if they were not already on the list. So the size is
108 bounded by the number of basic blocks. */
109 qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);
111 /* We want a maximal solution, so make an optimistic initialization of
112 ANTIN. */
113 sbitmap_vector_ones (antin, last_basic_block);
115 /* Put every block on the worklist; this is necessary because of the
116 optimistic initialization of ANTIN above. */
117 FOR_EACH_BB_REVERSE (bb)
119 *qin++ = bb;
120 bb->aux = bb;
123 qin = worklist;
124 qend = &worklist[n_basic_blocks];
125 qlen = n_basic_blocks;
127 /* Mark blocks which are predecessors of the exit block so that we
128 can easily identify them below. */
129 FOR_EACH_EDGE (e, EXIT_BLOCK_PTR->preds)
131 e->src->aux = EXIT_BLOCK_PTR;
133 END_FOR_EACH_EDGE;
135 /* Iterate until the worklist is empty. */
136 while (qlen)
138 /* Take the first entry off the worklist. */
139 bb = *qout++;
140 qlen--;
142 if (qout >= qend)
143 qout = worklist;
145 if (bb->aux == EXIT_BLOCK_PTR)
146 /* Do not clear the aux field for blocks which are predecessors of
147 the EXIT block. That way we never add then to the worklist
148 again. */
149 sbitmap_zero (antout[bb->index]);
150 else
152 /* Clear the aux field of this block so that it can be added to
153 the worklist again if necessary. */
154 bb->aux = NULL;
155 sbitmap_intersection_of_succs (antout[bb->index], antin, bb->index);
158 if (sbitmap_a_or_b_and_c_cg (antin[bb->index], antloc[bb->index],
159 transp[bb->index], antout[bb->index]))
160 /* If the in state of this block changed, then we need
161 to add the predecessors of this block to the worklist
162 if they are not already on the worklist. */
163 FOR_EACH_EDGE (e, bb->preds)
165 if (!e->src->aux && e->src != ENTRY_BLOCK_PTR)
167 *qin++ = e->src;
168 e->src->aux = e;
169 qlen++;
170 if (qin >= qend)
171 qin = worklist;
174 END_FOR_EACH_EDGE;
177 clear_aux_for_edges ();
178 clear_aux_for_blocks ();
179 free (worklist);
182 /* Compute the earliest vector for edge based lcm. */
184 static void
185 compute_earliest (struct edge_list *edge_list, int n_exprs, sbitmap *antin,
186 sbitmap *antout, sbitmap *avout, sbitmap *kill,
187 sbitmap *earliest)
189 sbitmap difference, temp_bitmap;
190 int x, num_edges;
191 basic_block pred, succ;
193 num_edges = NUM_EDGES (edge_list);
195 difference = sbitmap_alloc (n_exprs);
196 temp_bitmap = sbitmap_alloc (n_exprs);
198 for (x = 0; x < num_edges; x++)
200 pred = INDEX_EDGE_PRED_BB (edge_list, x);
201 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
202 if (pred == ENTRY_BLOCK_PTR)
203 sbitmap_copy (earliest[x], antin[succ->index]);
204 else
206 if (succ == EXIT_BLOCK_PTR)
207 sbitmap_zero (earliest[x]);
208 else
210 sbitmap_difference (difference, antin[succ->index],
211 avout[pred->index]);
212 sbitmap_not (temp_bitmap, antout[pred->index]);
213 sbitmap_a_and_b_or_c (earliest[x], difference,
214 kill[pred->index], temp_bitmap);
219 sbitmap_free (temp_bitmap);
220 sbitmap_free (difference);
223 /* later(p,s) is dependent on the calculation of laterin(p).
224 laterin(p) is dependent on the calculation of later(p2,p).
226 laterin(ENTRY) is defined as all 0's
227 later(ENTRY, succs(ENTRY)) are defined using laterin(ENTRY)
228 laterin(succs(ENTRY)) is defined by later(ENTRY, succs(ENTRY)).
230 If we progress in this manner, starting with all basic blocks
231 in the work list, anytime we change later(bb), we need to add
232 succs(bb) to the worklist if they are not already on the worklist.
234 Boundary conditions:
236 We prime the worklist all the normal basic blocks. The ENTRY block can
237 never be added to the worklist since it is never the successor of any
238 block. We explicitly prevent the EXIT block from being added to the
239 worklist.
241 We optimistically initialize LATER. That is the only time this routine
242 will compute LATER for an edge out of the entry block since the entry
243 block is never on the worklist. Thus, LATERIN is neither used nor
244 computed for the ENTRY block.
246 Since the EXIT block is never added to the worklist, we will neither
247 use nor compute LATERIN for the exit block. Edges which reach the
248 EXIT block are handled in the normal fashion inside the loop. However,
249 the insertion/deletion computation needs LATERIN(EXIT), so we have
250 to compute it. */
252 static void
253 compute_laterin (struct edge_list *edge_list, sbitmap *earliest,
254 sbitmap *antloc, sbitmap *later, sbitmap *laterin)
256 int num_edges, i;
257 edge e;
258 basic_block *worklist, *qin, *qout, *qend, bb;
259 unsigned int qlen;
261 num_edges = NUM_EDGES (edge_list);
263 /* Allocate a worklist array/queue. Entries are only added to the
264 list if they were not already on the list. So the size is
265 bounded by the number of basic blocks. */
266 qin = qout = worklist
267 = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
269 /* Initialize a mapping from each edge to its index. */
270 for (i = 0; i < num_edges; i++)
271 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
273 /* We want a maximal solution, so initially consider LATER true for
274 all edges. This allows propagation through a loop since the incoming
275 loop edge will have LATER set, so if all the other incoming edges
276 to the loop are set, then LATERIN will be set for the head of the
277 loop.
279 If the optimistic setting of LATER on that edge was incorrect (for
280 example the expression is ANTLOC in a block within the loop) then
281 this algorithm will detect it when we process the block at the head
282 of the optimistic edge. That will requeue the affected blocks. */
283 sbitmap_vector_ones (later, num_edges);
285 /* Note that even though we want an optimistic setting of LATER, we
286 do not want to be overly optimistic. Consider an outgoing edge from
287 the entry block. That edge should always have a LATER value the
288 same as EARLIEST for that edge. */
289 FOR_EACH_EDGE (e, ENTRY_BLOCK_PTR->succs)
291 sbitmap_copy (later[(size_t) e->aux], earliest[(size_t) e->aux]);
293 END_FOR_EACH_EDGE;
295 /* Add all the blocks to the worklist. This prevents an early exit from
296 the loop given our optimistic initialization of LATER above. */
297 FOR_EACH_BB (bb)
299 *qin++ = bb;
300 bb->aux = bb;
302 qin = worklist;
303 /* Note that we do not use the last allocated element for our queue,
304 as EXIT_BLOCK is never inserted into it. In fact the above allocation
305 of n_basic_blocks + 1 elements is not necessary. */
306 qend = &worklist[n_basic_blocks];
307 qlen = n_basic_blocks;
309 /* Iterate until the worklist is empty. */
310 while (qlen)
312 /* Take the first entry off the worklist. */
313 bb = *qout++;
314 bb->aux = NULL;
315 qlen--;
316 if (qout >= qend)
317 qout = worklist;
319 /* Compute the intersection of LATERIN for each incoming edge to B. */
320 sbitmap_ones (laterin[bb->index]);
321 FOR_EACH_EDGE (e, bb->preds)
323 sbitmap_a_and_b (laterin[bb->index], laterin[bb->index], later[(size_t)e->aux]);
325 END_FOR_EACH_EDGE;
327 /* Calculate LATER for all outgoing edges. */
328 FOR_EACH_EDGE (e, bb->succs)
330 if (sbitmap_union_of_diff_cg (later[(size_t) e->aux],
331 earliest[(size_t) e->aux],
332 laterin[e->src->index],
333 antloc[e->src->index])
334 /* If LATER for an outgoing edge was changed, then we need
335 to add the target of the outgoing edge to the worklist. */
336 && e->dest != EXIT_BLOCK_PTR && e->dest->aux == 0)
338 *qin++ = e->dest;
339 e->dest->aux = e;
340 qlen++;
341 if (qin >= qend)
342 qin = worklist;
345 END_FOR_EACH_EDGE;
348 /* Computation of insertion and deletion points requires computing LATERIN
349 for the EXIT block. We allocated an extra entry in the LATERIN array
350 for just this purpose. */
351 sbitmap_ones (laterin[last_basic_block]);
352 FOR_EACH_EDGE (e, EXIT_BLOCK_PTR->preds)
354 sbitmap_a_and_b (laterin[last_basic_block],
355 laterin[last_basic_block],
356 later[(size_t) e->aux]);
358 END_FOR_EACH_EDGE;
360 clear_aux_for_edges ();
361 free (worklist);
364 /* Compute the insertion and deletion points for edge based LCM. */
366 static void
367 compute_insert_delete (struct edge_list *edge_list, sbitmap *antloc,
368 sbitmap *later, sbitmap *laterin, sbitmap *insert,
369 sbitmap *delete)
371 int x;
372 basic_block bb;
374 FOR_EACH_BB (bb)
375 sbitmap_difference (delete[bb->index], antloc[bb->index], laterin[bb->index]);
377 for (x = 0; x < NUM_EDGES (edge_list); x++)
379 basic_block b = INDEX_EDGE_SUCC_BB (edge_list, x);
381 if (b == EXIT_BLOCK_PTR)
382 sbitmap_difference (insert[x], later[x], laterin[last_basic_block]);
383 else
384 sbitmap_difference (insert[x], later[x], laterin[b->index]);
388 /* Given local properties TRANSP, ANTLOC, AVOUT, KILL return the insert and
389 delete vectors for edge based LCM. Returns an edgelist which is used to
390 map the insert vector to what edge an expression should be inserted on. */
392 struct edge_list *
393 pre_edge_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
394 sbitmap *avloc, sbitmap *antloc, sbitmap *kill,
395 sbitmap **insert, sbitmap **delete)
397 sbitmap *antin, *antout, *earliest;
398 sbitmap *avin, *avout;
399 sbitmap *later, *laterin;
400 struct edge_list *edge_list;
401 int num_edges;
403 edge_list = create_edge_list ();
404 num_edges = NUM_EDGES (edge_list);
406 #ifdef LCM_DEBUG_INFO
407 if (file)
409 fprintf (file, "Edge List:\n");
410 verify_edge_list (file, edge_list);
411 print_edge_list (file, edge_list);
412 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
413 dump_sbitmap_vector (file, "antloc", "", antloc, last_basic_block);
414 dump_sbitmap_vector (file, "avloc", "", avloc, last_basic_block);
415 dump_sbitmap_vector (file, "kill", "", kill, last_basic_block);
417 #endif
419 /* Compute global availability. */
420 avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
421 avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
422 compute_available (avloc, kill, avout, avin);
423 sbitmap_vector_free (avin);
425 /* Compute global anticipatability. */
426 antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
427 antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
428 compute_antinout_edge (antloc, transp, antin, antout);
430 #ifdef LCM_DEBUG_INFO
431 if (file)
433 dump_sbitmap_vector (file, "antin", "", antin, last_basic_block);
434 dump_sbitmap_vector (file, "antout", "", antout, last_basic_block);
436 #endif
438 /* Compute earliestness. */
439 earliest = sbitmap_vector_alloc (num_edges, n_exprs);
440 compute_earliest (edge_list, n_exprs, antin, antout, avout, kill, earliest);
442 #ifdef LCM_DEBUG_INFO
443 if (file)
444 dump_sbitmap_vector (file, "earliest", "", earliest, num_edges);
445 #endif
447 sbitmap_vector_free (antout);
448 sbitmap_vector_free (antin);
449 sbitmap_vector_free (avout);
451 later = sbitmap_vector_alloc (num_edges, n_exprs);
453 /* Allocate an extra element for the exit block in the laterin vector. */
454 laterin = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
455 compute_laterin (edge_list, earliest, antloc, later, laterin);
457 #ifdef LCM_DEBUG_INFO
458 if (file)
460 dump_sbitmap_vector (file, "laterin", "", laterin, last_basic_block + 1);
461 dump_sbitmap_vector (file, "later", "", later, num_edges);
463 #endif
465 sbitmap_vector_free (earliest);
467 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
468 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
469 compute_insert_delete (edge_list, antloc, later, laterin, *insert, *delete);
471 sbitmap_vector_free (laterin);
472 sbitmap_vector_free (later);
474 #ifdef LCM_DEBUG_INFO
475 if (file)
477 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
478 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
479 last_basic_block);
481 #endif
483 return edge_list;
486 /* Compute the AVIN and AVOUT vectors from the AVLOC and KILL vectors.
487 Return the number of passes we performed to iterate to a solution. */
489 void
490 compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout,
491 sbitmap *avin)
493 edge e;
494 basic_block *worklist, *qin, *qout, *qend, bb;
495 unsigned int qlen;
497 /* Allocate a worklist array/queue. Entries are only added to the
498 list if they were not already on the list. So the size is
499 bounded by the number of basic blocks. */
500 qin = qout = worklist = xmalloc (sizeof (basic_block) * n_basic_blocks);
502 /* We want a maximal solution. */
503 sbitmap_vector_ones (avout, last_basic_block);
505 /* Put every block on the worklist; this is necessary because of the
506 optimistic initialization of AVOUT above. */
507 FOR_EACH_BB (bb)
509 *qin++ = bb;
510 bb->aux = bb;
513 qin = worklist;
514 qend = &worklist[n_basic_blocks];
515 qlen = n_basic_blocks;
517 /* Mark blocks which are successors of the entry block so that we
518 can easily identify them below. */
519 FOR_EACH_EDGE (e, ENTRY_BLOCK_PTR->succs)
521 e->dest->aux = ENTRY_BLOCK_PTR;
523 END_FOR_EACH_EDGE;
525 /* Iterate until the worklist is empty. */
526 while (qlen)
528 /* Take the first entry off the worklist. */
529 bb = *qout++;
530 qlen--;
532 if (qout >= qend)
533 qout = worklist;
535 /* If one of the predecessor blocks is the ENTRY block, then the
536 intersection of avouts is the null set. We can identify such blocks
537 by the special value in the AUX field in the block structure. */
538 if (bb->aux == ENTRY_BLOCK_PTR)
539 /* Do not clear the aux field for blocks which are successors of the
540 ENTRY block. That way we never add then to the worklist again. */
541 sbitmap_zero (avin[bb->index]);
542 else
544 /* Clear the aux field of this block so that it can be added to
545 the worklist again if necessary. */
546 bb->aux = NULL;
547 sbitmap_intersection_of_preds (avin[bb->index], avout, bb->index);
550 if (sbitmap_union_of_diff_cg (avout[bb->index], avloc[bb->index], avin[bb->index], kill[bb->index]))
551 /* If the out state of this block changed, then we need
552 to add the successors of this block to the worklist
553 if they are not already on the worklist. */
554 FOR_EACH_EDGE (e, bb->succs)
556 if (!e->dest->aux && e->dest != EXIT_BLOCK_PTR)
558 *qin++ = e->dest;
559 e->dest->aux = e;
560 qlen++;
562 if (qin >= qend)
563 qin = worklist;
566 END_FOR_EACH_EDGE;
569 clear_aux_for_edges ();
570 clear_aux_for_blocks ();
571 free (worklist);
574 /* Compute the farthest vector for edge based lcm. */
576 static void
577 compute_farthest (struct edge_list *edge_list, int n_exprs,
578 sbitmap *st_avout, sbitmap *st_avin, sbitmap *st_antin,
579 sbitmap *kill, sbitmap *farthest)
581 sbitmap difference, temp_bitmap;
582 int x, num_edges;
583 basic_block pred, succ;
585 num_edges = NUM_EDGES (edge_list);
587 difference = sbitmap_alloc (n_exprs);
588 temp_bitmap = sbitmap_alloc (n_exprs);
590 for (x = 0; x < num_edges; x++)
592 pred = INDEX_EDGE_PRED_BB (edge_list, x);
593 succ = INDEX_EDGE_SUCC_BB (edge_list, x);
594 if (succ == EXIT_BLOCK_PTR)
595 sbitmap_copy (farthest[x], st_avout[pred->index]);
596 else
598 if (pred == ENTRY_BLOCK_PTR)
599 sbitmap_zero (farthest[x]);
600 else
602 sbitmap_difference (difference, st_avout[pred->index],
603 st_antin[succ->index]);
604 sbitmap_not (temp_bitmap, st_avin[succ->index]);
605 sbitmap_a_and_b_or_c (farthest[x], difference,
606 kill[succ->index], temp_bitmap);
611 sbitmap_free (temp_bitmap);
612 sbitmap_free (difference);
615 /* Compute nearer and nearerout vectors for edge based lcm.
617 This is the mirror of compute_laterin, additional comments on the
618 implementation can be found before compute_laterin. */
620 static void
621 compute_nearerout (struct edge_list *edge_list, sbitmap *farthest,
622 sbitmap *st_avloc, sbitmap *nearer, sbitmap *nearerout)
624 int num_edges, i;
625 edge e;
626 basic_block *worklist, *tos, bb;
628 num_edges = NUM_EDGES (edge_list);
630 /* Allocate a worklist array/queue. Entries are only added to the
631 list if they were not already on the list. So the size is
632 bounded by the number of basic blocks. */
633 tos = worklist = xmalloc (sizeof (basic_block) * (n_basic_blocks + 1));
635 /* Initialize NEARER for each edge and build a mapping from an edge to
636 its index. */
637 for (i = 0; i < num_edges; i++)
638 INDEX_EDGE (edge_list, i)->aux = (void *) (size_t) i;
640 /* We want a maximal solution. */
641 sbitmap_vector_ones (nearer, num_edges);
643 /* Note that even though we want an optimistic setting of NEARER, we
644 do not want to be overly optimistic. Consider an incoming edge to
645 the exit block. That edge should always have a NEARER value the
646 same as FARTHEST for that edge. */
647 FOR_EACH_EDGE (e, EXIT_BLOCK_PTR->preds)
649 sbitmap_copy (nearer[(size_t)e->aux], farthest[(size_t)e->aux]);
651 END_FOR_EACH_EDGE;
653 /* Add all the blocks to the worklist. This prevents an early exit
654 from the loop given our optimistic initialization of NEARER. */
655 FOR_EACH_BB (bb)
657 *tos++ = bb;
658 bb->aux = bb;
661 /* Iterate until the worklist is empty. */
662 while (tos != worklist)
664 /* Take the first entry off the worklist. */
665 bb = *--tos;
666 bb->aux = NULL;
668 /* Compute the intersection of NEARER for each outgoing edge from B. */
669 sbitmap_ones (nearerout[bb->index]);
670 FOR_EACH_EDGE (e, bb->succs)
672 sbitmap_a_and_b (nearerout[bb->index], nearerout[bb->index],
673 nearer[(size_t) e->aux]);
675 END_FOR_EACH_EDGE;
677 /* Calculate NEARER for all incoming edges. */
678 FOR_EACH_EDGE (e, bb->preds)
680 if (sbitmap_union_of_diff_cg (nearer[(size_t) e->aux],
681 farthest[(size_t) e->aux],
682 nearerout[e->dest->index],
683 st_avloc[e->dest->index])
684 /* If NEARER for an incoming edge was changed, then we need
685 to add the source of the incoming edge to the worklist. */
686 && e->src != ENTRY_BLOCK_PTR && e->src->aux == 0)
688 *tos++ = e->src;
689 e->src->aux = e;
692 END_FOR_EACH_EDGE;
695 /* Computation of insertion and deletion points requires computing NEAREROUT
696 for the ENTRY block. We allocated an extra entry in the NEAREROUT array
697 for just this purpose. */
698 sbitmap_ones (nearerout[last_basic_block]);
699 FOR_EACH_EDGE (e, ENTRY_BLOCK_PTR->succs)
701 sbitmap_a_and_b (nearerout[last_basic_block],
702 nearerout[last_basic_block],
703 nearer[(size_t) e->aux]);
705 END_FOR_EACH_EDGE;
707 clear_aux_for_edges ();
708 free (tos);
711 /* Compute the insertion and deletion points for edge based LCM. */
713 static void
714 compute_rev_insert_delete (struct edge_list *edge_list, sbitmap *st_avloc,
715 sbitmap *nearer, sbitmap *nearerout,
716 sbitmap *insert, sbitmap *delete)
718 int x;
719 basic_block bb;
721 FOR_EACH_BB (bb)
722 sbitmap_difference (delete[bb->index], st_avloc[bb->index], nearerout[bb->index]);
724 for (x = 0; x < NUM_EDGES (edge_list); x++)
726 basic_block b = INDEX_EDGE_PRED_BB (edge_list, x);
727 if (b == ENTRY_BLOCK_PTR)
728 sbitmap_difference (insert[x], nearer[x], nearerout[last_basic_block]);
729 else
730 sbitmap_difference (insert[x], nearer[x], nearerout[b->index]);
734 /* Given local properties TRANSP, ST_AVLOC, ST_ANTLOC, KILL return the
735 insert and delete vectors for edge based reverse LCM. Returns an
736 edgelist which is used to map the insert vector to what edge
737 an expression should be inserted on. */
739 struct edge_list *
740 pre_edge_rev_lcm (FILE *file ATTRIBUTE_UNUSED, int n_exprs, sbitmap *transp,
741 sbitmap *st_avloc, sbitmap *st_antloc, sbitmap *kill,
742 sbitmap **insert, sbitmap **delete)
744 sbitmap *st_antin, *st_antout;
745 sbitmap *st_avout, *st_avin, *farthest;
746 sbitmap *nearer, *nearerout;
747 struct edge_list *edge_list;
748 int num_edges;
750 edge_list = create_edge_list ();
751 num_edges = NUM_EDGES (edge_list);
753 st_antin = sbitmap_vector_alloc (last_basic_block, n_exprs);
754 st_antout = sbitmap_vector_alloc (last_basic_block, n_exprs);
755 sbitmap_vector_zero (st_antin, last_basic_block);
756 sbitmap_vector_zero (st_antout, last_basic_block);
757 compute_antinout_edge (st_antloc, transp, st_antin, st_antout);
759 /* Compute global anticipatability. */
760 st_avout = sbitmap_vector_alloc (last_basic_block, n_exprs);
761 st_avin = sbitmap_vector_alloc (last_basic_block, n_exprs);
762 compute_available (st_avloc, kill, st_avout, st_avin);
764 #ifdef LCM_DEBUG_INFO
765 if (file)
767 fprintf (file, "Edge List:\n");
768 verify_edge_list (file, edge_list);
769 print_edge_list (file, edge_list);
770 dump_sbitmap_vector (file, "transp", "", transp, last_basic_block);
771 dump_sbitmap_vector (file, "st_avloc", "", st_avloc, last_basic_block);
772 dump_sbitmap_vector (file, "st_antloc", "", st_antloc, last_basic_block);
773 dump_sbitmap_vector (file, "st_antin", "", st_antin, last_basic_block);
774 dump_sbitmap_vector (file, "st_antout", "", st_antout, last_basic_block);
775 dump_sbitmap_vector (file, "st_kill", "", kill, last_basic_block);
777 #endif
779 #ifdef LCM_DEBUG_INFO
780 if (file)
782 dump_sbitmap_vector (file, "st_avout", "", st_avout, last_basic_block);
783 dump_sbitmap_vector (file, "st_avin", "", st_avin, last_basic_block);
785 #endif
787 /* Compute farthestness. */
788 farthest = sbitmap_vector_alloc (num_edges, n_exprs);
789 compute_farthest (edge_list, n_exprs, st_avout, st_avin, st_antin,
790 kill, farthest);
792 #ifdef LCM_DEBUG_INFO
793 if (file)
794 dump_sbitmap_vector (file, "farthest", "", farthest, num_edges);
795 #endif
797 sbitmap_vector_free (st_antin);
798 sbitmap_vector_free (st_antout);
800 sbitmap_vector_free (st_avin);
801 sbitmap_vector_free (st_avout);
803 nearer = sbitmap_vector_alloc (num_edges, n_exprs);
805 /* Allocate an extra element for the entry block. */
806 nearerout = sbitmap_vector_alloc (last_basic_block + 1, n_exprs);
807 compute_nearerout (edge_list, farthest, st_avloc, nearer, nearerout);
809 #ifdef LCM_DEBUG_INFO
810 if (file)
812 dump_sbitmap_vector (file, "nearerout", "", nearerout,
813 last_basic_block + 1);
814 dump_sbitmap_vector (file, "nearer", "", nearer, num_edges);
816 #endif
818 sbitmap_vector_free (farthest);
820 *insert = sbitmap_vector_alloc (num_edges, n_exprs);
821 *delete = sbitmap_vector_alloc (last_basic_block, n_exprs);
822 compute_rev_insert_delete (edge_list, st_avloc, nearer, nearerout,
823 *insert, *delete);
825 sbitmap_vector_free (nearerout);
826 sbitmap_vector_free (nearer);
828 #ifdef LCM_DEBUG_INFO
829 if (file)
831 dump_sbitmap_vector (file, "pre_insert_map", "", *insert, num_edges);
832 dump_sbitmap_vector (file, "pre_delete_map", "", *delete,
833 last_basic_block);
835 #endif
836 return edge_list;
839 /* Mode switching:
841 The algorithm for setting the modes consists of scanning the insn list
842 and finding all the insns which require a specific mode. Each insn gets
843 a unique struct seginfo element. These structures are inserted into a list
844 for each basic block. For each entity, there is an array of bb_info over
845 the flow graph basic blocks (local var 'bb_info'), and contains a list
846 of all insns within that basic block, in the order they are encountered.
848 For each entity, any basic block WITHOUT any insns requiring a specific
849 mode are given a single entry, without a mode. (Each basic block
850 in the flow graph must have at least one entry in the segment table.)
852 The LCM algorithm is then run over the flow graph to determine where to
853 place the sets to the highest-priority value in respect of first the first
854 insn in any one block. Any adjustments required to the transparency
855 vectors are made, then the next iteration starts for the next-lower
856 priority mode, till for each entity all modes are exhausted.
858 More details are located in the code for optimize_mode_switching(). */
860 /* This structure contains the information for each insn which requires
861 either single or double mode to be set.
862 MODE is the mode this insn must be executed in.
863 INSN_PTR is the insn to be executed (may be the note that marks the
864 beginning of a basic block).
865 BBNUM is the flow graph basic block this insn occurs in.
866 NEXT is the next insn in the same basic block. */
867 struct seginfo
869 int mode;
870 rtx insn_ptr;
871 int bbnum;
872 struct seginfo *next;
873 HARD_REG_SET regs_live;
876 struct bb_info
878 struct seginfo *seginfo;
879 int computing;
882 /* These bitmaps are used for the LCM algorithm. */
884 #ifdef OPTIMIZE_MODE_SWITCHING
885 static sbitmap *antic;
886 static sbitmap *transp;
887 static sbitmap *comp;
888 static sbitmap *delete;
889 static sbitmap *insert;
891 static struct seginfo * new_seginfo (int, rtx, int, HARD_REG_SET);
892 static void add_seginfo (struct bb_info *, struct seginfo *);
893 static void reg_dies (rtx, HARD_REG_SET);
894 static void reg_becomes_live (rtx, rtx, void *);
895 static void make_preds_opaque (basic_block, int);
896 #endif
898 #ifdef OPTIMIZE_MODE_SWITCHING
900 /* This function will allocate a new BBINFO structure, initialized
901 with the MODE, INSN, and basic block BB parameters. */
903 static struct seginfo *
904 new_seginfo (int mode, rtx insn, int bb, HARD_REG_SET regs_live)
906 struct seginfo *ptr;
907 ptr = xmalloc (sizeof (struct seginfo));
908 ptr->mode = mode;
909 ptr->insn_ptr = insn;
910 ptr->bbnum = bb;
911 ptr->next = NULL;
912 COPY_HARD_REG_SET (ptr->regs_live, regs_live);
913 return ptr;
916 /* Add a seginfo element to the end of a list.
917 HEAD is a pointer to the list beginning.
918 INFO is the structure to be linked in. */
920 static void
921 add_seginfo (struct bb_info *head, struct seginfo *info)
923 struct seginfo *ptr;
925 if (head->seginfo == NULL)
926 head->seginfo = info;
927 else
929 ptr = head->seginfo;
930 while (ptr->next != NULL)
931 ptr = ptr->next;
932 ptr->next = info;
936 /* Make all predecessors of basic block B opaque, recursively, till we hit
937 some that are already non-transparent, or an edge where aux is set; that
938 denotes that a mode set is to be done on that edge.
939 J is the bit number in the bitmaps that corresponds to the entity that
940 we are currently handling mode-switching for. */
942 static void
943 make_preds_opaque (basic_block b, int j)
945 edge e;
947 FOR_EACH_EDGE (e, b->preds)
949 basic_block pb = e->src;
951 if (e->aux || ! TEST_BIT (transp[pb->index], j))
952 continue;
954 RESET_BIT (transp[pb->index], j);
955 make_preds_opaque (pb, j);
957 END_FOR_EACH_EDGE;
960 /* Record in LIVE that register REG died. */
962 static void
963 reg_dies (rtx reg, HARD_REG_SET live)
965 int regno, nregs;
967 if (!REG_P (reg))
968 return;
970 regno = REGNO (reg);
971 if (regno < FIRST_PSEUDO_REGISTER)
972 for (nregs = hard_regno_nregs[regno][GET_MODE (reg)] - 1; nregs >= 0;
973 nregs--)
974 CLEAR_HARD_REG_BIT (live, regno + nregs);
977 /* Record in LIVE that register REG became live.
978 This is called via note_stores. */
980 static void
981 reg_becomes_live (rtx reg, rtx setter ATTRIBUTE_UNUSED, void *live)
983 int regno, nregs;
985 if (GET_CODE (reg) == SUBREG)
986 reg = SUBREG_REG (reg);
988 if (!REG_P (reg))
989 return;
991 regno = REGNO (reg);
992 if (regno < FIRST_PSEUDO_REGISTER)
993 for (nregs = hard_regno_nregs[regno][GET_MODE (reg)] - 1; nregs >= 0;
994 nregs--)
995 SET_HARD_REG_BIT (* (HARD_REG_SET *) live, regno + nregs);
998 /* Make sure if MODE_ENTRY is defined the MODE_EXIT is defined
999 and vice versa. */
1000 #if defined (MODE_ENTRY) != defined (MODE_EXIT)
1001 #error "Both MODE_ENTRY and MODE_EXIT must be defined"
1002 #endif
1004 /* Find all insns that need a particular mode setting, and insert the
1005 necessary mode switches. Return true if we did work. */
1008 optimize_mode_switching (FILE *file)
1010 rtx insn;
1011 int e;
1012 basic_block bb;
1013 int need_commit = 0;
1014 sbitmap *kill;
1015 struct edge_list *edge_list;
1016 static const int num_modes[] = NUM_MODES_FOR_MODE_SWITCHING;
1017 #define N_ENTITIES ARRAY_SIZE (num_modes)
1018 int entity_map[N_ENTITIES];
1019 struct bb_info *bb_info[N_ENTITIES];
1020 int i, j;
1021 int n_entities;
1022 int max_num_modes = 0;
1023 bool emited = false;
1024 basic_block post_entry ATTRIBUTE_UNUSED, pre_exit ATTRIBUTE_UNUSED;
1026 clear_bb_flags ();
1028 for (e = N_ENTITIES - 1, n_entities = 0; e >= 0; e--)
1029 if (OPTIMIZE_MODE_SWITCHING (e))
1031 int entry_exit_extra = 0;
1033 /* Create the list of segments within each basic block.
1034 If NORMAL_MODE is defined, allow for two extra
1035 blocks split from the entry and exit block. */
1036 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1037 entry_exit_extra = 2;
1038 #endif
1039 bb_info[n_entities]
1040 = xcalloc (last_basic_block + entry_exit_extra, sizeof **bb_info);
1041 entity_map[n_entities++] = e;
1042 if (num_modes[e] > max_num_modes)
1043 max_num_modes = num_modes[e];
1046 if (! n_entities)
1047 return 0;
1049 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1051 /* Split the edge from the entry block and the fallthrough edge to the
1052 exit block, so that we can note that there NORMAL_MODE is supplied /
1053 required. */
1054 edge eg;
1055 post_entry = split_edge (ENTRY_BLOCK_PTR->succ);
1056 /* The only non-call predecessor at this stage is a block with a
1057 fallthrough edge; there can be at most one, but there could be
1058 none at all, e.g. when exit is called. */
1059 for (pre_exit = 0, eg = EXIT_BLOCK_PTR->pred; eg; eg = eg->pred_next)
1060 if (eg->flags & EDGE_FALLTHRU)
1062 regset live_at_end = eg->src->global_live_at_end;
1064 if (pre_exit)
1065 abort ();
1066 pre_exit = split_edge (eg);
1067 COPY_REG_SET (pre_exit->global_live_at_start, live_at_end);
1068 COPY_REG_SET (pre_exit->global_live_at_end, live_at_end);
1071 #endif
1073 /* Create the bitmap vectors. */
1075 antic = sbitmap_vector_alloc (last_basic_block, n_entities);
1076 transp = sbitmap_vector_alloc (last_basic_block, n_entities);
1077 comp = sbitmap_vector_alloc (last_basic_block, n_entities);
1079 sbitmap_vector_ones (transp, last_basic_block);
1081 for (j = n_entities - 1; j >= 0; j--)
1083 int e = entity_map[j];
1084 int no_mode = num_modes[e];
1085 struct bb_info *info = bb_info[j];
1087 /* Determine what the first use (if any) need for a mode of entity E is.
1088 This will be the mode that is anticipatable for this block.
1089 Also compute the initial transparency settings. */
1090 FOR_EACH_BB (bb)
1092 struct seginfo *ptr;
1093 int last_mode = no_mode;
1094 HARD_REG_SET live_now;
1096 REG_SET_TO_HARD_REG_SET (live_now,
1097 bb->global_live_at_start);
1098 for (insn = BB_HEAD (bb);
1099 insn != NULL && insn != NEXT_INSN (BB_END (bb));
1100 insn = NEXT_INSN (insn))
1102 if (INSN_P (insn))
1104 int mode = MODE_NEEDED (e, insn);
1105 rtx link;
1107 if (mode != no_mode && mode != last_mode)
1109 last_mode = mode;
1110 ptr = new_seginfo (mode, insn, bb->index, live_now);
1111 add_seginfo (info + bb->index, ptr);
1112 RESET_BIT (transp[bb->index], j);
1114 #ifdef MODE_AFTER
1115 last_mode = MODE_AFTER (last_mode, insn);
1116 #endif
1117 /* Update LIVE_NOW. */
1118 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1119 if (REG_NOTE_KIND (link) == REG_DEAD)
1120 reg_dies (XEXP (link, 0), live_now);
1122 note_stores (PATTERN (insn), reg_becomes_live, &live_now);
1123 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
1124 if (REG_NOTE_KIND (link) == REG_UNUSED)
1125 reg_dies (XEXP (link, 0), live_now);
1129 info[bb->index].computing = last_mode;
1130 /* Check for blocks without ANY mode requirements. */
1131 if (last_mode == no_mode)
1133 ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now);
1134 add_seginfo (info + bb->index, ptr);
1137 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1139 int mode = MODE_ENTRY (e);
1141 if (mode != no_mode)
1143 bb = post_entry;
1145 /* By always making this nontransparent, we save
1146 an extra check in make_preds_opaque. We also
1147 need this to avoid confusing pre_edge_lcm when
1148 antic is cleared but transp and comp are set. */
1149 RESET_BIT (transp[bb->index], j);
1151 /* Insert a fake computing definition of MODE into entry
1152 blocks which compute no mode. This represents the mode on
1153 entry. */
1154 info[bb->index].computing = mode;
1156 if (pre_exit)
1157 info[pre_exit->index].seginfo->mode = MODE_EXIT (e);
1160 #endif /* NORMAL_MODE */
1163 kill = sbitmap_vector_alloc (last_basic_block, n_entities);
1164 for (i = 0; i < max_num_modes; i++)
1166 int current_mode[N_ENTITIES];
1168 /* Set the anticipatable and computing arrays. */
1169 sbitmap_vector_zero (antic, last_basic_block);
1170 sbitmap_vector_zero (comp, last_basic_block);
1171 for (j = n_entities - 1; j >= 0; j--)
1173 int m = current_mode[j] = MODE_PRIORITY_TO_MODE (entity_map[j], i);
1174 struct bb_info *info = bb_info[j];
1176 FOR_EACH_BB (bb)
1178 if (info[bb->index].seginfo->mode == m)
1179 SET_BIT (antic[bb->index], j);
1181 if (info[bb->index].computing == m)
1182 SET_BIT (comp[bb->index], j);
1186 /* Calculate the optimal locations for the
1187 placement mode switches to modes with priority I. */
1189 FOR_EACH_BB (bb)
1190 sbitmap_not (kill[bb->index], transp[bb->index]);
1191 edge_list = pre_edge_lcm (file, 1, transp, comp, antic,
1192 kill, &insert, &delete);
1194 for (j = n_entities - 1; j >= 0; j--)
1196 /* Insert all mode sets that have been inserted by lcm. */
1197 int no_mode = num_modes[entity_map[j]];
1199 /* Wherever we have moved a mode setting upwards in the flow graph,
1200 the blocks between the new setting site and the now redundant
1201 computation ceases to be transparent for any lower-priority
1202 mode of the same entity. First set the aux field of each
1203 insertion site edge non-transparent, then propagate the new
1204 non-transparency from the redundant computation upwards till
1205 we hit an insertion site or an already non-transparent block. */
1206 for (e = NUM_EDGES (edge_list) - 1; e >= 0; e--)
1208 edge eg = INDEX_EDGE (edge_list, e);
1209 int mode;
1210 basic_block src_bb;
1211 HARD_REG_SET live_at_edge;
1212 rtx mode_set;
1214 eg->aux = 0;
1216 if (! TEST_BIT (insert[e], j))
1217 continue;
1219 eg->aux = (void *)1;
1221 mode = current_mode[j];
1222 src_bb = eg->src;
1224 REG_SET_TO_HARD_REG_SET (live_at_edge,
1225 src_bb->global_live_at_end);
1227 start_sequence ();
1228 EMIT_MODE_SET (entity_map[j], mode, live_at_edge);
1229 mode_set = get_insns ();
1230 end_sequence ();
1232 /* Do not bother to insert empty sequence. */
1233 if (mode_set == NULL_RTX)
1234 continue;
1236 /* If this is an abnormal edge, we'll insert at the end
1237 of the previous block. */
1238 if (eg->flags & EDGE_ABNORMAL)
1240 emited = true;
1241 if (JUMP_P (BB_END (src_bb)))
1242 emit_insn_before (mode_set, BB_END (src_bb));
1243 /* It doesn't make sense to switch to normal mode
1244 after a CALL_INSN, so we're going to abort if we
1245 find one. The cases in which a CALL_INSN may
1246 have an abnormal edge are sibcalls and EH edges.
1247 In the case of sibcalls, the dest basic-block is
1248 the EXIT_BLOCK, that runs in normal mode; it is
1249 assumed that a sibcall insn requires normal mode
1250 itself, so no mode switch would be required after
1251 the call (it wouldn't make sense, anyway). In
1252 the case of EH edges, EH entry points also start
1253 in normal mode, so a similar reasoning applies. */
1254 else if (NONJUMP_INSN_P (BB_END (src_bb)))
1255 emit_insn_after (mode_set, BB_END (src_bb));
1256 else
1257 abort ();
1258 bb_info[j][src_bb->index].computing = mode;
1259 RESET_BIT (transp[src_bb->index], j);
1261 else
1263 need_commit = 1;
1264 insert_insn_on_edge (mode_set, eg);
1268 FOR_EACH_BB_REVERSE (bb)
1269 if (TEST_BIT (delete[bb->index], j))
1271 make_preds_opaque (bb, j);
1272 /* Cancel the 'deleted' mode set. */
1273 bb_info[j][bb->index].seginfo->mode = no_mode;
1277 clear_aux_for_edges ();
1278 free_edge_list (edge_list);
1281 /* Now output the remaining mode sets in all the segments. */
1282 for (j = n_entities - 1; j >= 0; j--)
1284 int no_mode = num_modes[entity_map[j]];
1286 FOR_EACH_BB_REVERSE (bb)
1288 struct seginfo *ptr, *next;
1289 for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
1291 next = ptr->next;
1292 if (ptr->mode != no_mode)
1294 rtx mode_set;
1296 start_sequence ();
1297 EMIT_MODE_SET (entity_map[j], ptr->mode, ptr->regs_live);
1298 mode_set = get_insns ();
1299 end_sequence ();
1301 /* Do not bother to insert empty sequence. */
1302 if (mode_set == NULL_RTX)
1303 continue;
1305 emited = true;
1306 if (NOTE_P (ptr->insn_ptr)
1307 && (NOTE_LINE_NUMBER (ptr->insn_ptr)
1308 == NOTE_INSN_BASIC_BLOCK))
1309 emit_insn_after (mode_set, ptr->insn_ptr);
1310 else
1311 emit_insn_before (mode_set, ptr->insn_ptr);
1314 free (ptr);
1318 free (bb_info[j]);
1321 /* Finished. Free up all the things we've allocated. */
1323 sbitmap_vector_free (kill);
1324 sbitmap_vector_free (antic);
1325 sbitmap_vector_free (transp);
1326 sbitmap_vector_free (comp);
1327 sbitmap_vector_free (delete);
1328 sbitmap_vector_free (insert);
1330 if (need_commit)
1331 commit_edge_insertions ();
1333 #if defined (MODE_ENTRY) && defined (MODE_EXIT)
1334 cleanup_cfg (CLEANUP_NO_INSN_DEL);
1335 #else
1336 if (!need_commit && !emited)
1337 return 0;
1338 #endif
1340 max_regno = max_reg_num ();
1341 allocate_reg_info (max_regno, FALSE, FALSE);
1342 update_life_info_in_dirty_blocks (UPDATE_LIFE_GLOBAL_RM_NOTES,
1343 (PROP_DEATH_NOTES | PROP_KILL_DEAD_CODE
1344 | PROP_SCAN_DEAD_CODE));
1346 return 1;
1348 #endif /* OPTIMIZE_MODE_SWITCHING */