1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GNU CC.
9 GNU CC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the
11 Free Software Foundation; either version 2, or (at your option) any
14 GNU CC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GNU CC; see the file COPYING. If not, write to the Free
21 the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
58 #include "insn-config.h"
59 #include "insn-attr.h"
63 #include "sched-int.h"
65 #ifdef INSN_SCHEDULING
66 /* Some accessor macros for h_i_d members only used within this file. */
67 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
68 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
69 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
71 #define MAX_RGN_BLOCKS 10
72 #define MAX_RGN_INSNS 100
74 /* nr_inter/spec counts interblock/speculative motion for the function. */
75 static int nr_inter
, nr_spec
;
77 /* Control flow graph edges are kept in circular lists. */
86 static haifa_edge
*edge_table
;
88 #define NEXT_IN(edge) (edge_table[edge].next_in)
89 #define NEXT_OUT(edge) (edge_table[edge].next_out)
90 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
91 #define TO_BLOCK(edge) (edge_table[edge].to_block)
93 /* Number of edges in the control flow graph. (In fact, larger than
94 that by 1, since edge 0 is unused.) */
97 /* Circular list of incoming/outgoing edges of a block. */
99 static int *out_edges
;
101 #define IN_EDGES(block) (in_edges[block])
102 #define OUT_EDGES(block) (out_edges[block])
104 static int is_cfg_nonregular
PARAMS ((void));
105 static int build_control_flow
PARAMS ((struct edge_list
*));
106 static void new_edge
PARAMS ((int, int));
108 /* A region is the main entity for interblock scheduling: insns
109 are allowed to move between blocks in the same region, along
110 control flow graph edges, in the 'up' direction. */
113 int rgn_nr_blocks
; /* Number of blocks in region. */
114 int rgn_blocks
; /* cblocks in the region (actually index in rgn_bb_table). */
118 /* Number of regions in the procedure. */
119 static int nr_regions
;
121 /* Table of region descriptions. */
122 static region
*rgn_table
;
124 /* Array of lists of regions' blocks. */
125 static int *rgn_bb_table
;
127 /* Topological order of blocks in the region (if b2 is reachable from
128 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
129 always referred to by either block or b, while its topological
130 order name (in the region) is referred to by bb. */
131 static int *block_to_bb
;
133 /* The number of the region containing a block. */
134 static int *containing_rgn
;
136 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
137 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
138 #define BLOCK_TO_BB(block) (block_to_bb[block])
139 #define CONTAINING_RGN(block) (containing_rgn[block])
141 void debug_regions
PARAMS ((void));
142 static void find_single_block_region
PARAMS ((void));
143 static void find_rgns
PARAMS ((struct edge_list
*, sbitmap
*));
144 static int too_large
PARAMS ((int, int *, int *));
146 extern void debug_live
PARAMS ((int, int));
148 /* Blocks of the current region being scheduled. */
149 static int current_nr_blocks
;
150 static int current_blocks
;
152 /* The mapping from bb to block. */
153 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
155 /* Bit vectors and bitset operations are needed for computations on
156 the control flow graph. */
158 typedef unsigned HOST_WIDE_INT
*bitset
;
161 int *first_member
; /* Pointer to the list start in bitlst_table. */
162 int nr_members
; /* The number of members of the bit list. */
166 static int bitlst_table_last
;
167 static int bitlst_table_size
;
168 static int *bitlst_table
;
170 static char bitset_member
PARAMS ((bitset
, int, int));
171 static void extract_bitlst
PARAMS ((bitset
, int, int, bitlst
*));
173 /* Target info declarations.
175 The block currently being scheduled is referred to as the "target" block,
176 while other blocks in the region from which insns can be moved to the
177 target are called "source" blocks. The candidate structure holds info
178 about such sources: are they valid? Speculative? Etc. */
179 typedef bitlst bblst
;
190 static candidate
*candidate_table
;
192 /* A speculative motion requires checking live information on the path
193 from 'source' to 'target'. The split blocks are those to be checked.
194 After a speculative motion, live information should be modified in
197 Lists of split and update blocks for each candidate of the current
198 target are in array bblst_table. */
199 static int *bblst_table
, bblst_size
, bblst_last
;
201 #define IS_VALID(src) ( candidate_table[src].is_valid )
202 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
203 #define SRC_PROB(src) ( candidate_table[src].src_prob )
205 /* The bb being currently scheduled. */
206 static int target_bb
;
209 typedef bitlst edgelst
;
211 /* Target info functions. */
212 static void split_edges
PARAMS ((int, int, edgelst
*));
213 static void compute_trg_info
PARAMS ((int));
214 void debug_candidate
PARAMS ((int));
215 void debug_candidates
PARAMS ((int));
217 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
218 typedef bitset bbset
;
220 /* Number of words of the bbset. */
221 static int bbset_size
;
223 /* Dominators array: dom[i] contains the bbset of dominators of
224 bb i in the region. */
227 /* bb 0 is the only region entry. */
228 #define IS_RGN_ENTRY(bb) (!bb)
230 /* Is bb_src dominated by bb_trg. */
231 #define IS_DOMINATED(bb_src, bb_trg) \
232 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
234 /* Probability: Prob[i] is a float in [0, 1] which is the probability
235 of bb i relative to the region entry. */
238 /* The probability of bb_src, relative to bb_trg. Note, that while the
239 'prob[bb]' is a float in [0, 1], this macro returns an integer
241 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 /* Bit-set of edges, where bit i stands for edge i. */
245 typedef bitset edgeset
;
247 /* Number of edges in the region. */
248 static int rgn_nr_edges
;
250 /* Array of size rgn_nr_edges. */
251 static int *rgn_edges
;
253 /* Number of words in an edgeset. */
254 static int edgeset_size
;
256 /* Number of bits in an edgeset. */
257 static int edgeset_bitsize
;
259 /* Mapping from each edge in the graph to its number in the rgn. */
260 static int *edge_to_bit
;
261 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
263 /* The split edges of a source bb is different for each target
264 bb. In order to compute this efficiently, the 'potential-split edges'
265 are computed for each bb prior to scheduling a region. This is actually
266 the split edges of each bb relative to the region entry.
268 pot_split[bb] is the set of potential split edges of bb. */
269 static edgeset
*pot_split
;
271 /* For every bb, a set of its ancestor edges. */
272 static edgeset
*ancestor_edges
;
274 static void compute_dom_prob_ps
PARAMS ((int));
276 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
277 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
278 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
279 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
281 /* Parameters affecting the decision of rank_for_schedule().
282 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
283 #define MIN_DIFF_PRIORITY 2
284 #define MIN_PROBABILITY 40
285 #define MIN_PROB_DIFF 10
287 /* Speculative scheduling functions. */
288 static int check_live_1
PARAMS ((int, rtx
));
289 static void update_live_1
PARAMS ((int, rtx
));
290 static int check_live
PARAMS ((rtx
, int));
291 static void update_live
PARAMS ((rtx
, int));
292 static void set_spec_fed
PARAMS ((rtx
));
293 static int is_pfree
PARAMS ((rtx
, int, int));
294 static int find_conditional_protection
PARAMS ((rtx
, int));
295 static int is_conditionally_protected
PARAMS ((rtx
, int, int));
296 static int may_trap_exp
PARAMS ((rtx
, int));
297 static int haifa_classify_insn
PARAMS ((rtx
));
298 static int is_prisky
PARAMS ((rtx
, int, int));
299 static int is_exception_free
PARAMS ((rtx
, int, int));
301 static void add_branch_dependences
PARAMS ((rtx
, rtx
));
302 static void compute_block_backward_dependences
PARAMS ((int));
303 void debug_dependencies
PARAMS ((void));
305 static void init_regions
PARAMS ((void));
306 static void schedule_region
PARAMS ((int));
307 static void propagate_deps
PARAMS ((int, struct deps
*));
308 static void free_pending_lists
PARAMS ((void));
310 /* Functions for construction of the control flow graph. */
312 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
314 We decide not to build the control flow graph if there is possibly more
315 than one entry to the function, if computed branches exist, or if we
316 have nonlocal gotos. */
325 /* If we have a label that could be the target of a nonlocal goto, then
326 the cfg is not well structured. */
327 if (nonlocal_goto_handler_labels
)
330 /* If we have any forced labels, then the cfg is not well structured. */
334 /* If this function has a computed jump, then we consider the cfg
335 not well structured. */
336 if (current_function_has_computed_jump
)
339 /* If we have exception handlers, then we consider the cfg not well
340 structured. ?!? We should be able to handle this now that flow.c
341 computes an accurate cfg for EH. */
342 if (exception_handler_labels
)
345 /* If we have non-jumping insns which refer to labels, then we consider
346 the cfg not well structured. */
347 /* Check for labels referred to other than by jumps. */
348 for (b
= 0; b
< n_basic_blocks
; b
++)
349 for (insn
= BLOCK_HEAD (b
);; insn
= NEXT_INSN (insn
))
351 code
= GET_CODE (insn
);
352 if (GET_RTX_CLASS (code
) == 'i' && code
!= JUMP_INSN
)
354 rtx note
= find_reg_note (insn
, REG_LABEL
, NULL_RTX
);
357 && ! (GET_CODE (NEXT_INSN (insn
)) == JUMP_INSN
358 && find_reg_note (NEXT_INSN (insn
), REG_LABEL
,
363 if (insn
== BLOCK_END (b
))
367 /* All the tests passed. Consider the cfg well structured. */
371 /* Build the control flow graph and set nr_edges.
373 Instead of trying to build a cfg ourselves, we rely on flow to
374 do it for us. Stamp out useless code (and bug) duplication.
376 Return nonzero if an irregularity in the cfg is found which would
377 prevent cross block scheduling. */
380 build_control_flow (edge_list
)
381 struct edge_list
*edge_list
;
383 int i
, unreachable
, num_edges
;
385 /* This already accounts for entry/exit edges. */
386 num_edges
= NUM_EDGES (edge_list
);
388 /* Unreachable loops with more than one basic block are detected
389 during the DFS traversal in find_rgns.
391 Unreachable loops with a single block are detected here. This
392 test is redundant with the one in find_rgns, but it's much
393 cheaper to go ahead and catch the trivial case here. */
395 for (i
= 0; i
< n_basic_blocks
; i
++)
397 basic_block b
= BASIC_BLOCK (i
);
400 || (b
->pred
->src
== b
401 && b
->pred
->pred_next
== NULL
))
405 /* ??? We can kill these soon. */
406 in_edges
= (int *) xcalloc (n_basic_blocks
, sizeof (int));
407 out_edges
= (int *) xcalloc (n_basic_blocks
, sizeof (int));
408 edge_table
= (haifa_edge
*) xcalloc (num_edges
, sizeof (haifa_edge
));
411 for (i
= 0; i
< num_edges
; i
++)
413 edge e
= INDEX_EDGE (edge_list
, i
);
415 if (e
->dest
!= EXIT_BLOCK_PTR
416 && e
->src
!= ENTRY_BLOCK_PTR
)
417 new_edge (e
->src
->index
, e
->dest
->index
);
420 /* Increment by 1, since edge 0 is unused. */
426 /* Record an edge in the control flow graph from SOURCE to TARGET.
428 In theory, this is redundant with the s_succs computed above, but
429 we have not converted all of haifa to use information from the
433 new_edge (source
, target
)
437 int curr_edge
, fst_edge
;
439 /* Check for duplicates. */
440 fst_edge
= curr_edge
= OUT_EDGES (source
);
443 if (FROM_BLOCK (curr_edge
) == source
444 && TO_BLOCK (curr_edge
) == target
)
449 curr_edge
= NEXT_OUT (curr_edge
);
451 if (fst_edge
== curr_edge
)
457 FROM_BLOCK (e
) = source
;
458 TO_BLOCK (e
) = target
;
460 if (OUT_EDGES (source
))
462 next_edge
= NEXT_OUT (OUT_EDGES (source
));
463 NEXT_OUT (OUT_EDGES (source
)) = e
;
464 NEXT_OUT (e
) = next_edge
;
468 OUT_EDGES (source
) = e
;
472 if (IN_EDGES (target
))
474 next_edge
= NEXT_IN (IN_EDGES (target
));
475 NEXT_IN (IN_EDGES (target
)) = e
;
476 NEXT_IN (e
) = next_edge
;
480 IN_EDGES (target
) = e
;
485 /* BITSET macros for operations on the control flow graph. */
487 /* Compute bitwise union of two bitsets. */
488 #define BITSET_UNION(set1, set2, len) \
489 do { register bitset tp = set1, sp = set2; \
491 for (i = 0; i < len; i++) \
492 *(tp++) |= *(sp++); } while (0)
494 /* Compute bitwise intersection of two bitsets. */
495 #define BITSET_INTER(set1, set2, len) \
496 do { register bitset tp = set1, sp = set2; \
498 for (i = 0; i < len; i++) \
499 *(tp++) &= *(sp++); } while (0)
501 /* Compute bitwise difference of two bitsets. */
502 #define BITSET_DIFFER(set1, set2, len) \
503 do { register bitset tp = set1, sp = set2; \
505 for (i = 0; i < len; i++) \
506 *(tp++) &= ~*(sp++); } while (0)
508 /* Inverts every bit of bitset 'set'. */
509 #define BITSET_INVERT(set, len) \
510 do { register bitset tmpset = set; \
512 for (i = 0; i < len; i++, tmpset++) \
513 *tmpset = ~*tmpset; } while (0)
515 /* Turn on the index'th bit in bitset set. */
516 #define BITSET_ADD(set, index, len) \
518 if (index >= HOST_BITS_PER_WIDE_INT * len) \
521 set[index/HOST_BITS_PER_WIDE_INT] |= \
522 ((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT); \
525 /* Turn off the index'th bit in set. */
526 #define BITSET_REMOVE(set, index, len) \
528 if (index >= HOST_BITS_PER_WIDE_INT * len) \
531 set[index/HOST_BITS_PER_WIDE_INT] &= \
532 ~(((unsigned HOST_WIDE_INT) 1) << (index % HOST_BITS_PER_WIDE_INT)); \
535 /* Check if the index'th bit in bitset set is on. */
538 bitset_member (set
, index
, len
)
542 if (index
>= HOST_BITS_PER_WIDE_INT
* len
)
544 return ((set
[index
/ HOST_BITS_PER_WIDE_INT
] &
545 ((unsigned HOST_WIDE_INT
) 1) << (index
% HOST_BITS_PER_WIDE_INT
))
549 /* Translate a bit-set SET to a list BL of the bit-set members. */
552 extract_bitlst (set
, len
, bitlen
, bl
)
559 unsigned HOST_WIDE_INT word
;
561 /* bblst table space is reused in each call to extract_bitlst. */
562 bitlst_table_last
= 0;
564 bl
->first_member
= &bitlst_table
[bitlst_table_last
];
567 /* Iterate over each word in the bitset. */
568 for (i
= 0; i
< len
; i
++)
571 offset
= i
* HOST_BITS_PER_WIDE_INT
;
573 /* Iterate over each bit in the word, but do not
574 go beyond the end of the defined bits. */
575 for (j
= 0; offset
< bitlen
&& word
; j
++)
579 bitlst_table
[bitlst_table_last
++] = offset
;
589 /* Functions for the construction of regions. */
591 /* Print the regions, for debugging purposes. Callable from debugger. */
598 fprintf (sched_dump
, "\n;; ------------ REGIONS ----------\n\n");
599 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
601 fprintf (sched_dump
, ";;\trgn %d nr_blocks %d:\n", rgn
,
602 rgn_table
[rgn
].rgn_nr_blocks
);
603 fprintf (sched_dump
, ";;\tbb/block: ");
605 for (bb
= 0; bb
< rgn_table
[rgn
].rgn_nr_blocks
; bb
++)
607 current_blocks
= RGN_BLOCKS (rgn
);
609 if (bb
!= BLOCK_TO_BB (BB_TO_BLOCK (bb
)))
612 fprintf (sched_dump
, " %d/%d ", bb
, BB_TO_BLOCK (bb
));
615 fprintf (sched_dump
, "\n\n");
619 /* Build a single block region for each basic block in the function.
620 This allows for using the same code for interblock and basic block
624 find_single_block_region ()
628 for (i
= 0; i
< n_basic_blocks
; i
++)
631 RGN_NR_BLOCKS (i
) = 1;
633 CONTAINING_RGN (i
) = i
;
636 nr_regions
= n_basic_blocks
;
639 /* Update number of blocks and the estimate for number of insns
640 in the region. Return 1 if the region is "too large" for interblock
641 scheduling (compile time considerations), otherwise return 0. */
644 too_large (block
, num_bbs
, num_insns
)
645 int block
, *num_bbs
, *num_insns
;
648 (*num_insns
) += (INSN_LUID (BLOCK_END (block
)) -
649 INSN_LUID (BLOCK_HEAD (block
)));
650 if ((*num_bbs
> MAX_RGN_BLOCKS
) || (*num_insns
> MAX_RGN_INSNS
))
656 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
657 is still an inner loop. Put in max_hdr[blk] the header of the most inner
658 loop containing blk. */
659 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
661 if (max_hdr[blk] == -1) \
662 max_hdr[blk] = hdr; \
663 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
664 RESET_BIT (inner, hdr); \
665 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
667 RESET_BIT (inner,max_hdr[blk]); \
668 max_hdr[blk] = hdr; \
672 /* Find regions for interblock scheduling.
674 A region for scheduling can be:
676 * A loop-free procedure, or
678 * A reducible inner loop, or
680 * A basic block not contained in any other region.
682 ?!? In theory we could build other regions based on extended basic
683 blocks or reverse extended basic blocks. Is it worth the trouble?
685 Loop blocks that form a region are put into the region's block list
686 in topological order.
688 This procedure stores its results into the following global (ick) variables
696 We use dominator relationships to avoid making regions out of non-reducible
699 This procedure needs to be converted to work on pred/succ lists instead
700 of edge tables. That would simplify it somewhat. */
703 find_rgns (edge_list
, dom
)
704 struct edge_list
*edge_list
;
707 int *max_hdr
, *dfs_nr
, *stack
, *degree
;
709 int node
, child
, loop_head
, i
, head
, tail
;
710 int count
= 0, sp
, idx
= 0, current_edge
= out_edges
[0];
711 int num_bbs
, num_insns
, unreachable
;
712 int too_large_failure
;
714 /* Note if an edge has been passed. */
717 /* Note if a block is a natural loop header. */
720 /* Note if a block is a natural inner loop header. */
723 /* Note if a block is in the block queue. */
726 /* Note if a block is on the DFS traversal stack. */
729 int num_edges
= NUM_EDGES (edge_list
);
731 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
732 and a mapping from block to its loop header (if the block is contained
735 Store results in HEADER, INNER, and MAX_HDR respectively, these will
736 be used as inputs to the second traversal.
738 STACK, SP and DFS_NR are only used during the first traversal. */
740 /* Allocate and initialize variables for the first traversal. */
741 max_hdr
= (int *) xmalloc (n_basic_blocks
* sizeof (int));
742 dfs_nr
= (int *) xcalloc (n_basic_blocks
, sizeof (int));
743 stack
= (int *) xmalloc (nr_edges
* sizeof (int));
745 inner
= sbitmap_alloc (n_basic_blocks
);
746 sbitmap_ones (inner
);
748 header
= sbitmap_alloc (n_basic_blocks
);
749 sbitmap_zero (header
);
751 passed
= sbitmap_alloc (nr_edges
);
752 sbitmap_zero (passed
);
754 in_queue
= sbitmap_alloc (n_basic_blocks
);
755 sbitmap_zero (in_queue
);
757 in_stack
= sbitmap_alloc (n_basic_blocks
);
758 sbitmap_zero (in_stack
);
760 for (i
= 0; i
< n_basic_blocks
; i
++)
763 /* DFS traversal to find inner loops in the cfg. */
768 if (current_edge
== 0 || TEST_BIT (passed
, current_edge
))
770 /* We have reached a leaf node or a node that was already
771 processed. Pop edges off the stack until we find
772 an edge that has not yet been processed. */
774 && (current_edge
== 0 || TEST_BIT (passed
, current_edge
)))
776 /* Pop entry off the stack. */
777 current_edge
= stack
[sp
--];
778 node
= FROM_BLOCK (current_edge
);
779 child
= TO_BLOCK (current_edge
);
780 RESET_BIT (in_stack
, child
);
781 if (max_hdr
[child
] >= 0 && TEST_BIT (in_stack
, max_hdr
[child
]))
782 UPDATE_LOOP_RELATIONS (node
, max_hdr
[child
]);
783 current_edge
= NEXT_OUT (current_edge
);
786 /* See if have finished the DFS tree traversal. */
787 if (sp
< 0 && TEST_BIT (passed
, current_edge
))
790 /* Nope, continue the traversal with the popped node. */
794 /* Process a node. */
795 node
= FROM_BLOCK (current_edge
);
796 child
= TO_BLOCK (current_edge
);
797 SET_BIT (in_stack
, node
);
798 dfs_nr
[node
] = ++count
;
800 /* If the successor is in the stack, then we've found a loop.
801 Mark the loop, if it is not a natural loop, then it will
802 be rejected during the second traversal. */
803 if (TEST_BIT (in_stack
, child
))
806 SET_BIT (header
, child
);
807 UPDATE_LOOP_RELATIONS (node
, child
);
808 SET_BIT (passed
, current_edge
);
809 current_edge
= NEXT_OUT (current_edge
);
813 /* If the child was already visited, then there is no need to visit
814 it again. Just update the loop relationships and restart
818 if (max_hdr
[child
] >= 0 && TEST_BIT (in_stack
, max_hdr
[child
]))
819 UPDATE_LOOP_RELATIONS (node
, max_hdr
[child
]);
820 SET_BIT (passed
, current_edge
);
821 current_edge
= NEXT_OUT (current_edge
);
825 /* Push an entry on the stack and continue DFS traversal. */
826 stack
[++sp
] = current_edge
;
827 SET_BIT (passed
, current_edge
);
828 current_edge
= OUT_EDGES (child
);
830 /* This is temporary until haifa is converted to use rth's new
831 cfg routines which have true entry/exit blocks and the
832 appropriate edges from/to those blocks.
834 Generally we update dfs_nr for a node when we process its
835 out edge. However, if the node has no out edge then we will
836 not set dfs_nr for that node. This can confuse the scheduler
837 into thinking that we have unreachable blocks, which in turn
838 disables cross block scheduling.
840 So, if we have a node with no out edges, go ahead and mark it
842 if (current_edge
== 0)
843 dfs_nr
[child
] = ++count
;
846 /* Another check for unreachable blocks. The earlier test in
847 is_cfg_nonregular only finds unreachable blocks that do not
850 The DFS traversal will mark every block that is reachable from
851 the entry node by placing a nonzero value in dfs_nr. Thus if
852 dfs_nr is zero for any block, then it must be unreachable. */
854 for (i
= 0; i
< n_basic_blocks
; i
++)
861 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
862 to hold degree counts. */
865 for (i
= 0; i
< n_basic_blocks
; i
++)
867 for (i
= 0; i
< num_edges
; i
++)
869 edge e
= INDEX_EDGE (edge_list
, i
);
871 if (e
->dest
!= EXIT_BLOCK_PTR
)
872 degree
[e
->dest
->index
]++;
875 /* Do not perform region scheduling if there are any unreachable
884 /* Second traversal: find reducible inner loops and topologically sort
885 block of each region. */
887 queue
= (int *) xmalloc (n_basic_blocks
* sizeof (int));
889 /* Find blocks which are inner loop headers. We still have non-reducible
890 loops to consider at this point. */
891 for (i
= 0; i
< n_basic_blocks
; i
++)
893 if (TEST_BIT (header
, i
) && TEST_BIT (inner
, i
))
898 /* Now check that the loop is reducible. We do this separate
899 from finding inner loops so that we do not find a reducible
900 loop which contains an inner non-reducible loop.
902 A simple way to find reducible/natural loops is to verify
903 that each block in the loop is dominated by the loop
906 If there exists a block that is not dominated by the loop
907 header, then the block is reachable from outside the loop
908 and thus the loop is not a natural loop. */
909 for (j
= 0; j
< n_basic_blocks
; j
++)
911 /* First identify blocks in the loop, except for the loop
913 if (i
== max_hdr
[j
] && i
!= j
)
915 /* Now verify that the block is dominated by the loop
917 if (!TEST_BIT (dom
[j
], i
))
922 /* If we exited the loop early, then I is the header of
923 a non-reducible loop and we should quit processing it
925 if (j
!= n_basic_blocks
)
928 /* I is a header of an inner loop, or block 0 in a subroutine
929 with no loops at all. */
931 too_large_failure
= 0;
932 loop_head
= max_hdr
[i
];
934 /* Decrease degree of all I's successors for topological
936 for (e
= BASIC_BLOCK (i
)->succ
; e
; e
= e
->succ_next
)
937 if (e
->dest
!= EXIT_BLOCK_PTR
)
938 --degree
[e
->dest
->index
];
940 /* Estimate # insns, and count # blocks in the region. */
942 num_insns
= (INSN_LUID (BLOCK_END (i
))
943 - INSN_LUID (BLOCK_HEAD (i
)));
945 /* Find all loop latches (blocks with back edges to the loop
946 header) or all the leaf blocks in the cfg has no loops.
948 Place those blocks into the queue. */
951 for (j
= 0; j
< n_basic_blocks
; j
++)
952 /* Leaf nodes have only a single successor which must
954 if (BASIC_BLOCK (j
)->succ
955 && BASIC_BLOCK (j
)->succ
->dest
== EXIT_BLOCK_PTR
956 && BASIC_BLOCK (j
)->succ
->succ_next
== NULL
)
959 SET_BIT (in_queue
, j
);
961 if (too_large (j
, &num_bbs
, &num_insns
))
963 too_large_failure
= 1;
972 for (e
= BASIC_BLOCK (i
)->pred
; e
; e
= e
->pred_next
)
974 if (e
->src
== ENTRY_BLOCK_PTR
)
977 node
= e
->src
->index
;
979 if (max_hdr
[node
] == loop_head
&& node
!= i
)
981 /* This is a loop latch. */
982 queue
[++tail
] = node
;
983 SET_BIT (in_queue
, node
);
985 if (too_large (node
, &num_bbs
, &num_insns
))
987 too_large_failure
= 1;
994 /* Now add all the blocks in the loop to the queue.
996 We know the loop is a natural loop; however the algorithm
997 above will not always mark certain blocks as being in the
1005 The algorithm in the DFS traversal may not mark B & D as part
1006 of the loop (ie they will not have max_hdr set to A).
1008 We know they can not be loop latches (else they would have
1009 had max_hdr set since they'd have a backedge to a dominator
1010 block). So we don't need them on the initial queue.
1012 We know they are part of the loop because they are dominated
1013 by the loop header and can be reached by a backwards walk of
1014 the edges starting with nodes on the initial queue.
1016 It is safe and desirable to include those nodes in the
1017 loop/scheduling region. To do so we would need to decrease
1018 the degree of a node if it is the target of a backedge
1019 within the loop itself as the node is placed in the queue.
1021 We do not do this because I'm not sure that the actual
1022 scheduling code will properly handle this case. ?!? */
1024 while (head
< tail
&& !too_large_failure
)
1027 child
= queue
[++head
];
1029 for (e
= BASIC_BLOCK (child
)->pred
; e
; e
= e
->pred_next
)
1031 node
= e
->src
->index
;
1033 /* See discussion above about nodes not marked as in
1034 this loop during the initial DFS traversal. */
1035 if (e
->src
== ENTRY_BLOCK_PTR
1036 || max_hdr
[node
] != loop_head
)
1041 else if (!TEST_BIT (in_queue
, node
) && node
!= i
)
1043 queue
[++tail
] = node
;
1044 SET_BIT (in_queue
, node
);
1046 if (too_large (node
, &num_bbs
, &num_insns
))
1048 too_large_failure
= 1;
1055 if (tail
>= 0 && !too_large_failure
)
1057 /* Place the loop header into list of region blocks. */
1059 rgn_bb_table
[idx
] = i
;
1060 RGN_NR_BLOCKS (nr_regions
) = num_bbs
;
1061 RGN_BLOCKS (nr_regions
) = idx
++;
1062 CONTAINING_RGN (i
) = nr_regions
;
1063 BLOCK_TO_BB (i
) = count
= 0;
1065 /* Remove blocks from queue[] when their in degree
1066 becomes zero. Repeat until no blocks are left on the
1067 list. This produces a topological list of blocks in
1073 child
= queue
[head
];
1074 if (degree
[child
] == 0)
1079 rgn_bb_table
[idx
++] = child
;
1080 BLOCK_TO_BB (child
) = ++count
;
1081 CONTAINING_RGN (child
) = nr_regions
;
1082 queue
[head
] = queue
[tail
--];
1084 for (e
= BASIC_BLOCK (child
)->succ
;
1087 if (e
->dest
!= EXIT_BLOCK_PTR
)
1088 --degree
[e
->dest
->index
];
1100 /* Any block that did not end up in a region is placed into a region
1102 for (i
= 0; i
< n_basic_blocks
; i
++)
1105 rgn_bb_table
[idx
] = i
;
1106 RGN_NR_BLOCKS (nr_regions
) = 1;
1107 RGN_BLOCKS (nr_regions
) = idx
++;
1108 CONTAINING_RGN (i
) = nr_regions
++;
1109 BLOCK_TO_BB (i
) = 0;
1122 /* Functions for regions scheduling information. */
1124 /* Compute dominators, probability, and potential-split-edges of bb.
1125 Assume that these values were already computed for bb's predecessors. */
1128 compute_dom_prob_ps (bb
)
1131 int nxt_in_edge
, fst_in_edge
, pred
;
1132 int fst_out_edge
, nxt_out_edge
, nr_out_edges
, nr_rgn_out_edges
;
1135 if (IS_RGN_ENTRY (bb
))
1137 BITSET_ADD (dom
[bb
], 0, bbset_size
);
1142 fst_in_edge
= nxt_in_edge
= IN_EDGES (BB_TO_BLOCK (bb
));
1144 /* Initialize dom[bb] to '111..1'. */
1145 BITSET_INVERT (dom
[bb
], bbset_size
);
1149 pred
= FROM_BLOCK (nxt_in_edge
);
1150 BITSET_INTER (dom
[bb
], dom
[BLOCK_TO_BB (pred
)], bbset_size
);
1152 BITSET_UNION (ancestor_edges
[bb
], ancestor_edges
[BLOCK_TO_BB (pred
)],
1155 BITSET_ADD (ancestor_edges
[bb
], EDGE_TO_BIT (nxt_in_edge
), edgeset_size
);
1158 nr_rgn_out_edges
= 0;
1159 fst_out_edge
= OUT_EDGES (pred
);
1160 nxt_out_edge
= NEXT_OUT (fst_out_edge
);
1161 BITSET_UNION (pot_split
[bb
], pot_split
[BLOCK_TO_BB (pred
)],
1164 BITSET_ADD (pot_split
[bb
], EDGE_TO_BIT (fst_out_edge
), edgeset_size
);
1166 /* The successor doesn't belong in the region? */
1167 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge
)) !=
1168 CONTAINING_RGN (BB_TO_BLOCK (bb
)))
1171 while (fst_out_edge
!= nxt_out_edge
)
1174 /* The successor doesn't belong in the region? */
1175 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge
)) !=
1176 CONTAINING_RGN (BB_TO_BLOCK (bb
)))
1178 BITSET_ADD (pot_split
[bb
], EDGE_TO_BIT (nxt_out_edge
), edgeset_size
);
1179 nxt_out_edge
= NEXT_OUT (nxt_out_edge
);
1183 /* Now nr_rgn_out_edges is the number of region-exit edges from
1184 pred, and nr_out_edges will be the number of pred out edges
1185 not leaving the region. */
1186 nr_out_edges
-= nr_rgn_out_edges
;
1187 if (nr_rgn_out_edges
> 0)
1188 prob
[bb
] += 0.9 * prob
[BLOCK_TO_BB (pred
)] / nr_out_edges
;
1190 prob
[bb
] += prob
[BLOCK_TO_BB (pred
)] / nr_out_edges
;
1191 nxt_in_edge
= NEXT_IN (nxt_in_edge
);
1193 while (fst_in_edge
!= nxt_in_edge
);
1195 BITSET_ADD (dom
[bb
], bb
, bbset_size
);
1196 BITSET_DIFFER (pot_split
[bb
], ancestor_edges
[bb
], edgeset_size
);
1198 if (sched_verbose
>= 2)
1199 fprintf (sched_dump
, ";; bb_prob(%d, %d) = %3d\n", bb
, BB_TO_BLOCK (bb
),
1200 (int) (100.0 * prob
[bb
]));
1203 /* Functions for target info. */
1205 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1206 Note that bb_trg dominates bb_src. */
1209 split_edges (bb_src
, bb_trg
, bl
)
1214 int es
= edgeset_size
;
1215 edgeset src
= (edgeset
) xcalloc (es
, sizeof (HOST_WIDE_INT
));
1218 src
[es
] = (pot_split
[bb_src
])[es
];
1219 BITSET_DIFFER (src
, pot_split
[bb_trg
], edgeset_size
);
1220 extract_bitlst (src
, edgeset_size
, edgeset_bitsize
, bl
);
1224 /* Find the valid candidate-source-blocks for the target block TRG, compute
1225 their probability, and check if they are speculative or not.
1226 For speculative sources, compute their update-blocks and split-blocks. */
1229 compute_trg_info (trg
)
1232 register candidate
*sp
;
1234 int check_block
, update_idx
;
1235 int i
, j
, k
, fst_edge
, nxt_edge
;
1237 /* Define some of the fields for the target bb as well. */
1238 sp
= candidate_table
+ trg
;
1240 sp
->is_speculative
= 0;
1243 for (i
= trg
+ 1; i
< current_nr_blocks
; i
++)
1245 sp
= candidate_table
+ i
;
1247 sp
->is_valid
= IS_DOMINATED (i
, trg
);
1250 sp
->src_prob
= GET_SRC_PROB (i
, trg
);
1251 sp
->is_valid
= (sp
->src_prob
>= MIN_PROBABILITY
);
1256 split_edges (i
, trg
, &el
);
1257 sp
->is_speculative
= (el
.nr_members
) ? 1 : 0;
1258 if (sp
->is_speculative
&& !flag_schedule_speculative
)
1264 char *update_blocks
;
1266 /* Compute split blocks and store them in bblst_table.
1267 The TO block of every split edge is a split block. */
1268 sp
->split_bbs
.first_member
= &bblst_table
[bblst_last
];
1269 sp
->split_bbs
.nr_members
= el
.nr_members
;
1270 for (j
= 0; j
< el
.nr_members
; bblst_last
++, j
++)
1271 bblst_table
[bblst_last
] =
1272 TO_BLOCK (rgn_edges
[el
.first_member
[j
]]);
1273 sp
->update_bbs
.first_member
= &bblst_table
[bblst_last
];
1275 /* Compute update blocks and store them in bblst_table.
1276 For every split edge, look at the FROM block, and check
1277 all out edges. For each out edge that is not a split edge,
1278 add the TO block to the update block list. This list can end
1279 up with a lot of duplicates. We need to weed them out to avoid
1280 overrunning the end of the bblst_table. */
1281 update_blocks
= (char *) alloca (n_basic_blocks
);
1282 memset (update_blocks
, 0, n_basic_blocks
);
1285 for (j
= 0; j
< el
.nr_members
; j
++)
1287 check_block
= FROM_BLOCK (rgn_edges
[el
.first_member
[j
]]);
1288 fst_edge
= nxt_edge
= OUT_EDGES (check_block
);
1291 if (! update_blocks
[TO_BLOCK (nxt_edge
)])
1293 for (k
= 0; k
< el
.nr_members
; k
++)
1294 if (EDGE_TO_BIT (nxt_edge
) == el
.first_member
[k
])
1297 if (k
>= el
.nr_members
)
1299 bblst_table
[bblst_last
++] = TO_BLOCK (nxt_edge
);
1300 update_blocks
[TO_BLOCK (nxt_edge
)] = 1;
1305 nxt_edge
= NEXT_OUT (nxt_edge
);
1307 while (fst_edge
!= nxt_edge
);
1309 sp
->update_bbs
.nr_members
= update_idx
;
1311 /* Make sure we didn't overrun the end of bblst_table. */
1312 if (bblst_last
> bblst_size
)
1317 sp
->split_bbs
.nr_members
= sp
->update_bbs
.nr_members
= 0;
1319 sp
->is_speculative
= 0;
1325 /* Print candidates info, for debugging purposes. Callable from debugger. */
1331 if (!candidate_table
[i
].is_valid
)
1334 if (candidate_table
[i
].is_speculative
)
1337 fprintf (sched_dump
, "src b %d bb %d speculative \n", BB_TO_BLOCK (i
), i
);
1339 fprintf (sched_dump
, "split path: ");
1340 for (j
= 0; j
< candidate_table
[i
].split_bbs
.nr_members
; j
++)
1342 int b
= candidate_table
[i
].split_bbs
.first_member
[j
];
1344 fprintf (sched_dump
, " %d ", b
);
1346 fprintf (sched_dump
, "\n");
1348 fprintf (sched_dump
, "update path: ");
1349 for (j
= 0; j
< candidate_table
[i
].update_bbs
.nr_members
; j
++)
1351 int b
= candidate_table
[i
].update_bbs
.first_member
[j
];
1353 fprintf (sched_dump
, " %d ", b
);
1355 fprintf (sched_dump
, "\n");
1359 fprintf (sched_dump
, " src %d equivalent\n", BB_TO_BLOCK (i
));
1363 /* Print candidates info, for debugging purposes. Callable from debugger. */
1366 debug_candidates (trg
)
1371 fprintf (sched_dump
, "----------- candidate table: target: b=%d bb=%d ---\n",
1372 BB_TO_BLOCK (trg
), trg
);
1373 for (i
= trg
+ 1; i
< current_nr_blocks
; i
++)
1374 debug_candidate (i
);
1377 /* Functions for speculative scheduing. */
1379 /* Return 0 if x is a set of a register alive in the beginning of one
1380 of the split-blocks of src, otherwise return 1. */
1383 check_live_1 (src
, x
)
1389 register rtx reg
= SET_DEST (x
);
1394 while (GET_CODE (reg
) == SUBREG
|| GET_CODE (reg
) == ZERO_EXTRACT
1395 || GET_CODE (reg
) == SIGN_EXTRACT
1396 || GET_CODE (reg
) == STRICT_LOW_PART
)
1397 reg
= XEXP (reg
, 0);
1399 if (GET_CODE (reg
) == PARALLEL
)
1403 for (i
= XVECLEN (reg
, 0) - 1; i
>= 0; i
--)
1404 if (XEXP (XVECEXP (reg
, 0, i
), 0) != 0)
1405 if (check_live_1 (src
, XEXP (XVECEXP (reg
, 0, i
), 0)))
1411 if (GET_CODE (reg
) != REG
)
1414 regno
= REGNO (reg
);
1416 if (regno
< FIRST_PSEUDO_REGISTER
&& global_regs
[regno
])
1418 /* Global registers are assumed live. */
1423 if (regno
< FIRST_PSEUDO_REGISTER
)
1425 /* Check for hard registers. */
1426 int j
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1429 for (i
= 0; i
< candidate_table
[src
].split_bbs
.nr_members
; i
++)
1431 int b
= candidate_table
[src
].split_bbs
.first_member
[i
];
1433 if (REGNO_REG_SET_P (BASIC_BLOCK (b
)->global_live_at_start
,
1443 /* Check for psuedo registers. */
1444 for (i
= 0; i
< candidate_table
[src
].split_bbs
.nr_members
; i
++)
1446 int b
= candidate_table
[src
].split_bbs
.first_member
[i
];
1448 if (REGNO_REG_SET_P (BASIC_BLOCK (b
)->global_live_at_start
, regno
))
1459 /* If x is a set of a register R, mark that R is alive in the beginning
1460 of every update-block of src. */
1463 update_live_1 (src
, x
)
1469 register rtx reg
= SET_DEST (x
);
1474 while (GET_CODE (reg
) == SUBREG
|| GET_CODE (reg
) == ZERO_EXTRACT
1475 || GET_CODE (reg
) == SIGN_EXTRACT
1476 || GET_CODE (reg
) == STRICT_LOW_PART
)
1477 reg
= XEXP (reg
, 0);
1479 if (GET_CODE (reg
) == PARALLEL
)
1483 for (i
= XVECLEN (reg
, 0) - 1; i
>= 0; i
--)
1484 if (XEXP (XVECEXP (reg
, 0, i
), 0) != 0)
1485 update_live_1 (src
, XEXP (XVECEXP (reg
, 0, i
), 0));
1490 if (GET_CODE (reg
) != REG
)
1493 /* Global registers are always live, so the code below does not apply
1496 regno
= REGNO (reg
);
1498 if (regno
>= FIRST_PSEUDO_REGISTER
|| !global_regs
[regno
])
1500 if (regno
< FIRST_PSEUDO_REGISTER
)
1502 int j
= HARD_REGNO_NREGS (regno
, GET_MODE (reg
));
1505 for (i
= 0; i
< candidate_table
[src
].update_bbs
.nr_members
; i
++)
1507 int b
= candidate_table
[src
].update_bbs
.first_member
[i
];
1509 SET_REGNO_REG_SET (BASIC_BLOCK (b
)->global_live_at_start
,
1516 for (i
= 0; i
< candidate_table
[src
].update_bbs
.nr_members
; i
++)
1518 int b
= candidate_table
[src
].update_bbs
.first_member
[i
];
1520 SET_REGNO_REG_SET (BASIC_BLOCK (b
)->global_live_at_start
, regno
);
1526 /* Return 1 if insn can be speculatively moved from block src to trg,
1527 otherwise return 0. Called before first insertion of insn to
1528 ready-list or before the scheduling. */
1531 check_live (insn
, src
)
1535 /* Find the registers set by instruction. */
1536 if (GET_CODE (PATTERN (insn
)) == SET
1537 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
1538 return check_live_1 (src
, PATTERN (insn
));
1539 else if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
1542 for (j
= XVECLEN (PATTERN (insn
), 0) - 1; j
>= 0; j
--)
1543 if ((GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == SET
1544 || GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == CLOBBER
)
1545 && !check_live_1 (src
, XVECEXP (PATTERN (insn
), 0, j
)))
1554 /* Update the live registers info after insn was moved speculatively from
1555 block src to trg. */
1558 update_live (insn
, src
)
1562 /* Find the registers set by instruction. */
1563 if (GET_CODE (PATTERN (insn
)) == SET
1564 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
1565 update_live_1 (src
, PATTERN (insn
));
1566 else if (GET_CODE (PATTERN (insn
)) == PARALLEL
)
1569 for (j
= XVECLEN (PATTERN (insn
), 0) - 1; j
>= 0; j
--)
1570 if (GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == SET
1571 || GET_CODE (XVECEXP (PATTERN (insn
), 0, j
)) == CLOBBER
)
1572 update_live_1 (src
, XVECEXP (PATTERN (insn
), 0, j
));
1576 /* Exception Free Loads:
1578 We define five classes of speculative loads: IFREE, IRISKY,
1579 PFREE, PRISKY, and MFREE.
1581 IFREE loads are loads that are proved to be exception-free, just
1582 by examining the load insn. Examples for such loads are loads
1583 from TOC and loads of global data.
1585 IRISKY loads are loads that are proved to be exception-risky,
1586 just by examining the load insn. Examples for such loads are
1587 volatile loads and loads from shared memory.
1589 PFREE loads are loads for which we can prove, by examining other
1590 insns, that they are exception-free. Currently, this class consists
1591 of loads for which we are able to find a "similar load", either in
1592 the target block, or, if only one split-block exists, in that split
1593 block. Load2 is similar to load1 if both have same single base
1594 register. We identify only part of the similar loads, by finding
1595 an insn upon which both load1 and load2 have a DEF-USE dependence.
1597 PRISKY loads are loads for which we can prove, by examining other
1598 insns, that they are exception-risky. Currently we have two proofs for
1599 such loads. The first proof detects loads that are probably guarded by a
1600 test on the memory address. This proof is based on the
1601 backward and forward data dependence information for the region.
1602 Let load-insn be the examined load.
1603 Load-insn is PRISKY iff ALL the following hold:
1605 - insn1 is not in the same block as load-insn
1606 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
1607 - test-insn is either a compare or a branch, not in the same block
1609 - load-insn is reachable from test-insn
1610 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
1612 This proof might fail when the compare and the load are fed
1613 by an insn not in the region. To solve this, we will add to this
1614 group all loads that have no input DEF-USE dependence.
1616 The second proof detects loads that are directly or indirectly
1617 fed by a speculative load. This proof is affected by the
1618 scheduling process. We will use the flag fed_by_spec_load.
1619 Initially, all insns have this flag reset. After a speculative
1620 motion of an insn, if insn is either a load, or marked as
1621 fed_by_spec_load, we will also mark as fed_by_spec_load every
1622 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
1623 load which is fed_by_spec_load is also PRISKY.
1625 MFREE (maybe-free) loads are all the remaining loads. They may be
1626 exception-free, but we cannot prove it.
1628 Now, all loads in IFREE and PFREE classes are considered
1629 exception-free, while all loads in IRISKY and PRISKY classes are
1630 considered exception-risky. As for loads in the MFREE class,
1631 these are considered either exception-free or exception-risky,
1632 depending on whether we are pessimistic or optimistic. We have
1633 to take the pessimistic approach to assure the safety of
1634 speculative scheduling, but we can take the optimistic approach
1635 by invoking the -fsched_spec_load_dangerous option. */
1637 enum INSN_TRAP_CLASS
1639 TRAP_FREE
= 0, IFREE
= 1, PFREE_CANDIDATE
= 2,
1640 PRISKY_CANDIDATE
= 3, IRISKY
= 4, TRAP_RISKY
= 5
1643 #define WORST_CLASS(class1, class2) \
1644 ((class1 > class2) ? class1 : class2)
1646 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
1647 #define IS_REACHABLE(bb_from, bb_to) \
1649 || IS_RGN_ENTRY (bb_from) \
1650 || (bitset_member (ancestor_edges[bb_to], \
1651 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
1654 /* Non-zero iff the address is comprised from at most 1 register. */
1655 #define CONST_BASED_ADDRESS_P(x) \
1656 (GET_CODE (x) == REG \
1657 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
1658 || (GET_CODE (x) == LO_SUM)) \
1659 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
1660 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
1662 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1665 set_spec_fed (load_insn
)
1670 for (link
= INSN_DEPEND (load_insn
); link
; link
= XEXP (link
, 1))
1671 if (GET_MODE (link
) == VOIDmode
)
1672 FED_BY_SPEC_LOAD (XEXP (link
, 0)) = 1;
1673 } /* set_spec_fed */
1675 /* On the path from the insn to load_insn_bb, find a conditional
1676 branch depending on insn, that guards the speculative load. */
1679 find_conditional_protection (insn
, load_insn_bb
)
1685 /* Iterate through DEF-USE forward dependences. */
1686 for (link
= INSN_DEPEND (insn
); link
; link
= XEXP (link
, 1))
1688 rtx next
= XEXP (link
, 0);
1689 if ((CONTAINING_RGN (BLOCK_NUM (next
)) ==
1690 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb
)))
1691 && IS_REACHABLE (INSN_BB (next
), load_insn_bb
)
1692 && load_insn_bb
!= INSN_BB (next
)
1693 && GET_MODE (link
) == VOIDmode
1694 && (GET_CODE (next
) == JUMP_INSN
1695 || find_conditional_protection (next
, load_insn_bb
)))
1699 } /* find_conditional_protection */
1701 /* Returns 1 if the same insn1 that participates in the computation
1702 of load_insn's address is feeding a conditional branch that is
1703 guarding on load_insn. This is true if we find a the two DEF-USE
1705 insn1 -> ... -> conditional-branch
1706 insn1 -> ... -> load_insn,
1707 and if a flow path exist:
1708 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1709 and if insn1 is on the path
1710 region-entry -> ... -> bb_trg -> ... load_insn.
1712 Locate insn1 by climbing on LOG_LINKS from load_insn.
1713 Locate the branch by following INSN_DEPEND from insn1. */
1716 is_conditionally_protected (load_insn
, bb_src
, bb_trg
)
1722 for (link
= LOG_LINKS (load_insn
); link
; link
= XEXP (link
, 1))
1724 rtx insn1
= XEXP (link
, 0);
1726 /* Must be a DEF-USE dependence upon non-branch. */
1727 if (GET_MODE (link
) != VOIDmode
1728 || GET_CODE (insn1
) == JUMP_INSN
)
1731 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1732 if (INSN_BB (insn1
) == bb_src
1733 || (CONTAINING_RGN (BLOCK_NUM (insn1
))
1734 != CONTAINING_RGN (BB_TO_BLOCK (bb_src
)))
1735 || (!IS_REACHABLE (bb_trg
, INSN_BB (insn1
))
1736 && !IS_REACHABLE (INSN_BB (insn1
), bb_trg
)))
1739 /* Now search for the conditional-branch. */
1740 if (find_conditional_protection (insn1
, bb_src
))
1743 /* Recursive step: search another insn1, "above" current insn1. */
1744 return is_conditionally_protected (insn1
, bb_src
, bb_trg
);
1747 /* The chain does not exist. */
1749 } /* is_conditionally_protected */
1751 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1752 load_insn can move speculatively from bb_src to bb_trg. All the
1753 following must hold:
1755 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1756 (2) load_insn and load1 have a def-use dependence upon
1757 the same insn 'insn1'.
1758 (3) either load2 is in bb_trg, or:
1759 - there's only one split-block, and
1760 - load1 is on the escape path, and
1762 From all these we can conclude that the two loads access memory
1763 addresses that differ at most by a constant, and hence if moving
1764 load_insn would cause an exception, it would have been caused by
1768 is_pfree (load_insn
, bb_src
, bb_trg
)
1773 register candidate
*candp
= candidate_table
+ bb_src
;
1775 if (candp
->split_bbs
.nr_members
!= 1)
1776 /* Must have exactly one escape block. */
1779 for (back_link
= LOG_LINKS (load_insn
);
1780 back_link
; back_link
= XEXP (back_link
, 1))
1782 rtx insn1
= XEXP (back_link
, 0);
1784 if (GET_MODE (back_link
) == VOIDmode
)
1786 /* Found a DEF-USE dependence (insn1, load_insn). */
1789 for (fore_link
= INSN_DEPEND (insn1
);
1790 fore_link
; fore_link
= XEXP (fore_link
, 1))
1792 rtx insn2
= XEXP (fore_link
, 0);
1793 if (GET_MODE (fore_link
) == VOIDmode
)
1795 /* Found a DEF-USE dependence (insn1, insn2). */
1796 if (haifa_classify_insn (insn2
) != PFREE_CANDIDATE
)
1797 /* insn2 not guaranteed to be a 1 base reg load. */
1800 if (INSN_BB (insn2
) == bb_trg
)
1801 /* insn2 is the similar load, in the target block. */
1804 if (*(candp
->split_bbs
.first_member
) == BLOCK_NUM (insn2
))
1805 /* insn2 is a similar load, in a split-block. */
1812 /* Couldn't find a similar load. */
1816 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
1817 as found by analyzing insn's expression. */
1820 may_trap_exp (x
, is_store
)
1828 code
= GET_CODE (x
);
1838 /* The insn uses memory: a volatile load. */
1839 if (MEM_VOLATILE_P (x
))
1841 /* An exception-free load. */
1842 if (!may_trap_p (x
))
1844 /* A load with 1 base register, to be further checked. */
1845 if (CONST_BASED_ADDRESS_P (XEXP (x
, 0)))
1846 return PFREE_CANDIDATE
;
1847 /* No info on the load, to be further checked. */
1848 return PRISKY_CANDIDATE
;
1853 int i
, insn_class
= TRAP_FREE
;
1855 /* Neither store nor load, check if it may cause a trap. */
1858 /* Recursive step: walk the insn... */
1859 fmt
= GET_RTX_FORMAT (code
);
1860 for (i
= GET_RTX_LENGTH (code
) - 1; i
>= 0; i
--)
1864 int tmp_class
= may_trap_exp (XEXP (x
, i
), is_store
);
1865 insn_class
= WORST_CLASS (insn_class
, tmp_class
);
1867 else if (fmt
[i
] == 'E')
1870 for (j
= 0; j
< XVECLEN (x
, i
); j
++)
1872 int tmp_class
= may_trap_exp (XVECEXP (x
, i
, j
), is_store
);
1873 insn_class
= WORST_CLASS (insn_class
, tmp_class
);
1874 if (insn_class
== TRAP_RISKY
|| insn_class
== IRISKY
)
1878 if (insn_class
== TRAP_RISKY
|| insn_class
== IRISKY
)
1885 /* Classifies insn for the purpose of verifying that it can be
1886 moved speculatively, by examining it's patterns, returning:
1887 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
1888 TRAP_FREE: non-load insn.
1889 IFREE: load from a globaly safe location.
1890 IRISKY: volatile load.
1891 PFREE_CANDIDATE, PRISKY_CANDIDATE: load that need to be checked for
1892 being either PFREE or PRISKY. */
1895 haifa_classify_insn (insn
)
1898 rtx pat
= PATTERN (insn
);
1899 int tmp_class
= TRAP_FREE
;
1900 int insn_class
= TRAP_FREE
;
1903 if (GET_CODE (pat
) == PARALLEL
)
1905 int i
, len
= XVECLEN (pat
, 0);
1907 for (i
= len
- 1; i
>= 0; i
--)
1909 code
= GET_CODE (XVECEXP (pat
, 0, i
));
1913 /* Test if it is a 'store'. */
1914 tmp_class
= may_trap_exp (XEXP (XVECEXP (pat
, 0, i
), 0), 1);
1917 /* Test if it is a store. */
1918 tmp_class
= may_trap_exp (SET_DEST (XVECEXP (pat
, 0, i
)), 1);
1919 if (tmp_class
== TRAP_RISKY
)
1921 /* Test if it is a load. */
1923 = WORST_CLASS (tmp_class
,
1924 may_trap_exp (SET_SRC (XVECEXP (pat
, 0, i
)),
1929 tmp_class
= TRAP_RISKY
;
1934 insn_class
= WORST_CLASS (insn_class
, tmp_class
);
1935 if (insn_class
== TRAP_RISKY
|| insn_class
== IRISKY
)
1941 code
= GET_CODE (pat
);
1945 /* Test if it is a 'store'. */
1946 tmp_class
= may_trap_exp (XEXP (pat
, 0), 1);
1949 /* Test if it is a store. */
1950 tmp_class
= may_trap_exp (SET_DEST (pat
), 1);
1951 if (tmp_class
== TRAP_RISKY
)
1953 /* Test if it is a load. */
1955 WORST_CLASS (tmp_class
,
1956 may_trap_exp (SET_SRC (pat
), 0));
1960 tmp_class
= TRAP_RISKY
;
1964 insn_class
= tmp_class
;
1970 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1971 a load moved speculatively, or if load_insn is protected by
1972 a compare on load_insn's address). */
1975 is_prisky (load_insn
, bb_src
, bb_trg
)
1979 if (FED_BY_SPEC_LOAD (load_insn
))
1982 if (LOG_LINKS (load_insn
) == NULL
)
1983 /* Dependence may 'hide' out of the region. */
1986 if (is_conditionally_protected (load_insn
, bb_src
, bb_trg
))
1992 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1993 Return 1 if insn is exception-free (and the motion is valid)
1997 is_exception_free (insn
, bb_src
, bb_trg
)
2001 int insn_class
= haifa_classify_insn (insn
);
2003 /* Handle non-load insns. */
2014 if (!flag_schedule_speculative_load
)
2016 IS_LOAD_INSN (insn
) = 1;
2023 case PFREE_CANDIDATE
:
2024 if (is_pfree (insn
, bb_src
, bb_trg
))
2026 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2027 case PRISKY_CANDIDATE
:
2028 if (!flag_schedule_speculative_load_dangerous
2029 || is_prisky (insn
, bb_src
, bb_trg
))
2035 return flag_schedule_speculative_load_dangerous
;
2038 /* The number of insns from the current block scheduled so far. */
2039 static int sched_target_n_insns
;
2040 /* The number of insns from the current block to be scheduled in total. */
2041 static int target_n_insns
;
2042 /* The number of insns from the entire region scheduled so far. */
2043 static int sched_n_insns
;
2044 /* Nonzero if the last scheduled insn was a jump. */
2045 static int last_was_jump
;
2047 /* Implementations of the sched_info functions for region scheduling. */
2048 static void init_ready_list
PARAMS ((struct ready_list
*));
2049 static int can_schedule_ready_p
PARAMS ((rtx
));
2050 static int new_ready
PARAMS ((rtx
));
2051 static int schedule_more_p
PARAMS ((void));
2052 static const char *rgn_print_insn
PARAMS ((rtx
, int));
2053 static int rgn_rank
PARAMS ((rtx
, rtx
));
2054 static int contributes_to_priority
PARAMS ((rtx
, rtx
));
2055 static void compute_jump_reg_dependencies
PARAMS ((rtx
, regset
));
2057 /* Return nonzero if there are more insns that should be scheduled. */
2062 return ! last_was_jump
&& sched_target_n_insns
< target_n_insns
;
2065 /* Add all insns that are initially ready to the ready list READY. Called
2066 once before scheduling a set of insns. */
2069 init_ready_list (ready
)
2070 struct ready_list
*ready
;
2072 rtx prev_head
= current_sched_info
->prev_head
;
2073 rtx next_tail
= current_sched_info
->next_tail
;
2078 sched_target_n_insns
= 0;
2082 /* Print debugging information. */
2083 if (sched_verbose
>= 5)
2084 debug_dependencies ();
2086 /* Prepare current target block info. */
2087 if (current_nr_blocks
> 1)
2089 candidate_table
= (candidate
*) xmalloc (current_nr_blocks
2090 * sizeof (candidate
));
2093 /* bblst_table holds split blocks and update blocks for each block after
2094 the current one in the region. split blocks and update blocks are
2095 the TO blocks of region edges, so there can be at most rgn_nr_edges
2097 bblst_size
= (current_nr_blocks
- target_bb
) * rgn_nr_edges
;
2098 bblst_table
= (int *) xmalloc (bblst_size
* sizeof (int));
2100 bitlst_table_last
= 0;
2101 bitlst_table_size
= rgn_nr_edges
;
2102 bitlst_table
= (int *) xmalloc (rgn_nr_edges
* sizeof (int));
2104 compute_trg_info (target_bb
);
2107 /* Initialize ready list with all 'ready' insns in target block.
2108 Count number of insns in the target block being scheduled. */
2109 for (insn
= NEXT_INSN (prev_head
); insn
!= next_tail
; insn
= NEXT_INSN (insn
))
2113 if (! INSN_P (insn
))
2115 next
= NEXT_INSN (insn
);
2117 if (INSN_DEP_COUNT (insn
) == 0
2118 && (SCHED_GROUP_P (next
) == 0 || ! INSN_P (next
)))
2119 ready_add (ready
, insn
);
2120 if (!(SCHED_GROUP_P (insn
)))
2124 /* Add to ready list all 'ready' insns in valid source blocks.
2125 For speculative insns, check-live, exception-free, and
2127 for (bb_src
= target_bb
+ 1; bb_src
< current_nr_blocks
; bb_src
++)
2128 if (IS_VALID (bb_src
))
2134 get_block_head_tail (BB_TO_BLOCK (bb_src
), &head
, &tail
);
2135 src_next_tail
= NEXT_INSN (tail
);
2138 for (insn
= src_head
; insn
!= src_next_tail
; insn
= NEXT_INSN (insn
))
2140 if (! INSN_P (insn
))
2143 if (!CANT_MOVE (insn
)
2144 && (!IS_SPECULATIVE_INSN (insn
)
2145 || (insn_issue_delay (insn
) <= 3
2146 && check_live (insn
, bb_src
)
2147 && is_exception_free (insn
, bb_src
, target_bb
))))
2151 /* Note that we havn't squirrled away the notes for
2152 blocks other than the current. So if this is a
2153 speculative insn, NEXT might otherwise be a note. */
2154 next
= next_nonnote_insn (insn
);
2155 if (INSN_DEP_COUNT (insn
) == 0
2157 || SCHED_GROUP_P (next
) == 0
2158 || ! INSN_P (next
)))
2159 ready_add (ready
, insn
);
2165 /* Called after taking INSN from the ready list. Returns nonzero if this
2166 insn can be scheduled, nonzero if we should silently discard it. */
2169 can_schedule_ready_p (insn
)
2172 if (GET_CODE (insn
) == JUMP_INSN
)
2175 /* An interblock motion? */
2176 if (INSN_BB (insn
) != target_bb
)
2181 if (IS_SPECULATIVE_INSN (insn
))
2183 if (!check_live (insn
, INSN_BB (insn
)))
2185 update_live (insn
, INSN_BB (insn
));
2187 /* For speculative load, mark insns fed by it. */
2188 if (IS_LOAD_INSN (insn
) || FED_BY_SPEC_LOAD (insn
))
2189 set_spec_fed (insn
);
2195 /* Find the beginning of the scheduling group. */
2196 /* ??? Ought to update basic block here, but later bits of
2197 schedule_block assumes the original insn block is
2201 while (SCHED_GROUP_P (temp
))
2202 temp
= PREV_INSN (temp
);
2204 /* Update source block boundaries. */
2205 b1
= BLOCK_FOR_INSN (temp
);
2206 if (temp
== b1
->head
&& insn
== b1
->end
)
2208 /* We moved all the insns in the basic block.
2209 Emit a note after the last insn and update the
2210 begin/end boundaries to point to the note. */
2211 rtx note
= emit_note_after (NOTE_INSN_DELETED
, insn
);
2215 else if (insn
== b1
->end
)
2217 /* We took insns from the end of the basic block,
2218 so update the end of block boundary so that it
2219 points to the first insn we did not move. */
2220 b1
->end
= PREV_INSN (temp
);
2222 else if (temp
== b1
->head
)
2224 /* We took insns from the start of the basic block,
2225 so update the start of block boundary so that
2226 it points to the first insn we did not move. */
2227 b1
->head
= NEXT_INSN (insn
);
2232 /* In block motion. */
2233 sched_target_n_insns
++;
2240 /* Called after INSN has all its dependencies resolved. Return nonzero
2241 if it should be moved to the ready list or the queue, or zero if we
2242 should silently discard it. */
2247 /* For speculative insns, before inserting to ready/queue,
2248 check live, exception-free, and issue-delay. */
2249 if (INSN_BB (next
) != target_bb
2250 && (!IS_VALID (INSN_BB (next
))
2252 || (IS_SPECULATIVE_INSN (next
)
2253 && (insn_issue_delay (next
) > 3
2254 || !check_live (next
, INSN_BB (next
))
2255 || !is_exception_free (next
, INSN_BB (next
), target_bb
)))))
2260 /* Return a string that contains the insn uid and optionally anything else
2261 necessary to identify this insn in an output. It's valid to use a
2262 static buffer for this. The ALIGNED parameter should cause the string
2263 to be formatted so that multiple output lines will line up nicely. */
2266 rgn_print_insn (insn
, aligned
)
2270 static char tmp
[80];
2273 sprintf (tmp
, "b%3d: i%4d", INSN_BB (insn
), INSN_UID (insn
));
2276 if (current_nr_blocks
> 1 && INSN_BB (insn
) != target_bb
)
2277 sprintf (tmp
, "%d/b%d", INSN_UID (insn
), INSN_BB (insn
));
2279 sprintf (tmp
, "%d", INSN_UID (insn
));
2284 /* Compare priority of two insns. Return a positive number if the second
2285 insn is to be preferred for scheduling, and a negative one if the first
2286 is to be preferred. Zero if they are equally good. */
2289 rgn_rank (insn1
, insn2
)
2292 /* Some comparison make sense in interblock scheduling only. */
2293 if (INSN_BB (insn1
) != INSN_BB (insn2
))
2295 int spec_val
, prob_val
;
2297 /* Prefer an inblock motion on an interblock motion. */
2298 if ((INSN_BB (insn2
) == target_bb
) && (INSN_BB (insn1
) != target_bb
))
2300 if ((INSN_BB (insn1
) == target_bb
) && (INSN_BB (insn2
) != target_bb
))
2303 /* Prefer a useful motion on a speculative one. */
2304 spec_val
= IS_SPECULATIVE_INSN (insn1
) - IS_SPECULATIVE_INSN (insn2
);
2308 /* Prefer a more probable (speculative) insn. */
2309 prob_val
= INSN_PROBABILITY (insn2
) - INSN_PROBABILITY (insn1
);
2316 /* NEXT is an instruction that depends on INSN (a backward dependence);
2317 return nonzero if we should include this dependence in priority
2321 contributes_to_priority (next
, insn
)
2324 return BLOCK_NUM (next
) == BLOCK_NUM (insn
);
2327 /* INSN is a JUMP_INSN. Store the set of registers that must be considered
2328 to be set by this jump in SET. */
2331 compute_jump_reg_dependencies (insn
, set
)
2332 rtx insn ATTRIBUTE_UNUSED
;
2333 regset set ATTRIBUTE_UNUSED
;
2335 /* Nothing to do here, since we postprocess jumps in
2336 add_branch_dependences. */
2339 /* Used in schedule_insns to initialize current_sched_info for scheduling
2340 regions (or single basic blocks). */
2342 static struct sched_info region_sched_info
=
2345 can_schedule_ready_p
,
2350 contributes_to_priority
,
2351 compute_jump_reg_dependencies
,
2358 /* Add dependences so that branches are scheduled to run last in their
2362 add_branch_dependences (head
, tail
)
2367 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
2368 to remain in order at the end of the block by adding dependencies and
2369 giving the last a high priority. There may be notes present, and
2370 prev_head may also be a note.
2372 Branches must obviously remain at the end. Calls should remain at the
2373 end since moving them results in worse register allocation. Uses remain
2374 at the end to ensure proper register allocation. cc0 setters remaim
2375 at the end because they can't be moved away from their cc0 user. */
2378 while (GET_CODE (insn
) == CALL_INSN
2379 || GET_CODE (insn
) == JUMP_INSN
2380 || (GET_CODE (insn
) == INSN
2381 && (GET_CODE (PATTERN (insn
)) == USE
2382 || GET_CODE (PATTERN (insn
)) == CLOBBER
2384 || sets_cc0_p (PATTERN (insn
))
2387 || GET_CODE (insn
) == NOTE
)
2389 if (GET_CODE (insn
) != NOTE
)
2392 && !find_insn_list (insn
, LOG_LINKS (last
)))
2394 add_dependence (last
, insn
, REG_DEP_ANTI
);
2395 INSN_REF_COUNT (insn
)++;
2398 CANT_MOVE (insn
) = 1;
2401 /* Skip over insns that are part of a group.
2402 Make each insn explicitly depend on the previous insn.
2403 This ensures that only the group header will ever enter
2404 the ready queue (and, when scheduled, will automatically
2405 schedule the SCHED_GROUP_P block). */
2406 while (SCHED_GROUP_P (insn
))
2408 rtx temp
= prev_nonnote_insn (insn
);
2409 add_dependence (insn
, temp
, REG_DEP_ANTI
);
2414 /* Don't overrun the bounds of the basic block. */
2418 insn
= PREV_INSN (insn
);
2421 /* Make sure these insns are scheduled last in their block. */
2424 while (insn
!= head
)
2426 insn
= prev_nonnote_insn (insn
);
2428 if (INSN_REF_COUNT (insn
) != 0)
2431 add_dependence (last
, insn
, REG_DEP_ANTI
);
2432 INSN_REF_COUNT (insn
) = 1;
2434 /* Skip over insns that are part of a group. */
2435 while (SCHED_GROUP_P (insn
))
2436 insn
= prev_nonnote_insn (insn
);
2440 /* Data structures for the computation of data dependences in a regions. We
2441 keep one `deps' structure for every basic block. Before analyzing the
2442 data dependences for a bb, its variables are initialized as a function of
2443 the variables of its predecessors. When the analysis for a bb completes,
2444 we save the contents to the corresponding bb_deps[bb] variable. */
2446 static struct deps
*bb_deps
;
2448 /* After computing the dependencies for block BB, propagate the dependencies
2449 found in TMP_DEPS to the successors of the block. */
2451 propagate_deps (bb
, tmp_deps
)
2453 struct deps
*tmp_deps
;
2455 int b
= BB_TO_BLOCK (bb
);
2458 rtx link_insn
, link_mem
;
2461 /* These lists should point to the right place, for correct
2463 bb_deps
[bb
].pending_read_insns
= tmp_deps
->pending_read_insns
;
2464 bb_deps
[bb
].pending_read_mems
= tmp_deps
->pending_read_mems
;
2465 bb_deps
[bb
].pending_write_insns
= tmp_deps
->pending_write_insns
;
2466 bb_deps
[bb
].pending_write_mems
= tmp_deps
->pending_write_mems
;
2468 /* bb's structures are inherited by its successors. */
2469 first_edge
= e
= OUT_EDGES (b
);
2476 int b_succ
= TO_BLOCK (e
);
2477 int bb_succ
= BLOCK_TO_BB (b_succ
);
2478 struct deps
*succ_deps
= bb_deps
+ bb_succ
;
2480 /* Only bbs "below" bb, in the same region, are interesting. */
2481 if (CONTAINING_RGN (b
) != CONTAINING_RGN (b_succ
)
2488 /* The reg_last lists are inherited by bb_succ. */
2489 EXECUTE_IF_SET_IN_REG_SET (&tmp_deps
->reg_last_in_use
, 0, reg
,
2491 struct deps_reg
*tmp_deps_reg
= &tmp_deps
->reg_last
[reg
];
2492 struct deps_reg
*succ_deps_reg
= &succ_deps
->reg_last
[reg
];
2494 for (u
= tmp_deps_reg
->uses
; u
; u
= XEXP (u
, 1))
2495 if (! find_insn_list (XEXP (u
, 0), succ_deps_reg
->uses
))
2497 = alloc_INSN_LIST (XEXP (u
, 0), succ_deps_reg
->uses
);
2499 for (u
= tmp_deps_reg
->sets
; u
; u
= XEXP (u
, 1))
2500 if (! find_insn_list (XEXP (u
, 0), succ_deps_reg
->sets
))
2502 = alloc_INSN_LIST (XEXP (u
, 0), succ_deps_reg
->sets
);
2504 for (u
= tmp_deps_reg
->clobbers
; u
; u
= XEXP (u
, 1))
2505 if (! find_insn_list (XEXP (u
, 0), succ_deps_reg
->clobbers
))
2506 succ_deps_reg
->clobbers
2507 = alloc_INSN_LIST (XEXP (u
, 0), succ_deps_reg
->clobbers
);
2509 IOR_REG_SET (&succ_deps
->reg_last_in_use
, &tmp_deps
->reg_last_in_use
);
2511 /* Mem read/write lists are inherited by bb_succ. */
2512 link_insn
= tmp_deps
->pending_read_insns
;
2513 link_mem
= tmp_deps
->pending_read_mems
;
2516 if (!(find_insn_mem_list (XEXP (link_insn
, 0),
2518 succ_deps
->pending_read_insns
,
2519 succ_deps
->pending_read_mems
)))
2520 add_insn_mem_dependence (succ_deps
, &succ_deps
->pending_read_insns
,
2521 &succ_deps
->pending_read_mems
,
2522 XEXP (link_insn
, 0), XEXP (link_mem
, 0));
2523 link_insn
= XEXP (link_insn
, 1);
2524 link_mem
= XEXP (link_mem
, 1);
2527 link_insn
= tmp_deps
->pending_write_insns
;
2528 link_mem
= tmp_deps
->pending_write_mems
;
2531 if (!(find_insn_mem_list (XEXP (link_insn
, 0),
2533 succ_deps
->pending_write_insns
,
2534 succ_deps
->pending_write_mems
)))
2535 add_insn_mem_dependence (succ_deps
,
2536 &succ_deps
->pending_write_insns
,
2537 &succ_deps
->pending_write_mems
,
2538 XEXP (link_insn
, 0), XEXP (link_mem
, 0));
2540 link_insn
= XEXP (link_insn
, 1);
2541 link_mem
= XEXP (link_mem
, 1);
2544 /* last_function_call is inherited by bb_succ. */
2545 for (u
= tmp_deps
->last_function_call
; u
; u
= XEXP (u
, 1))
2546 if (! find_insn_list (XEXP (u
, 0), succ_deps
->last_function_call
))
2547 succ_deps
->last_function_call
2548 = alloc_INSN_LIST (XEXP (u
, 0), succ_deps
->last_function_call
);
2550 /* last_pending_memory_flush is inherited by bb_succ. */
2551 for (u
= tmp_deps
->last_pending_memory_flush
; u
; u
= XEXP (u
, 1))
2552 if (! find_insn_list (XEXP (u
, 0),
2553 succ_deps
->last_pending_memory_flush
))
2554 succ_deps
->last_pending_memory_flush
2555 = alloc_INSN_LIST (XEXP (u
, 0),
2556 succ_deps
->last_pending_memory_flush
);
2558 /* sched_before_next_call is inherited by bb_succ. */
2559 x
= LOG_LINKS (tmp_deps
->sched_before_next_call
);
2560 for (; x
; x
= XEXP (x
, 1))
2561 add_dependence (succ_deps
->sched_before_next_call
,
2562 XEXP (x
, 0), REG_DEP_ANTI
);
2566 while (e
!= first_edge
);
2569 /* Compute backward dependences inside bb. In a multiple blocks region:
2570 (1) a bb is analyzed after its predecessors, and (2) the lists in
2571 effect at the end of bb (after analyzing for bb) are inherited by
2574 Specifically for reg-reg data dependences, the block insns are
2575 scanned by sched_analyze () top-to-bottom. Two lists are
2576 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2577 and reg_last[].uses for register USEs.
2579 When analysis is completed for bb, we update for its successors:
2580 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2581 ; - USES[succ] = Union (USES [succ], DEFS [bb])
2583 The mechanism for computing mem-mem data dependence is very
2584 similar, and the result is interblock dependences in the region. */
2587 compute_block_backward_dependences (bb
)
2591 struct deps tmp_deps
;
2593 tmp_deps
= bb_deps
[bb
];
2595 /* Do the analysis for this block. */
2596 get_block_head_tail (BB_TO_BLOCK (bb
), &head
, &tail
);
2597 sched_analyze (&tmp_deps
, head
, tail
);
2598 add_branch_dependences (head
, tail
);
2600 if (current_nr_blocks
> 1)
2601 propagate_deps (bb
, &tmp_deps
);
2603 /* Free up the INSN_LISTs. */
2604 free_deps (&tmp_deps
);
2607 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2608 them to the unused_*_list variables, so that they can be reused. */
2611 free_pending_lists ()
2615 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2617 free_INSN_LIST_list (&bb_deps
[bb
].pending_read_insns
);
2618 free_INSN_LIST_list (&bb_deps
[bb
].pending_write_insns
);
2619 free_EXPR_LIST_list (&bb_deps
[bb
].pending_read_mems
);
2620 free_EXPR_LIST_list (&bb_deps
[bb
].pending_write_mems
);
2624 /* Print dependences for debugging, callable from debugger. */
2627 debug_dependencies ()
2631 fprintf (sched_dump
, ";; --------------- forward dependences: ------------ \n");
2632 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2640 get_block_head_tail (BB_TO_BLOCK (bb
), &head
, &tail
);
2641 next_tail
= NEXT_INSN (tail
);
2642 fprintf (sched_dump
, "\n;; --- Region Dependences --- b %d bb %d \n",
2643 BB_TO_BLOCK (bb
), bb
);
2645 fprintf (sched_dump
, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2646 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2647 fprintf (sched_dump
, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2648 "----", "----", "--", "---", "----", "----", "--------", "-----");
2649 for (insn
= head
; insn
!= next_tail
; insn
= NEXT_INSN (insn
))
2654 if (! INSN_P (insn
))
2657 fprintf (sched_dump
, ";; %6d ", INSN_UID (insn
));
2658 if (GET_CODE (insn
) == NOTE
)
2660 n
= NOTE_LINE_NUMBER (insn
);
2662 fprintf (sched_dump
, "%s\n", GET_NOTE_INSN_NAME (n
));
2664 fprintf (sched_dump
, "line %d, file %s\n", n
,
2665 NOTE_SOURCE_FILE (insn
));
2668 fprintf (sched_dump
, " {%s}\n", GET_RTX_NAME (GET_CODE (insn
)));
2672 unit
= insn_unit (insn
);
2674 || function_units
[unit
].blockage_range_function
== 0) ? 0 :
2675 function_units
[unit
].blockage_range_function (insn
);
2676 fprintf (sched_dump
,
2677 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2678 (SCHED_GROUP_P (insn
) ? "+" : " "),
2682 INSN_DEP_COUNT (insn
),
2683 INSN_PRIORITY (insn
),
2684 insn_cost (insn
, 0, 0),
2685 (int) MIN_BLOCKAGE_COST (range
),
2686 (int) MAX_BLOCKAGE_COST (range
));
2687 insn_print_units (insn
);
2688 fprintf (sched_dump
, "\t: ");
2689 for (link
= INSN_DEPEND (insn
); link
; link
= XEXP (link
, 1))
2690 fprintf (sched_dump
, "%d ", INSN_UID (XEXP (link
, 0)));
2691 fprintf (sched_dump
, "\n");
2695 fprintf (sched_dump
, "\n");
2698 /* Schedule a region. A region is either an inner loop, a loop-free
2699 subroutine, or a single basic block. Each bb in the region is
2700 scheduled after its flow predecessors. */
2703 schedule_region (rgn
)
2707 int rgn_n_insns
= 0;
2708 int sched_rgn_n_insns
= 0;
2710 /* Set variables for the current region. */
2711 current_nr_blocks
= RGN_NR_BLOCKS (rgn
);
2712 current_blocks
= RGN_BLOCKS (rgn
);
2714 init_deps_global ();
2716 /* Initializations for region data dependence analyisis. */
2717 bb_deps
= (struct deps
*) xmalloc (sizeof (struct deps
) * current_nr_blocks
);
2718 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2719 init_deps (bb_deps
+ bb
);
2721 /* Compute LOG_LINKS. */
2722 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2723 compute_block_backward_dependences (bb
);
2725 /* Compute INSN_DEPEND. */
2726 for (bb
= current_nr_blocks
- 1; bb
>= 0; bb
--)
2729 get_block_head_tail (BB_TO_BLOCK (bb
), &head
, &tail
);
2731 compute_forward_dependences (head
, tail
);
2734 /* Set priorities. */
2735 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2738 get_block_head_tail (BB_TO_BLOCK (bb
), &head
, &tail
);
2740 rgn_n_insns
+= set_priorities (head
, tail
);
2743 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2744 if (current_nr_blocks
> 1)
2748 prob
= (float *) xmalloc ((current_nr_blocks
) * sizeof (float));
2750 bbset_size
= current_nr_blocks
/ HOST_BITS_PER_WIDE_INT
+ 1;
2751 dom
= (bbset
*) xmalloc (current_nr_blocks
* sizeof (bbset
));
2752 for (i
= 0; i
< current_nr_blocks
; i
++)
2753 dom
[i
] = (bbset
) xcalloc (bbset_size
, sizeof (HOST_WIDE_INT
));
2757 edge_to_bit
= (int *) xmalloc (nr_edges
* sizeof (int));
2758 for (i
= 1; i
< nr_edges
; i
++)
2759 if (CONTAINING_RGN (FROM_BLOCK (i
)) == rgn
)
2760 EDGE_TO_BIT (i
) = rgn_nr_edges
++;
2761 rgn_edges
= (int *) xmalloc (rgn_nr_edges
* sizeof (int));
2764 for (i
= 1; i
< nr_edges
; i
++)
2765 if (CONTAINING_RGN (FROM_BLOCK (i
)) == (rgn
))
2766 rgn_edges
[rgn_nr_edges
++] = i
;
2769 edgeset_size
= rgn_nr_edges
/ HOST_BITS_PER_WIDE_INT
+ 1;
2770 edgeset_bitsize
= rgn_nr_edges
;
2771 pot_split
= (edgeset
*) xmalloc (current_nr_blocks
* sizeof (edgeset
));
2773 = (edgeset
*) xmalloc (current_nr_blocks
* sizeof (edgeset
));
2774 for (i
= 0; i
< current_nr_blocks
; i
++)
2777 (edgeset
) xcalloc (edgeset_size
, sizeof (HOST_WIDE_INT
));
2779 (edgeset
) xcalloc (edgeset_size
, sizeof (HOST_WIDE_INT
));
2782 /* Compute probabilities, dominators, split_edges. */
2783 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2784 compute_dom_prob_ps (bb
);
2787 /* Now we can schedule all blocks. */
2788 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2791 int b
= BB_TO_BLOCK (bb
);
2793 get_block_head_tail (b
, &head
, &tail
);
2795 if (no_real_insns_p (head
, tail
))
2798 current_sched_info
->prev_head
= PREV_INSN (head
);
2799 current_sched_info
->next_tail
= NEXT_INSN (tail
);
2801 if (write_symbols
!= NO_DEBUG
)
2803 save_line_notes (b
, head
, tail
);
2804 rm_line_notes (head
, tail
);
2807 /* rm_other_notes only removes notes which are _inside_ the
2808 block---that is, it won't remove notes before the first real insn
2809 or after the last real insn of the block. So if the first insn
2810 has a REG_SAVE_NOTE which would otherwise be emitted before the
2811 insn, it is redundant with the note before the start of the
2812 block, and so we have to take it out.
2814 FIXME: Probably the same thing should be done with REG_SAVE_NOTEs
2815 referencing NOTE_INSN_SETJMP at the end of the block. */
2820 for (note
= REG_NOTES (head
); note
; note
= XEXP (note
, 1))
2821 if (REG_NOTE_KIND (note
) == REG_SAVE_NOTE
)
2823 if (INTVAL (XEXP (note
, 0)) != NOTE_INSN_SETJMP
)
2825 remove_note (head
, note
);
2826 note
= XEXP (note
, 1);
2827 remove_note (head
, note
);
2830 note
= XEXP (note
, 1);
2834 /* Remove remaining note insns from the block, save them in
2835 note_list. These notes are restored at the end of
2836 schedule_block (). */
2837 rm_other_notes (head
, tail
);
2841 current_sched_info
->queue_must_finish_empty
2842 = current_nr_blocks
> 1 && !flag_schedule_interblock
;
2844 schedule_block (b
, rgn_n_insns
);
2845 sched_rgn_n_insns
+= sched_n_insns
;
2847 /* Update target block boundaries. */
2848 if (head
== BLOCK_HEAD (b
))
2849 BLOCK_HEAD (b
) = current_sched_info
->head
;
2850 if (tail
== BLOCK_END (b
))
2851 BLOCK_END (b
) = current_sched_info
->tail
;
2854 if (current_nr_blocks
> 1)
2856 free (candidate_table
);
2858 free (bitlst_table
);
2862 /* Sanity check: verify that all region insns were scheduled. */
2863 if (sched_rgn_n_insns
!= rgn_n_insns
)
2866 /* Restore line notes. */
2867 if (write_symbols
!= NO_DEBUG
)
2869 for (bb
= 0; bb
< current_nr_blocks
; bb
++)
2872 get_block_head_tail (BB_TO_BLOCK (bb
), &head
, &tail
);
2873 restore_line_notes (head
, tail
);
2877 /* Done with this region. */
2878 free_pending_lists ();
2880 finish_deps_global ();
2884 if (current_nr_blocks
> 1)
2889 for (i
= 0; i
< current_nr_blocks
; ++i
)
2892 free (pot_split
[i
]);
2893 free (ancestor_edges
[i
]);
2899 free (ancestor_edges
);
2903 /* Indexed by region, holds the number of death notes found in that region.
2904 Used for consistency checks. */
2905 static int *deaths_in_region
;
2907 /* Initialize data structures for region scheduling. */
2916 rgn_table
= (region
*) xmalloc ((n_basic_blocks
) * sizeof (region
));
2917 rgn_bb_table
= (int *) xmalloc ((n_basic_blocks
) * sizeof (int));
2918 block_to_bb
= (int *) xmalloc ((n_basic_blocks
) * sizeof (int));
2919 containing_rgn
= (int *) xmalloc ((n_basic_blocks
) * sizeof (int));
2921 blocks
= sbitmap_alloc (n_basic_blocks
);
2923 /* Compute regions for scheduling. */
2924 if (reload_completed
2925 || n_basic_blocks
== 1
2926 || !flag_schedule_interblock
)
2928 find_single_block_region ();
2932 /* Verify that a 'good' control flow graph can be built. */
2933 if (is_cfg_nonregular ())
2935 find_single_block_region ();
2940 struct edge_list
*edge_list
;
2942 dom
= sbitmap_vector_alloc (n_basic_blocks
, n_basic_blocks
);
2944 /* The scheduler runs after flow; therefore, we can't blindly call
2945 back into find_basic_blocks since doing so could invalidate the
2946 info in global_live_at_start.
2948 Consider a block consisting entirely of dead stores; after life
2949 analysis it would be a block of NOTE_INSN_DELETED notes. If
2950 we call find_basic_blocks again, then the block would be removed
2951 entirely and invalidate our the register live information.
2953 We could (should?) recompute register live information. Doing
2954 so may even be beneficial. */
2955 edge_list
= create_edge_list ();
2957 /* Compute the dominators and post dominators. */
2958 calculate_dominance_info (NULL
, dom
, CDI_DOMINATORS
);
2960 /* build_control_flow will return nonzero if it detects unreachable
2961 blocks or any other irregularity with the cfg which prevents
2962 cross block scheduling. */
2963 if (build_control_flow (edge_list
) != 0)
2964 find_single_block_region ();
2966 find_rgns (edge_list
, dom
);
2968 if (sched_verbose
>= 3)
2971 /* We are done with flow's edge list. */
2972 free_edge_list (edge_list
);
2974 /* For now. This will move as more and more of haifa is converted
2975 to using the cfg code in flow.c. */
2980 deaths_in_region
= (int *) xmalloc (sizeof (int) * nr_regions
);
2982 /* Remove all death notes from the subroutine. */
2983 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
2987 sbitmap_zero (blocks
);
2988 for (b
= RGN_NR_BLOCKS (rgn
) - 1; b
>= 0; --b
)
2989 SET_BIT (blocks
, rgn_bb_table
[RGN_BLOCKS (rgn
) + b
]);
2991 deaths_in_region
[rgn
] = count_or_remove_death_notes (blocks
, 1);
2994 sbitmap_free (blocks
);
2997 /* The one entry point in this file. DUMP_FILE is the dump file for
3001 schedule_insns (dump_file
)
3004 sbitmap large_region_blocks
, blocks
;
3006 int any_large_regions
;
3008 /* Taking care of this degenerate case makes the rest of
3009 this code simpler. */
3010 if (n_basic_blocks
== 0)
3016 sched_init (dump_file
);
3020 current_sched_info
= ®ion_sched_info
;
3022 /* Schedule every region in the subroutine. */
3023 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
3024 schedule_region (rgn
);
3026 /* Update life analysis for the subroutine. Do single block regions
3027 first so that we can verify that live_at_start didn't change. Then
3028 do all other blocks. */
3029 /* ??? There is an outside possibility that update_life_info, or more
3030 to the point propagate_block, could get called with non-zero flags
3031 more than once for one basic block. This would be kinda bad if it
3032 were to happen, since REG_INFO would be accumulated twice for the
3033 block, and we'd have twice the REG_DEAD notes.
3035 I'm fairly certain that this _shouldn't_ happen, since I don't think
3036 that live_at_start should change at region heads. Not sure what the
3037 best way to test for this kind of thing... */
3039 allocate_reg_life_data ();
3040 compute_bb_for_insn (get_max_uid ());
3042 any_large_regions
= 0;
3043 large_region_blocks
= sbitmap_alloc (n_basic_blocks
);
3044 sbitmap_ones (large_region_blocks
);
3046 blocks
= sbitmap_alloc (n_basic_blocks
);
3048 for (rgn
= 0; rgn
< nr_regions
; rgn
++)
3049 if (RGN_NR_BLOCKS (rgn
) > 1)
3050 any_large_regions
= 1;
3053 sbitmap_zero (blocks
);
3054 SET_BIT (blocks
, rgn_bb_table
[RGN_BLOCKS (rgn
)]);
3055 RESET_BIT (large_region_blocks
, rgn_bb_table
[RGN_BLOCKS (rgn
)]);
3057 /* Don't update reg info after reload, since that affects
3058 regs_ever_live, which should not change after reload. */
3059 update_life_info (blocks
, UPDATE_LIFE_LOCAL
,
3060 (reload_completed
? PROP_DEATH_NOTES
3061 : PROP_DEATH_NOTES
| PROP_REG_INFO
));
3063 #ifndef HAVE_conditional_execution
3064 /* ??? REG_DEAD notes only exist for unconditional deaths. We need
3065 a count of the conditional plus unconditional deaths for this to
3067 /* In the single block case, the count of registers that died should
3068 not have changed during the schedule. */
3069 if (count_or_remove_death_notes (blocks
, 0) != deaths_in_region
[rgn
])
3074 if (any_large_regions
)
3076 update_life_info (large_region_blocks
, UPDATE_LIFE_GLOBAL
,
3077 PROP_DEATH_NOTES
| PROP_REG_INFO
);
3080 /* Reposition the prologue and epilogue notes in case we moved the
3081 prologue/epilogue insns. */
3082 if (reload_completed
)
3083 reposition_prologue_and_epilogue_notes (get_insns ());
3085 /* Delete redundant line notes. */
3086 if (write_symbols
!= NO_DEBUG
)
3087 rm_redundant_line_notes ();
3091 if (reload_completed
== 0 && flag_schedule_interblock
)
3093 fprintf (sched_dump
,
3094 "\n;; Procedure interblock/speculative motions == %d/%d \n",
3102 fprintf (sched_dump
, "\n\n");
3107 free (rgn_bb_table
);
3109 free (containing_rgn
);
3130 sbitmap_free (blocks
);
3131 sbitmap_free (large_region_blocks
);
3133 free (deaths_in_region
);