* basic-block.h (ei_safe_edge): New function.
[official-gcc.git] / gcc / sched-rgn.c
blobb5d9fc8c2894ed41bc552bfe5c447ab0384d8953
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "basic-block.h"
57 #include "regs.h"
58 #include "function.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "insn-attr.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "recog.h"
65 #include "cfglayout.h"
66 #include "params.h"
67 #include "sched-int.h"
68 #include "target.h"
70 /* Define when we want to do count REG_DEAD notes before and after scheduling
71 for sanity checking. We can't do that when conditional execution is used,
72 as REG_DEAD exist only for unconditional deaths. */
74 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
75 #define CHECK_DEAD_NOTES 1
76 #else
77 #define CHECK_DEAD_NOTES 0
78 #endif
81 #ifdef INSN_SCHEDULING
82 /* Some accessor macros for h_i_d members only used within this file. */
83 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
84 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
85 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
87 /* nr_inter/spec counts interblock/speculative motion for the function. */
88 static int nr_inter, nr_spec;
90 /* Control flow graph edges are kept in circular lists. */
91 typedef struct
93 int from_block;
94 int to_block;
95 int next_in;
96 int next_out;
98 haifa_edge;
99 static haifa_edge *edge_table;
101 #define NEXT_IN(edge) (edge_table[edge].next_in)
102 #define NEXT_OUT(edge) (edge_table[edge].next_out)
103 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
104 #define TO_BLOCK(edge) (edge_table[edge].to_block)
106 /* Number of edges in the control flow graph. (In fact, larger than
107 that by 1, since edge 0 is unused.) */
108 static int nr_edges;
110 /* Circular list of incoming/outgoing edges of a block. */
111 static int *in_edges;
112 static int *out_edges;
114 #define IN_EDGES(block) (in_edges[block])
115 #define OUT_EDGES(block) (out_edges[block])
117 static int is_cfg_nonregular (void);
118 static int build_control_flow (struct edge_list *);
119 static void new_edge (int, int);
120 static bool sched_is_disabled_for_current_region_p (void);
122 /* A region is the main entity for interblock scheduling: insns
123 are allowed to move between blocks in the same region, along
124 control flow graph edges, in the 'up' direction. */
125 typedef struct
127 int rgn_nr_blocks; /* Number of blocks in region. */
128 int rgn_blocks; /* cblocks in the region (actually index in rgn_bb_table). */
130 region;
132 /* Number of regions in the procedure. */
133 static int nr_regions;
135 /* Table of region descriptions. */
136 static region *rgn_table;
138 /* Array of lists of regions' blocks. */
139 static int *rgn_bb_table;
141 /* Topological order of blocks in the region (if b2 is reachable from
142 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
143 always referred to by either block or b, while its topological
144 order name (in the region) is referred to by bb. */
145 static int *block_to_bb;
147 /* The number of the region containing a block. */
148 static int *containing_rgn;
150 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
151 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
152 #define BLOCK_TO_BB(block) (block_to_bb[block])
153 #define CONTAINING_RGN(block) (containing_rgn[block])
155 void debug_regions (void);
156 static void find_single_block_region (void);
157 static void find_rgns (struct edge_list *);
158 static bool too_large (int, int *, int *);
160 extern void debug_live (int, int);
162 /* Blocks of the current region being scheduled. */
163 static int current_nr_blocks;
164 static int current_blocks;
166 /* The mapping from bb to block. */
167 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
169 typedef struct
171 int *first_member; /* Pointer to the list start in bitlst_table. */
172 int nr_members; /* The number of members of the bit list. */
174 bitlst;
176 static int bitlst_table_last;
177 static int *bitlst_table;
179 static void extract_bitlst (sbitmap, bitlst *);
181 /* Target info declarations.
183 The block currently being scheduled is referred to as the "target" block,
184 while other blocks in the region from which insns can be moved to the
185 target are called "source" blocks. The candidate structure holds info
186 about such sources: are they valid? Speculative? Etc. */
187 typedef bitlst bblst;
188 typedef struct
190 char is_valid;
191 char is_speculative;
192 int src_prob;
193 bblst split_bbs;
194 bblst update_bbs;
196 candidate;
198 static candidate *candidate_table;
200 /* A speculative motion requires checking live information on the path
201 from 'source' to 'target'. The split blocks are those to be checked.
202 After a speculative motion, live information should be modified in
203 the 'update' blocks.
205 Lists of split and update blocks for each candidate of the current
206 target are in array bblst_table. */
207 static int *bblst_table, bblst_size, bblst_last;
209 #define IS_VALID(src) ( candidate_table[src].is_valid )
210 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
211 #define SRC_PROB(src) ( candidate_table[src].src_prob )
213 /* The bb being currently scheduled. */
214 static int target_bb;
216 /* List of edges. */
217 typedef bitlst edgelst;
219 /* Target info functions. */
220 static void split_edges (int, int, edgelst *);
221 static void compute_trg_info (int);
222 void debug_candidate (int);
223 void debug_candidates (int);
225 /* Dominators array: dom[i] contains the sbitmap of dominators of
226 bb i in the region. */
227 static sbitmap *dom;
229 /* bb 0 is the only region entry. */
230 #define IS_RGN_ENTRY(bb) (!bb)
232 /* Is bb_src dominated by bb_trg. */
233 #define IS_DOMINATED(bb_src, bb_trg) \
234 ( TEST_BIT (dom[bb_src], bb_trg) )
236 /* Probability: Prob[i] is a float in [0, 1] which is the probability
237 of bb i relative to the region entry. */
238 static float *prob;
240 /* The probability of bb_src, relative to bb_trg. Note, that while the
241 'prob[bb]' is a float in [0, 1], this macro returns an integer
242 in [0, 100]. */
243 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 prob[bb_trg])))
246 /* Bit-set of edges, where bit i stands for edge i. */
247 typedef sbitmap edgeset;
249 /* Number of edges in the region. */
250 static int rgn_nr_edges;
252 /* Array of size rgn_nr_edges. */
253 static int *rgn_edges;
256 /* Mapping from each edge in the graph to its number in the rgn. */
257 static int *edge_to_bit;
258 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
260 /* The split edges of a source bb is different for each target
261 bb. In order to compute this efficiently, the 'potential-split edges'
262 are computed for each bb prior to scheduling a region. This is actually
263 the split edges of each bb relative to the region entry.
265 pot_split[bb] is the set of potential split edges of bb. */
266 static edgeset *pot_split;
268 /* For every bb, a set of its ancestor edges. */
269 static edgeset *ancestor_edges;
271 static void compute_dom_prob_ps (int);
273 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
274 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
275 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
277 /* Parameters affecting the decision of rank_for_schedule().
278 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
279 #define MIN_PROBABILITY 40
281 /* Speculative scheduling functions. */
282 static int check_live_1 (int, rtx);
283 static void update_live_1 (int, rtx);
284 static int check_live (rtx, int);
285 static void update_live (rtx, int);
286 static void set_spec_fed (rtx);
287 static int is_pfree (rtx, int, int);
288 static int find_conditional_protection (rtx, int);
289 static int is_conditionally_protected (rtx, int, int);
290 static int is_prisky (rtx, int, int);
291 static int is_exception_free (rtx, int, int);
293 static bool sets_likely_spilled (rtx);
294 static void sets_likely_spilled_1 (rtx, rtx, void *);
295 static void add_branch_dependences (rtx, rtx);
296 static void compute_block_backward_dependences (int);
297 void debug_dependencies (void);
299 static void init_regions (void);
300 static void schedule_region (int);
301 static rtx concat_INSN_LIST (rtx, rtx);
302 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
303 static void propagate_deps (int, struct deps *);
304 static void free_pending_lists (void);
306 /* Functions for construction of the control flow graph. */
308 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
310 We decide not to build the control flow graph if there is possibly more
311 than one entry to the function, if computed branches exist, of if we
312 have nonlocal gotos. */
314 static int
315 is_cfg_nonregular (void)
317 basic_block b;
318 rtx insn;
319 RTX_CODE code;
321 /* If we have a label that could be the target of a nonlocal goto, then
322 the cfg is not well structured. */
323 if (nonlocal_goto_handler_labels)
324 return 1;
326 /* If we have any forced labels, then the cfg is not well structured. */
327 if (forced_labels)
328 return 1;
330 /* If this function has a computed jump, then we consider the cfg
331 not well structured. */
332 if (current_function_has_computed_jump)
333 return 1;
335 /* If we have exception handlers, then we consider the cfg not well
336 structured. ?!? We should be able to handle this now that flow.c
337 computes an accurate cfg for EH. */
338 if (current_function_has_exception_handlers ())
339 return 1;
341 /* If we have non-jumping insns which refer to labels, then we consider
342 the cfg not well structured. */
343 /* Check for labels referred to other thn by jumps. */
344 FOR_EACH_BB (b)
345 for (insn = BB_HEAD (b); ; insn = NEXT_INSN (insn))
347 code = GET_CODE (insn);
348 if (INSN_P (insn) && code != JUMP_INSN)
350 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
352 if (note
353 && ! (JUMP_P (NEXT_INSN (insn))
354 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
355 XEXP (note, 0))))
356 return 1;
359 if (insn == BB_END (b))
360 break;
363 /* All the tests passed. Consider the cfg well structured. */
364 return 0;
367 /* Build the control flow graph and set nr_edges.
369 Instead of trying to build a cfg ourselves, we rely on flow to
370 do it for us. Stamp out useless code (and bug) duplication.
372 Return nonzero if an irregularity in the cfg is found which would
373 prevent cross block scheduling. */
375 static int
376 build_control_flow (struct edge_list *edge_list)
378 int i, unreachable, num_edges;
379 basic_block b;
381 /* This already accounts for entry/exit edges. */
382 num_edges = NUM_EDGES (edge_list);
384 /* Unreachable loops with more than one basic block are detected
385 during the DFS traversal in find_rgns.
387 Unreachable loops with a single block are detected here. This
388 test is redundant with the one in find_rgns, but it's much
389 cheaper to go ahead and catch the trivial case here. */
390 unreachable = 0;
391 FOR_EACH_BB (b)
393 if (EDGE_COUNT (b->preds) == 0
394 || (EDGE_PRED (b, 0)->src == b
395 && EDGE_COUNT (b->preds) == 1))
396 unreachable = 1;
399 /* ??? We can kill these soon. */
400 in_edges = xcalloc (last_basic_block, sizeof (int));
401 out_edges = xcalloc (last_basic_block, sizeof (int));
402 edge_table = xcalloc (num_edges, sizeof (haifa_edge));
404 nr_edges = 0;
405 for (i = 0; i < num_edges; i++)
407 edge e = INDEX_EDGE (edge_list, i);
409 if (e->dest != EXIT_BLOCK_PTR
410 && e->src != ENTRY_BLOCK_PTR)
411 new_edge (e->src->index, e->dest->index);
414 /* Increment by 1, since edge 0 is unused. */
415 nr_edges++;
417 return unreachable;
420 /* Record an edge in the control flow graph from SOURCE to TARGET.
422 In theory, this is redundant with the s_succs computed above, but
423 we have not converted all of haifa to use information from the
424 integer lists. */
426 static void
427 new_edge (int source, int target)
429 int e, next_edge;
430 int curr_edge, fst_edge;
432 /* Check for duplicates. */
433 fst_edge = curr_edge = OUT_EDGES (source);
434 while (curr_edge)
436 if (FROM_BLOCK (curr_edge) == source
437 && TO_BLOCK (curr_edge) == target)
439 return;
442 curr_edge = NEXT_OUT (curr_edge);
444 if (fst_edge == curr_edge)
445 break;
448 e = ++nr_edges;
450 FROM_BLOCK (e) = source;
451 TO_BLOCK (e) = target;
453 if (OUT_EDGES (source))
455 next_edge = NEXT_OUT (OUT_EDGES (source));
456 NEXT_OUT (OUT_EDGES (source)) = e;
457 NEXT_OUT (e) = next_edge;
459 else
461 OUT_EDGES (source) = e;
462 NEXT_OUT (e) = e;
465 if (IN_EDGES (target))
467 next_edge = NEXT_IN (IN_EDGES (target));
468 NEXT_IN (IN_EDGES (target)) = e;
469 NEXT_IN (e) = next_edge;
471 else
473 IN_EDGES (target) = e;
474 NEXT_IN (e) = e;
478 /* Translate a bit-set SET to a list BL of the bit-set members. */
480 static void
481 extract_bitlst (sbitmap set, bitlst *bl)
483 int i;
485 /* bblst table space is reused in each call to extract_bitlst. */
486 bitlst_table_last = 0;
488 bl->first_member = &bitlst_table[bitlst_table_last];
489 bl->nr_members = 0;
491 /* Iterate over each word in the bitset. */
492 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
494 bitlst_table[bitlst_table_last++] = i;
495 (bl->nr_members)++;
500 /* Functions for the construction of regions. */
502 /* Print the regions, for debugging purposes. Callable from debugger. */
504 void
505 debug_regions (void)
507 int rgn, bb;
509 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
510 for (rgn = 0; rgn < nr_regions; rgn++)
512 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
513 rgn_table[rgn].rgn_nr_blocks);
514 fprintf (sched_dump, ";;\tbb/block: ");
516 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
518 current_blocks = RGN_BLOCKS (rgn);
520 gcc_assert (bb == BLOCK_TO_BB (BB_TO_BLOCK (bb)));
521 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
524 fprintf (sched_dump, "\n\n");
528 /* Build a single block region for each basic block in the function.
529 This allows for using the same code for interblock and basic block
530 scheduling. */
532 static void
533 find_single_block_region (void)
535 basic_block bb;
537 nr_regions = 0;
539 FOR_EACH_BB (bb)
541 rgn_bb_table[nr_regions] = bb->index;
542 RGN_NR_BLOCKS (nr_regions) = 1;
543 RGN_BLOCKS (nr_regions) = nr_regions;
544 CONTAINING_RGN (bb->index) = nr_regions;
545 BLOCK_TO_BB (bb->index) = 0;
546 nr_regions++;
550 /* Update number of blocks and the estimate for number of insns
551 in the region. Return true if the region is "too large" for interblock
552 scheduling (compile time considerations). */
554 static bool
555 too_large (int block, int *num_bbs, int *num_insns)
557 (*num_bbs)++;
558 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
559 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
561 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
562 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
565 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
566 is still an inner loop. Put in max_hdr[blk] the header of the most inner
567 loop containing blk. */
568 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
570 if (max_hdr[blk] == -1) \
571 max_hdr[blk] = hdr; \
572 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
573 RESET_BIT (inner, hdr); \
574 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
576 RESET_BIT (inner,max_hdr[blk]); \
577 max_hdr[blk] = hdr; \
581 /* Find regions for interblock scheduling.
583 A region for scheduling can be:
585 * A loop-free procedure, or
587 * A reducible inner loop, or
589 * A basic block not contained in any other region.
591 ?!? In theory we could build other regions based on extended basic
592 blocks or reverse extended basic blocks. Is it worth the trouble?
594 Loop blocks that form a region are put into the region's block list
595 in topological order.
597 This procedure stores its results into the following global (ick) variables
599 * rgn_nr
600 * rgn_table
601 * rgn_bb_table
602 * block_to_bb
603 * containing region
605 We use dominator relationships to avoid making regions out of non-reducible
606 loops.
608 This procedure needs to be converted to work on pred/succ lists instead
609 of edge tables. That would simplify it somewhat. */
611 static void
612 find_rgns (struct edge_list *edge_list)
614 int *max_hdr, *dfs_nr, *stack, *degree;
615 char no_loops = 1;
616 int node, child, loop_head, i, head, tail;
617 int count = 0, sp, idx = 0;
618 int current_edge = out_edges[EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest->index];
619 int num_bbs, num_insns, unreachable;
620 int too_large_failure;
621 basic_block bb;
623 /* Note if an edge has been passed. */
624 sbitmap passed;
626 /* Note if a block is a natural loop header. */
627 sbitmap header;
629 /* Note if a block is a natural inner loop header. */
630 sbitmap inner;
632 /* Note if a block is in the block queue. */
633 sbitmap in_queue;
635 /* Note if a block is in the block queue. */
636 sbitmap in_stack;
638 int num_edges = NUM_EDGES (edge_list);
640 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
641 and a mapping from block to its loop header (if the block is contained
642 in a loop, else -1).
644 Store results in HEADER, INNER, and MAX_HDR respectively, these will
645 be used as inputs to the second traversal.
647 STACK, SP and DFS_NR are only used during the first traversal. */
649 /* Allocate and initialize variables for the first traversal. */
650 max_hdr = xmalloc (last_basic_block * sizeof (int));
651 dfs_nr = xcalloc (last_basic_block, sizeof (int));
652 stack = xmalloc (nr_edges * sizeof (int));
654 inner = sbitmap_alloc (last_basic_block);
655 sbitmap_ones (inner);
657 header = sbitmap_alloc (last_basic_block);
658 sbitmap_zero (header);
660 passed = sbitmap_alloc (nr_edges);
661 sbitmap_zero (passed);
663 in_queue = sbitmap_alloc (last_basic_block);
664 sbitmap_zero (in_queue);
666 in_stack = sbitmap_alloc (last_basic_block);
667 sbitmap_zero (in_stack);
669 for (i = 0; i < last_basic_block; i++)
670 max_hdr[i] = -1;
672 /* DFS traversal to find inner loops in the cfg. */
674 sp = -1;
675 while (1)
677 if (current_edge == 0 || TEST_BIT (passed, current_edge))
679 /* We have reached a leaf node or a node that was already
680 processed. Pop edges off the stack until we find
681 an edge that has not yet been processed. */
682 while (sp >= 0
683 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
685 /* Pop entry off the stack. */
686 current_edge = stack[sp--];
687 node = FROM_BLOCK (current_edge);
688 child = TO_BLOCK (current_edge);
689 RESET_BIT (in_stack, child);
690 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
691 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
692 current_edge = NEXT_OUT (current_edge);
695 /* See if have finished the DFS tree traversal. */
696 if (sp < 0 && TEST_BIT (passed, current_edge))
697 break;
699 /* Nope, continue the traversal with the popped node. */
700 continue;
703 /* Process a node. */
704 node = FROM_BLOCK (current_edge);
705 child = TO_BLOCK (current_edge);
706 SET_BIT (in_stack, node);
707 dfs_nr[node] = ++count;
709 /* If the successor is in the stack, then we've found a loop.
710 Mark the loop, if it is not a natural loop, then it will
711 be rejected during the second traversal. */
712 if (TEST_BIT (in_stack, child))
714 no_loops = 0;
715 SET_BIT (header, child);
716 UPDATE_LOOP_RELATIONS (node, child);
717 SET_BIT (passed, current_edge);
718 current_edge = NEXT_OUT (current_edge);
719 continue;
722 /* If the child was already visited, then there is no need to visit
723 it again. Just update the loop relationships and restart
724 with a new edge. */
725 if (dfs_nr[child])
727 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
728 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
729 SET_BIT (passed, current_edge);
730 current_edge = NEXT_OUT (current_edge);
731 continue;
734 /* Push an entry on the stack and continue DFS traversal. */
735 stack[++sp] = current_edge;
736 SET_BIT (passed, current_edge);
737 current_edge = OUT_EDGES (child);
739 /* This is temporary until haifa is converted to use rth's new
740 cfg routines which have true entry/exit blocks and the
741 appropriate edges from/to those blocks.
743 Generally we update dfs_nr for a node when we process its
744 out edge. However, if the node has no out edge then we will
745 not set dfs_nr for that node. This can confuse the scheduler
746 into thinking that we have unreachable blocks, which in turn
747 disables cross block scheduling.
749 So, if we have a node with no out edges, go ahead and mark it
750 as reachable now. */
751 if (current_edge == 0)
752 dfs_nr[child] = ++count;
755 /* Another check for unreachable blocks. The earlier test in
756 is_cfg_nonregular only finds unreachable blocks that do not
757 form a loop.
759 The DFS traversal will mark every block that is reachable from
760 the entry node by placing a nonzero value in dfs_nr. Thus if
761 dfs_nr is zero for any block, then it must be unreachable. */
762 unreachable = 0;
763 FOR_EACH_BB (bb)
764 if (dfs_nr[bb->index] == 0)
766 unreachable = 1;
767 break;
770 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
771 to hold degree counts. */
772 degree = dfs_nr;
774 FOR_EACH_BB (bb)
775 degree[bb->index] = 0;
776 for (i = 0; i < num_edges; i++)
778 edge e = INDEX_EDGE (edge_list, i);
780 if (e->dest != EXIT_BLOCK_PTR)
781 degree[e->dest->index]++;
784 /* Do not perform region scheduling if there are any unreachable
785 blocks. */
786 if (!unreachable)
788 int *queue;
790 if (no_loops)
791 SET_BIT (header, 0);
793 /* Second traversal:find reducible inner loops and topologically sort
794 block of each region. */
796 queue = xmalloc (n_basic_blocks * sizeof (int));
798 /* Find blocks which are inner loop headers. We still have non-reducible
799 loops to consider at this point. */
800 FOR_EACH_BB (bb)
802 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
804 edge e;
805 edge_iterator ei;
806 basic_block jbb;
808 /* Now check that the loop is reducible. We do this separate
809 from finding inner loops so that we do not find a reducible
810 loop which contains an inner non-reducible loop.
812 A simple way to find reducible/natural loops is to verify
813 that each block in the loop is dominated by the loop
814 header.
816 If there exists a block that is not dominated by the loop
817 header, then the block is reachable from outside the loop
818 and thus the loop is not a natural loop. */
819 FOR_EACH_BB (jbb)
821 /* First identify blocks in the loop, except for the loop
822 entry block. */
823 if (bb->index == max_hdr[jbb->index] && bb != jbb)
825 /* Now verify that the block is dominated by the loop
826 header. */
827 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
828 break;
832 /* If we exited the loop early, then I is the header of
833 a non-reducible loop and we should quit processing it
834 now. */
835 if (jbb != EXIT_BLOCK_PTR)
836 continue;
838 /* I is a header of an inner loop, or block 0 in a subroutine
839 with no loops at all. */
840 head = tail = -1;
841 too_large_failure = 0;
842 loop_head = max_hdr[bb->index];
844 /* Decrease degree of all I's successors for topological
845 ordering. */
847 FOR_EACH_EDGE (e, ei, bb->succs)
849 if (e->dest != EXIT_BLOCK_PTR)
850 --degree[e->dest->index];
853 /* Estimate # insns, and count # blocks in the region. */
854 num_bbs = 1;
855 num_insns = (INSN_LUID (BB_END (bb))
856 - INSN_LUID (BB_HEAD (bb)));
858 /* Find all loop latches (blocks with back edges to the loop
859 header) or all the leaf blocks in the cfg has no loops.
861 Place those blocks into the queue. */
862 if (no_loops)
864 FOR_EACH_BB (jbb)
865 /* Leaf nodes have only a single successor which must
866 be EXIT_BLOCK. */
867 if (EDGE_COUNT (jbb->succs) == 1
868 && EDGE_SUCC (jbb, 0)->dest == EXIT_BLOCK_PTR)
870 queue[++tail] = jbb->index;
871 SET_BIT (in_queue, jbb->index);
873 if (too_large (jbb->index, &num_bbs, &num_insns))
875 too_large_failure = 1;
876 break;
880 else
882 edge e;
884 FOR_EACH_EDGE (e, ei, bb->preds)
886 if (e->src == ENTRY_BLOCK_PTR)
887 continue;
889 node = e->src->index;
891 if (max_hdr[node] == loop_head && node != bb->index)
893 /* This is a loop latch. */
894 queue[++tail] = node;
895 SET_BIT (in_queue, node);
897 if (too_large (node, &num_bbs, &num_insns))
899 too_large_failure = 1;
900 break;
906 /* Now add all the blocks in the loop to the queue.
908 We know the loop is a natural loop; however the algorithm
909 above will not always mark certain blocks as being in the
910 loop. Consider:
911 node children
912 a b,c
914 c a,d
917 The algorithm in the DFS traversal may not mark B & D as part
918 of the loop (ie they will not have max_hdr set to A).
920 We know they can not be loop latches (else they would have
921 had max_hdr set since they'd have a backedge to a dominator
922 block). So we don't need them on the initial queue.
924 We know they are part of the loop because they are dominated
925 by the loop header and can be reached by a backwards walk of
926 the edges starting with nodes on the initial queue.
928 It is safe and desirable to include those nodes in the
929 loop/scheduling region. To do so we would need to decrease
930 the degree of a node if it is the target of a backedge
931 within the loop itself as the node is placed in the queue.
933 We do not do this because I'm not sure that the actual
934 scheduling code will properly handle this case. ?!? */
936 while (head < tail && !too_large_failure)
938 edge e;
939 child = queue[++head];
941 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
943 node = e->src->index;
945 /* See discussion above about nodes not marked as in
946 this loop during the initial DFS traversal. */
947 if (e->src == ENTRY_BLOCK_PTR
948 || max_hdr[node] != loop_head)
950 tail = -1;
951 break;
953 else if (!TEST_BIT (in_queue, node) && node != bb->index)
955 queue[++tail] = node;
956 SET_BIT (in_queue, node);
958 if (too_large (node, &num_bbs, &num_insns))
960 too_large_failure = 1;
961 break;
967 if (tail >= 0 && !too_large_failure)
969 /* Place the loop header into list of region blocks. */
970 degree[bb->index] = -1;
971 rgn_bb_table[idx] = bb->index;
972 RGN_NR_BLOCKS (nr_regions) = num_bbs;
973 RGN_BLOCKS (nr_regions) = idx++;
974 CONTAINING_RGN (bb->index) = nr_regions;
975 BLOCK_TO_BB (bb->index) = count = 0;
977 /* Remove blocks from queue[] when their in degree
978 becomes zero. Repeat until no blocks are left on the
979 list. This produces a topological list of blocks in
980 the region. */
981 while (tail >= 0)
983 if (head < 0)
984 head = tail;
985 child = queue[head];
986 if (degree[child] == 0)
988 edge e;
990 degree[child] = -1;
991 rgn_bb_table[idx++] = child;
992 BLOCK_TO_BB (child) = ++count;
993 CONTAINING_RGN (child) = nr_regions;
994 queue[head] = queue[tail--];
996 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
998 if (e->dest != EXIT_BLOCK_PTR)
999 --degree[e->dest->index];
1002 else
1003 --head;
1005 ++nr_regions;
1009 free (queue);
1012 /* Any block that did not end up in a region is placed into a region
1013 by itself. */
1014 FOR_EACH_BB (bb)
1015 if (degree[bb->index] >= 0)
1017 rgn_bb_table[idx] = bb->index;
1018 RGN_NR_BLOCKS (nr_regions) = 1;
1019 RGN_BLOCKS (nr_regions) = idx++;
1020 CONTAINING_RGN (bb->index) = nr_regions++;
1021 BLOCK_TO_BB (bb->index) = 0;
1024 free (max_hdr);
1025 free (dfs_nr);
1026 free (stack);
1027 sbitmap_free (passed);
1028 sbitmap_free (header);
1029 sbitmap_free (inner);
1030 sbitmap_free (in_queue);
1031 sbitmap_free (in_stack);
1034 /* Functions for regions scheduling information. */
1036 /* Compute dominators, probability, and potential-split-edges of bb.
1037 Assume that these values were already computed for bb's predecessors. */
1039 static void
1040 compute_dom_prob_ps (int bb)
1042 int nxt_in_edge, fst_in_edge, pred;
1043 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1045 prob[bb] = 0.0;
1046 if (IS_RGN_ENTRY (bb))
1048 SET_BIT (dom[bb], 0);
1049 prob[bb] = 1.0;
1050 return;
1053 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1055 /* Initialize dom[bb] to '111..1'. */
1056 sbitmap_ones (dom[bb]);
1060 pred = FROM_BLOCK (nxt_in_edge);
1061 sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1062 sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1064 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1066 nr_out_edges = 1;
1067 nr_rgn_out_edges = 0;
1068 fst_out_edge = OUT_EDGES (pred);
1069 nxt_out_edge = NEXT_OUT (fst_out_edge);
1071 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1073 SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1075 /* The successor doesn't belong in the region? */
1076 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1077 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1078 ++nr_rgn_out_edges;
1080 while (fst_out_edge != nxt_out_edge)
1082 ++nr_out_edges;
1083 /* The successor doesn't belong in the region? */
1084 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1085 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1086 ++nr_rgn_out_edges;
1087 SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1088 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1092 /* Now nr_rgn_out_edges is the number of region-exit edges from
1093 pred, and nr_out_edges will be the number of pred out edges
1094 not leaving the region. */
1095 nr_out_edges -= nr_rgn_out_edges;
1096 if (nr_rgn_out_edges > 0)
1097 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1098 else
1099 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1100 nxt_in_edge = NEXT_IN (nxt_in_edge);
1102 while (fst_in_edge != nxt_in_edge);
1104 SET_BIT (dom[bb], bb);
1105 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1107 if (sched_verbose >= 2)
1108 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1109 (int) (100.0 * prob[bb]));
1112 /* Functions for target info. */
1114 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1115 Note that bb_trg dominates bb_src. */
1117 static void
1118 split_edges (int bb_src, int bb_trg, edgelst *bl)
1120 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1121 sbitmap_copy (src, pot_split[bb_src]);
1123 sbitmap_difference (src, src, pot_split[bb_trg]);
1124 extract_bitlst (src, bl);
1125 sbitmap_free (src);
1128 /* Find the valid candidate-source-blocks for the target block TRG, compute
1129 their probability, and check if they are speculative or not.
1130 For speculative sources, compute their update-blocks and split-blocks. */
1132 static void
1133 compute_trg_info (int trg)
1135 candidate *sp;
1136 edgelst el;
1137 int check_block, update_idx;
1138 int i, j, k, fst_edge, nxt_edge;
1140 /* Define some of the fields for the target bb as well. */
1141 sp = candidate_table + trg;
1142 sp->is_valid = 1;
1143 sp->is_speculative = 0;
1144 sp->src_prob = 100;
1146 for (i = trg + 1; i < current_nr_blocks; i++)
1148 sp = candidate_table + i;
1150 sp->is_valid = IS_DOMINATED (i, trg);
1151 if (sp->is_valid)
1153 sp->src_prob = GET_SRC_PROB (i, trg);
1154 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1157 if (sp->is_valid)
1159 split_edges (i, trg, &el);
1160 sp->is_speculative = (el.nr_members) ? 1 : 0;
1161 if (sp->is_speculative && !flag_schedule_speculative)
1162 sp->is_valid = 0;
1165 if (sp->is_valid)
1167 char *update_blocks;
1169 /* Compute split blocks and store them in bblst_table.
1170 The TO block of every split edge is a split block. */
1171 sp->split_bbs.first_member = &bblst_table[bblst_last];
1172 sp->split_bbs.nr_members = el.nr_members;
1173 for (j = 0; j < el.nr_members; bblst_last++, j++)
1174 bblst_table[bblst_last] =
1175 TO_BLOCK (rgn_edges[el.first_member[j]]);
1176 sp->update_bbs.first_member = &bblst_table[bblst_last];
1178 /* Compute update blocks and store them in bblst_table.
1179 For every split edge, look at the FROM block, and check
1180 all out edges. For each out edge that is not a split edge,
1181 add the TO block to the update block list. This list can end
1182 up with a lot of duplicates. We need to weed them out to avoid
1183 overrunning the end of the bblst_table. */
1184 update_blocks = alloca (last_basic_block);
1185 memset (update_blocks, 0, last_basic_block);
1187 update_idx = 0;
1188 for (j = 0; j < el.nr_members; j++)
1190 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1191 fst_edge = nxt_edge = OUT_EDGES (check_block);
1194 if (! update_blocks[TO_BLOCK (nxt_edge)])
1196 for (k = 0; k < el.nr_members; k++)
1197 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1198 break;
1200 if (k >= el.nr_members)
1202 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1203 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1204 update_idx++;
1208 nxt_edge = NEXT_OUT (nxt_edge);
1210 while (fst_edge != nxt_edge);
1212 sp->update_bbs.nr_members = update_idx;
1214 /* Make sure we didn't overrun the end of bblst_table. */
1215 gcc_assert (bblst_last <= bblst_size);
1217 else
1219 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1221 sp->is_speculative = 0;
1222 sp->src_prob = 0;
1227 /* Print candidates info, for debugging purposes. Callable from debugger. */
1229 void
1230 debug_candidate (int i)
1232 if (!candidate_table[i].is_valid)
1233 return;
1235 if (candidate_table[i].is_speculative)
1237 int j;
1238 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1240 fprintf (sched_dump, "split path: ");
1241 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1243 int b = candidate_table[i].split_bbs.first_member[j];
1245 fprintf (sched_dump, " %d ", b);
1247 fprintf (sched_dump, "\n");
1249 fprintf (sched_dump, "update path: ");
1250 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1252 int b = candidate_table[i].update_bbs.first_member[j];
1254 fprintf (sched_dump, " %d ", b);
1256 fprintf (sched_dump, "\n");
1258 else
1260 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1264 /* Print candidates info, for debugging purposes. Callable from debugger. */
1266 void
1267 debug_candidates (int trg)
1269 int i;
1271 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1272 BB_TO_BLOCK (trg), trg);
1273 for (i = trg + 1; i < current_nr_blocks; i++)
1274 debug_candidate (i);
1277 /* Functions for speculative scheduling. */
1279 /* Return 0 if x is a set of a register alive in the beginning of one
1280 of the split-blocks of src, otherwise return 1. */
1282 static int
1283 check_live_1 (int src, rtx x)
1285 int i;
1286 int regno;
1287 rtx reg = SET_DEST (x);
1289 if (reg == 0)
1290 return 1;
1292 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1293 || GET_CODE (reg) == SIGN_EXTRACT
1294 || GET_CODE (reg) == STRICT_LOW_PART)
1295 reg = XEXP (reg, 0);
1297 if (GET_CODE (reg) == PARALLEL)
1299 int i;
1301 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1302 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1303 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1304 return 1;
1306 return 0;
1309 if (!REG_P (reg))
1310 return 1;
1312 regno = REGNO (reg);
1314 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1316 /* Global registers are assumed live. */
1317 return 0;
1319 else
1321 if (regno < FIRST_PSEUDO_REGISTER)
1323 /* Check for hard registers. */
1324 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1325 while (--j >= 0)
1327 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1329 int b = candidate_table[src].split_bbs.first_member[i];
1331 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1332 regno + j))
1334 return 0;
1339 else
1341 /* Check for pseudo registers. */
1342 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1344 int b = candidate_table[src].split_bbs.first_member[i];
1346 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1348 return 0;
1354 return 1;
1357 /* If x is a set of a register R, mark that R is alive in the beginning
1358 of every update-block of src. */
1360 static void
1361 update_live_1 (int src, rtx x)
1363 int i;
1364 int regno;
1365 rtx reg = SET_DEST (x);
1367 if (reg == 0)
1368 return;
1370 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1371 || GET_CODE (reg) == SIGN_EXTRACT
1372 || GET_CODE (reg) == STRICT_LOW_PART)
1373 reg = XEXP (reg, 0);
1375 if (GET_CODE (reg) == PARALLEL)
1377 int i;
1379 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1380 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1381 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1383 return;
1386 if (!REG_P (reg))
1387 return;
1389 /* Global registers are always live, so the code below does not apply
1390 to them. */
1392 regno = REGNO (reg);
1394 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1396 if (regno < FIRST_PSEUDO_REGISTER)
1398 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1399 while (--j >= 0)
1401 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1403 int b = candidate_table[src].update_bbs.first_member[i];
1405 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1406 regno + j);
1410 else
1412 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1414 int b = candidate_table[src].update_bbs.first_member[i];
1416 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1422 /* Return 1 if insn can be speculatively moved from block src to trg,
1423 otherwise return 0. Called before first insertion of insn to
1424 ready-list or before the scheduling. */
1426 static int
1427 check_live (rtx insn, int src)
1429 /* Find the registers set by instruction. */
1430 if (GET_CODE (PATTERN (insn)) == SET
1431 || GET_CODE (PATTERN (insn)) == CLOBBER)
1432 return check_live_1 (src, PATTERN (insn));
1433 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1435 int j;
1436 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1437 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1438 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1439 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1440 return 0;
1442 return 1;
1445 return 1;
1448 /* Update the live registers info after insn was moved speculatively from
1449 block src to trg. */
1451 static void
1452 update_live (rtx insn, int src)
1454 /* Find the registers set by instruction. */
1455 if (GET_CODE (PATTERN (insn)) == SET
1456 || GET_CODE (PATTERN (insn)) == CLOBBER)
1457 update_live_1 (src, PATTERN (insn));
1458 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1460 int j;
1461 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1462 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1463 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1464 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1468 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1469 #define IS_REACHABLE(bb_from, bb_to) \
1470 (bb_from == bb_to \
1471 || IS_RGN_ENTRY (bb_from) \
1472 || (TEST_BIT (ancestor_edges[bb_to], \
1473 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
1475 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1477 static void
1478 set_spec_fed (rtx load_insn)
1480 rtx link;
1482 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1483 if (GET_MODE (link) == VOIDmode)
1484 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1485 } /* set_spec_fed */
1487 /* On the path from the insn to load_insn_bb, find a conditional
1488 branch depending on insn, that guards the speculative load. */
1490 static int
1491 find_conditional_protection (rtx insn, int load_insn_bb)
1493 rtx link;
1495 /* Iterate through DEF-USE forward dependences. */
1496 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1498 rtx next = XEXP (link, 0);
1499 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1500 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1501 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1502 && load_insn_bb != INSN_BB (next)
1503 && GET_MODE (link) == VOIDmode
1504 && (JUMP_P (next)
1505 || find_conditional_protection (next, load_insn_bb)))
1506 return 1;
1508 return 0;
1509 } /* find_conditional_protection */
1511 /* Returns 1 if the same insn1 that participates in the computation
1512 of load_insn's address is feeding a conditional branch that is
1513 guarding on load_insn. This is true if we find a the two DEF-USE
1514 chains:
1515 insn1 -> ... -> conditional-branch
1516 insn1 -> ... -> load_insn,
1517 and if a flow path exist:
1518 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1519 and if insn1 is on the path
1520 region-entry -> ... -> bb_trg -> ... load_insn.
1522 Locate insn1 by climbing on LOG_LINKS from load_insn.
1523 Locate the branch by following INSN_DEPEND from insn1. */
1525 static int
1526 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1528 rtx link;
1530 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1532 rtx insn1 = XEXP (link, 0);
1534 /* Must be a DEF-USE dependence upon non-branch. */
1535 if (GET_MODE (link) != VOIDmode
1536 || JUMP_P (insn1))
1537 continue;
1539 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1540 if (INSN_BB (insn1) == bb_src
1541 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1542 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1543 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1544 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1545 continue;
1547 /* Now search for the conditional-branch. */
1548 if (find_conditional_protection (insn1, bb_src))
1549 return 1;
1551 /* Recursive step: search another insn1, "above" current insn1. */
1552 return is_conditionally_protected (insn1, bb_src, bb_trg);
1555 /* The chain does not exist. */
1556 return 0;
1557 } /* is_conditionally_protected */
1559 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1560 load_insn can move speculatively from bb_src to bb_trg. All the
1561 following must hold:
1563 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1564 (2) load_insn and load1 have a def-use dependence upon
1565 the same insn 'insn1'.
1566 (3) either load2 is in bb_trg, or:
1567 - there's only one split-block, and
1568 - load1 is on the escape path, and
1570 From all these we can conclude that the two loads access memory
1571 addresses that differ at most by a constant, and hence if moving
1572 load_insn would cause an exception, it would have been caused by
1573 load2 anyhow. */
1575 static int
1576 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1578 rtx back_link;
1579 candidate *candp = candidate_table + bb_src;
1581 if (candp->split_bbs.nr_members != 1)
1582 /* Must have exactly one escape block. */
1583 return 0;
1585 for (back_link = LOG_LINKS (load_insn);
1586 back_link; back_link = XEXP (back_link, 1))
1588 rtx insn1 = XEXP (back_link, 0);
1590 if (GET_MODE (back_link) == VOIDmode)
1592 /* Found a DEF-USE dependence (insn1, load_insn). */
1593 rtx fore_link;
1595 for (fore_link = INSN_DEPEND (insn1);
1596 fore_link; fore_link = XEXP (fore_link, 1))
1598 rtx insn2 = XEXP (fore_link, 0);
1599 if (GET_MODE (fore_link) == VOIDmode)
1601 /* Found a DEF-USE dependence (insn1, insn2). */
1602 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1603 /* insn2 not guaranteed to be a 1 base reg load. */
1604 continue;
1606 if (INSN_BB (insn2) == bb_trg)
1607 /* insn2 is the similar load, in the target block. */
1608 return 1;
1610 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1611 /* insn2 is a similar load, in a split-block. */
1612 return 1;
1618 /* Couldn't find a similar load. */
1619 return 0;
1620 } /* is_pfree */
1622 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1623 a load moved speculatively, or if load_insn is protected by
1624 a compare on load_insn's address). */
1626 static int
1627 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1629 if (FED_BY_SPEC_LOAD (load_insn))
1630 return 1;
1632 if (LOG_LINKS (load_insn) == NULL)
1633 /* Dependence may 'hide' out of the region. */
1634 return 1;
1636 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1637 return 1;
1639 return 0;
1642 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1643 Return 1 if insn is exception-free (and the motion is valid)
1644 and 0 otherwise. */
1646 static int
1647 is_exception_free (rtx insn, int bb_src, int bb_trg)
1649 int insn_class = haifa_classify_insn (insn);
1651 /* Handle non-load insns. */
1652 switch (insn_class)
1654 case TRAP_FREE:
1655 return 1;
1656 case TRAP_RISKY:
1657 return 0;
1658 default:;
1661 /* Handle loads. */
1662 if (!flag_schedule_speculative_load)
1663 return 0;
1664 IS_LOAD_INSN (insn) = 1;
1665 switch (insn_class)
1667 case IFREE:
1668 return (1);
1669 case IRISKY:
1670 return 0;
1671 case PFREE_CANDIDATE:
1672 if (is_pfree (insn, bb_src, bb_trg))
1673 return 1;
1674 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1675 case PRISKY_CANDIDATE:
1676 if (!flag_schedule_speculative_load_dangerous
1677 || is_prisky (insn, bb_src, bb_trg))
1678 return 0;
1679 break;
1680 default:;
1683 return flag_schedule_speculative_load_dangerous;
1686 /* The number of insns from the current block scheduled so far. */
1687 static int sched_target_n_insns;
1688 /* The number of insns from the current block to be scheduled in total. */
1689 static int target_n_insns;
1690 /* The number of insns from the entire region scheduled so far. */
1691 static int sched_n_insns;
1692 /* Nonzero if the last scheduled insn was a jump. */
1693 static int last_was_jump;
1695 /* Implementations of the sched_info functions for region scheduling. */
1696 static void init_ready_list (struct ready_list *);
1697 static int can_schedule_ready_p (rtx);
1698 static int new_ready (rtx);
1699 static int schedule_more_p (void);
1700 static const char *rgn_print_insn (rtx, int);
1701 static int rgn_rank (rtx, rtx);
1702 static int contributes_to_priority (rtx, rtx);
1703 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1705 /* Return nonzero if there are more insns that should be scheduled. */
1707 static int
1708 schedule_more_p (void)
1710 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1713 /* Add all insns that are initially ready to the ready list READY. Called
1714 once before scheduling a set of insns. */
1716 static void
1717 init_ready_list (struct ready_list *ready)
1719 rtx prev_head = current_sched_info->prev_head;
1720 rtx next_tail = current_sched_info->next_tail;
1721 int bb_src;
1722 rtx insn;
1724 target_n_insns = 0;
1725 sched_target_n_insns = 0;
1726 sched_n_insns = 0;
1727 last_was_jump = 0;
1729 /* Print debugging information. */
1730 if (sched_verbose >= 5)
1731 debug_dependencies ();
1733 /* Prepare current target block info. */
1734 if (current_nr_blocks > 1)
1736 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1738 bblst_last = 0;
1739 /* bblst_table holds split blocks and update blocks for each block after
1740 the current one in the region. split blocks and update blocks are
1741 the TO blocks of region edges, so there can be at most rgn_nr_edges
1742 of them. */
1743 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1744 bblst_table = xmalloc (bblst_size * sizeof (int));
1746 bitlst_table_last = 0;
1747 bitlst_table = xmalloc (rgn_nr_edges * sizeof (int));
1749 compute_trg_info (target_bb);
1752 /* Initialize ready list with all 'ready' insns in target block.
1753 Count number of insns in the target block being scheduled. */
1754 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1756 if (INSN_DEP_COUNT (insn) == 0)
1758 ready_add (ready, insn);
1760 if (targetm.sched.adjust_priority)
1761 INSN_PRIORITY (insn) =
1762 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1764 target_n_insns++;
1767 /* Add to ready list all 'ready' insns in valid source blocks.
1768 For speculative insns, check-live, exception-free, and
1769 issue-delay. */
1770 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1771 if (IS_VALID (bb_src))
1773 rtx src_head;
1774 rtx src_next_tail;
1775 rtx tail, head;
1777 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1778 src_next_tail = NEXT_INSN (tail);
1779 src_head = head;
1781 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1783 if (! INSN_P (insn))
1784 continue;
1786 if (!CANT_MOVE (insn)
1787 && (!IS_SPECULATIVE_INSN (insn)
1788 || ((recog_memoized (insn) < 0
1789 || min_insn_conflict_delay (curr_state,
1790 insn, insn) <= 3)
1791 && check_live (insn, bb_src)
1792 && is_exception_free (insn, bb_src, target_bb))))
1793 if (INSN_DEP_COUNT (insn) == 0)
1795 ready_add (ready, insn);
1797 if (targetm.sched.adjust_priority)
1798 INSN_PRIORITY (insn) =
1799 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1805 /* Called after taking INSN from the ready list. Returns nonzero if this
1806 insn can be scheduled, nonzero if we should silently discard it. */
1808 static int
1809 can_schedule_ready_p (rtx insn)
1811 if (JUMP_P (insn))
1812 last_was_jump = 1;
1814 /* An interblock motion? */
1815 if (INSN_BB (insn) != target_bb)
1817 basic_block b1;
1819 if (IS_SPECULATIVE_INSN (insn))
1821 if (!check_live (insn, INSN_BB (insn)))
1822 return 0;
1823 update_live (insn, INSN_BB (insn));
1825 /* For speculative load, mark insns fed by it. */
1826 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1827 set_spec_fed (insn);
1829 nr_spec++;
1831 nr_inter++;
1833 /* Update source block boundaries. */
1834 b1 = BLOCK_FOR_INSN (insn);
1835 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1837 /* We moved all the insns in the basic block.
1838 Emit a note after the last insn and update the
1839 begin/end boundaries to point to the note. */
1840 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1841 BB_HEAD (b1) = note;
1842 BB_END (b1) = note;
1844 else if (insn == BB_END (b1))
1846 /* We took insns from the end of the basic block,
1847 so update the end of block boundary so that it
1848 points to the first insn we did not move. */
1849 BB_END (b1) = PREV_INSN (insn);
1851 else if (insn == BB_HEAD (b1))
1853 /* We took insns from the start of the basic block,
1854 so update the start of block boundary so that
1855 it points to the first insn we did not move. */
1856 BB_HEAD (b1) = NEXT_INSN (insn);
1859 else
1861 /* In block motion. */
1862 sched_target_n_insns++;
1864 sched_n_insns++;
1866 return 1;
1869 /* Called after INSN has all its dependencies resolved. Return nonzero
1870 if it should be moved to the ready list or the queue, or zero if we
1871 should silently discard it. */
1872 static int
1873 new_ready (rtx next)
1875 /* For speculative insns, before inserting to ready/queue,
1876 check live, exception-free, and issue-delay. */
1877 if (INSN_BB (next) != target_bb
1878 && (!IS_VALID (INSN_BB (next))
1879 || CANT_MOVE (next)
1880 || (IS_SPECULATIVE_INSN (next)
1881 && ((recog_memoized (next) >= 0
1882 && min_insn_conflict_delay (curr_state, next, next) > 3)
1883 || !check_live (next, INSN_BB (next))
1884 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1885 return 0;
1886 return 1;
1889 /* Return a string that contains the insn uid and optionally anything else
1890 necessary to identify this insn in an output. It's valid to use a
1891 static buffer for this. The ALIGNED parameter should cause the string
1892 to be formatted so that multiple output lines will line up nicely. */
1894 static const char *
1895 rgn_print_insn (rtx insn, int aligned)
1897 static char tmp[80];
1899 if (aligned)
1900 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1901 else
1903 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1904 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1905 else
1906 sprintf (tmp, "%d", INSN_UID (insn));
1908 return tmp;
1911 /* Compare priority of two insns. Return a positive number if the second
1912 insn is to be preferred for scheduling, and a negative one if the first
1913 is to be preferred. Zero if they are equally good. */
1915 static int
1916 rgn_rank (rtx insn1, rtx insn2)
1918 /* Some comparison make sense in interblock scheduling only. */
1919 if (INSN_BB (insn1) != INSN_BB (insn2))
1921 int spec_val, prob_val;
1923 /* Prefer an inblock motion on an interblock motion. */
1924 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1925 return 1;
1926 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1927 return -1;
1929 /* Prefer a useful motion on a speculative one. */
1930 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1931 if (spec_val)
1932 return spec_val;
1934 /* Prefer a more probable (speculative) insn. */
1935 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1936 if (prob_val)
1937 return prob_val;
1939 return 0;
1942 /* NEXT is an instruction that depends on INSN (a backward dependence);
1943 return nonzero if we should include this dependence in priority
1944 calculations. */
1946 static int
1947 contributes_to_priority (rtx next, rtx insn)
1949 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1952 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1953 conditionally set before INSN. Store the set of registers that
1954 must be considered as used by this jump in USED and that of
1955 registers that must be considered as set in SET. */
1957 static void
1958 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1959 regset cond_exec ATTRIBUTE_UNUSED,
1960 regset used ATTRIBUTE_UNUSED,
1961 regset set ATTRIBUTE_UNUSED)
1963 /* Nothing to do here, since we postprocess jumps in
1964 add_branch_dependences. */
1967 /* Used in schedule_insns to initialize current_sched_info for scheduling
1968 regions (or single basic blocks). */
1970 static struct sched_info region_sched_info =
1972 init_ready_list,
1973 can_schedule_ready_p,
1974 schedule_more_p,
1975 new_ready,
1976 rgn_rank,
1977 rgn_print_insn,
1978 contributes_to_priority,
1979 compute_jump_reg_dependencies,
1981 NULL, NULL,
1982 NULL, NULL,
1983 0, 0, 0
1986 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1988 static bool
1989 sets_likely_spilled (rtx pat)
1991 bool ret = false;
1992 note_stores (pat, sets_likely_spilled_1, &ret);
1993 return ret;
1996 static void
1997 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1999 bool *ret = (bool *) data;
2001 if (GET_CODE (pat) == SET
2002 && REG_P (x)
2003 && REGNO (x) < FIRST_PSEUDO_REGISTER
2004 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2005 *ret = true;
2008 /* Add dependences so that branches are scheduled to run last in their
2009 block. */
2011 static void
2012 add_branch_dependences (rtx head, rtx tail)
2014 rtx insn, last;
2016 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2017 that can throw exceptions, force them to remain in order at the end of
2018 the block by adding dependencies and giving the last a high priority.
2019 There may be notes present, and prev_head may also be a note.
2021 Branches must obviously remain at the end. Calls should remain at the
2022 end since moving them results in worse register allocation. Uses remain
2023 at the end to ensure proper register allocation.
2025 cc0 setters remain at the end because they can't be moved away from
2026 their cc0 user.
2028 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2029 are not moved before reload because we can wind up with register
2030 allocation failures. */
2032 insn = tail;
2033 last = 0;
2034 while (CALL_P (insn)
2035 || JUMP_P (insn)
2036 || (NONJUMP_INSN_P (insn)
2037 && (GET_CODE (PATTERN (insn)) == USE
2038 || GET_CODE (PATTERN (insn)) == CLOBBER
2039 || can_throw_internal (insn)
2040 #ifdef HAVE_cc0
2041 || sets_cc0_p (PATTERN (insn))
2042 #endif
2043 || (!reload_completed
2044 && sets_likely_spilled (PATTERN (insn)))))
2045 || NOTE_P (insn))
2047 if (!NOTE_P (insn))
2049 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2051 add_dependence (last, insn, REG_DEP_ANTI);
2052 INSN_REF_COUNT (insn)++;
2055 CANT_MOVE (insn) = 1;
2057 last = insn;
2060 /* Don't overrun the bounds of the basic block. */
2061 if (insn == head)
2062 break;
2064 insn = PREV_INSN (insn);
2067 /* Make sure these insns are scheduled last in their block. */
2068 insn = last;
2069 if (insn != 0)
2070 while (insn != head)
2072 insn = prev_nonnote_insn (insn);
2074 if (INSN_REF_COUNT (insn) != 0)
2075 continue;
2077 add_dependence (last, insn, REG_DEP_ANTI);
2078 INSN_REF_COUNT (insn) = 1;
2082 /* Data structures for the computation of data dependences in a regions. We
2083 keep one `deps' structure for every basic block. Before analyzing the
2084 data dependences for a bb, its variables are initialized as a function of
2085 the variables of its predecessors. When the analysis for a bb completes,
2086 we save the contents to the corresponding bb_deps[bb] variable. */
2088 static struct deps *bb_deps;
2090 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2092 static rtx
2093 concat_INSN_LIST (rtx copy, rtx old)
2095 rtx new = old;
2096 for (; copy ; copy = XEXP (copy, 1))
2097 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2098 return new;
2101 static void
2102 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2103 rtx *old_mems_p)
2105 rtx new_insns = *old_insns_p;
2106 rtx new_mems = *old_mems_p;
2108 while (copy_insns)
2110 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2111 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2112 copy_insns = XEXP (copy_insns, 1);
2113 copy_mems = XEXP (copy_mems, 1);
2116 *old_insns_p = new_insns;
2117 *old_mems_p = new_mems;
2120 /* After computing the dependencies for block BB, propagate the dependencies
2121 found in TMP_DEPS to the successors of the block. */
2122 static void
2123 propagate_deps (int bb, struct deps *pred_deps)
2125 int b = BB_TO_BLOCK (bb);
2126 int e, first_edge;
2128 /* bb's structures are inherited by its successors. */
2129 first_edge = e = OUT_EDGES (b);
2130 if (e > 0)
2133 int b_succ = TO_BLOCK (e);
2134 int bb_succ = BLOCK_TO_BB (b_succ);
2135 struct deps *succ_deps = bb_deps + bb_succ;
2136 int reg;
2138 /* Only bbs "below" bb, in the same region, are interesting. */
2139 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2140 || bb_succ <= bb)
2142 e = NEXT_OUT (e);
2143 continue;
2146 /* The reg_last lists are inherited by bb_succ. */
2147 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2149 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2150 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2152 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2153 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2154 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2155 succ_rl->clobbers);
2156 succ_rl->uses_length += pred_rl->uses_length;
2157 succ_rl->clobbers_length += pred_rl->clobbers_length;
2159 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2161 /* Mem read/write lists are inherited by bb_succ. */
2162 concat_insn_mem_list (pred_deps->pending_read_insns,
2163 pred_deps->pending_read_mems,
2164 &succ_deps->pending_read_insns,
2165 &succ_deps->pending_read_mems);
2166 concat_insn_mem_list (pred_deps->pending_write_insns,
2167 pred_deps->pending_write_mems,
2168 &succ_deps->pending_write_insns,
2169 &succ_deps->pending_write_mems);
2171 succ_deps->last_pending_memory_flush
2172 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2173 succ_deps->last_pending_memory_flush);
2175 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2176 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2178 /* last_function_call is inherited by bb_succ. */
2179 succ_deps->last_function_call
2180 = concat_INSN_LIST (pred_deps->last_function_call,
2181 succ_deps->last_function_call);
2183 /* sched_before_next_call is inherited by bb_succ. */
2184 succ_deps->sched_before_next_call
2185 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2186 succ_deps->sched_before_next_call);
2188 e = NEXT_OUT (e);
2190 while (e != first_edge);
2192 /* These lists should point to the right place, for correct
2193 freeing later. */
2194 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2195 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2196 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2197 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2199 /* Can't allow these to be freed twice. */
2200 pred_deps->pending_read_insns = 0;
2201 pred_deps->pending_read_mems = 0;
2202 pred_deps->pending_write_insns = 0;
2203 pred_deps->pending_write_mems = 0;
2206 /* Compute backward dependences inside bb. In a multiple blocks region:
2207 (1) a bb is analyzed after its predecessors, and (2) the lists in
2208 effect at the end of bb (after analyzing for bb) are inherited by
2209 bb's successors.
2211 Specifically for reg-reg data dependences, the block insns are
2212 scanned by sched_analyze () top-to-bottom. Two lists are
2213 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2214 and reg_last[].uses for register USEs.
2216 When analysis is completed for bb, we update for its successors:
2217 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2218 ; - USES[succ] = Union (USES [succ], DEFS [bb])
2220 The mechanism for computing mem-mem data dependence is very
2221 similar, and the result is interblock dependences in the region. */
2223 static void
2224 compute_block_backward_dependences (int bb)
2226 rtx head, tail;
2227 struct deps tmp_deps;
2229 tmp_deps = bb_deps[bb];
2231 /* Do the analysis for this block. */
2232 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2233 sched_analyze (&tmp_deps, head, tail);
2234 add_branch_dependences (head, tail);
2236 if (current_nr_blocks > 1)
2237 propagate_deps (bb, &tmp_deps);
2239 /* Free up the INSN_LISTs. */
2240 free_deps (&tmp_deps);
2243 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2244 them to the unused_*_list variables, so that they can be reused. */
2246 static void
2247 free_pending_lists (void)
2249 int bb;
2251 for (bb = 0; bb < current_nr_blocks; bb++)
2253 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2254 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2255 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2256 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2260 /* Print dependences for debugging, callable from debugger. */
2262 void
2263 debug_dependencies (void)
2265 int bb;
2267 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2268 for (bb = 0; bb < current_nr_blocks; bb++)
2270 rtx head, tail;
2271 rtx next_tail;
2272 rtx insn;
2274 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2275 next_tail = NEXT_INSN (tail);
2276 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2277 BB_TO_BLOCK (bb), bb);
2279 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2280 "insn", "code", "bb", "dep", "prio", "cost",
2281 "reservation");
2282 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2283 "----", "----", "--", "---", "----", "----",
2284 "-----------");
2286 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2288 rtx link;
2290 if (! INSN_P (insn))
2292 int n;
2293 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2294 if (NOTE_P (insn))
2296 n = NOTE_LINE_NUMBER (insn);
2297 if (n < 0)
2298 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2299 else
2301 expanded_location xloc;
2302 NOTE_EXPANDED_LOCATION (xloc, insn);
2303 fprintf (sched_dump, "line %d, file %s\n",
2304 xloc.line, xloc.file);
2307 else
2308 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2309 continue;
2312 fprintf (sched_dump,
2313 ";; %s%5d%6d%6d%6d%6d%6d ",
2314 (SCHED_GROUP_P (insn) ? "+" : " "),
2315 INSN_UID (insn),
2316 INSN_CODE (insn),
2317 INSN_BB (insn),
2318 INSN_DEP_COUNT (insn),
2319 INSN_PRIORITY (insn),
2320 insn_cost (insn, 0, 0));
2322 if (recog_memoized (insn) < 0)
2323 fprintf (sched_dump, "nothing");
2324 else
2325 print_reservation (sched_dump, insn);
2327 fprintf (sched_dump, "\t: ");
2328 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2329 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2330 fprintf (sched_dump, "\n");
2333 fprintf (sched_dump, "\n");
2336 /* Returns true if all the basic blocks of the current region have
2337 NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */
2338 static bool
2339 sched_is_disabled_for_current_region_p (void)
2341 int bb;
2343 for (bb = 0; bb < current_nr_blocks; bb++)
2344 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2345 return false;
2347 return true;
2350 /* Schedule a region. A region is either an inner loop, a loop-free
2351 subroutine, or a single basic block. Each bb in the region is
2352 scheduled after its flow predecessors. */
2354 static void
2355 schedule_region (int rgn)
2357 int bb;
2358 int rgn_n_insns = 0;
2359 int sched_rgn_n_insns = 0;
2361 /* Set variables for the current region. */
2362 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2363 current_blocks = RGN_BLOCKS (rgn);
2365 /* Don't schedule region that is marked by
2366 NOTE_DISABLE_SCHED_OF_BLOCK. */
2367 if (sched_is_disabled_for_current_region_p ())
2368 return;
2370 init_deps_global ();
2372 /* Initializations for region data dependence analysis. */
2373 bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
2374 for (bb = 0; bb < current_nr_blocks; bb++)
2375 init_deps (bb_deps + bb);
2377 /* Compute LOG_LINKS. */
2378 for (bb = 0; bb < current_nr_blocks; bb++)
2379 compute_block_backward_dependences (bb);
2381 /* Compute INSN_DEPEND. */
2382 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2384 rtx head, tail;
2385 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2387 compute_forward_dependences (head, tail);
2389 if (targetm.sched.dependencies_evaluation_hook)
2390 targetm.sched.dependencies_evaluation_hook (head, tail);
2394 /* Set priorities. */
2395 for (bb = 0; bb < current_nr_blocks; bb++)
2397 rtx head, tail;
2398 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2400 rgn_n_insns += set_priorities (head, tail);
2403 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2404 if (current_nr_blocks > 1)
2406 int i;
2408 prob = xmalloc ((current_nr_blocks) * sizeof (float));
2410 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2411 sbitmap_vector_zero (dom, current_nr_blocks);
2412 /* Edge to bit. */
2413 rgn_nr_edges = 0;
2414 edge_to_bit = xmalloc (nr_edges * sizeof (int));
2415 for (i = 1; i < nr_edges; i++)
2416 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2417 EDGE_TO_BIT (i) = rgn_nr_edges++;
2418 rgn_edges = xmalloc (rgn_nr_edges * sizeof (int));
2420 rgn_nr_edges = 0;
2421 for (i = 1; i < nr_edges; i++)
2422 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2423 rgn_edges[rgn_nr_edges++] = i;
2425 /* Split edges. */
2426 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2427 sbitmap_vector_zero (pot_split, current_nr_blocks);
2428 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2429 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2431 /* Compute probabilities, dominators, split_edges. */
2432 for (bb = 0; bb < current_nr_blocks; bb++)
2433 compute_dom_prob_ps (bb);
2436 /* Now we can schedule all blocks. */
2437 for (bb = 0; bb < current_nr_blocks; bb++)
2439 rtx head, tail;
2440 int b = BB_TO_BLOCK (bb);
2442 get_block_head_tail (b, &head, &tail);
2444 if (no_real_insns_p (head, tail))
2445 continue;
2447 current_sched_info->prev_head = PREV_INSN (head);
2448 current_sched_info->next_tail = NEXT_INSN (tail);
2450 if (write_symbols != NO_DEBUG)
2452 save_line_notes (b, head, tail);
2453 rm_line_notes (head, tail);
2456 /* rm_other_notes only removes notes which are _inside_ the
2457 block---that is, it won't remove notes before the first real insn
2458 or after the last real insn of the block. So if the first insn
2459 has a REG_SAVE_NOTE which would otherwise be emitted before the
2460 insn, it is redundant with the note before the start of the
2461 block, and so we have to take it out. */
2462 if (INSN_P (head))
2464 rtx note;
2466 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2467 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2469 remove_note (head, note);
2470 note = XEXP (note, 1);
2471 remove_note (head, note);
2475 /* Remove remaining note insns from the block, save them in
2476 note_list. These notes are restored at the end of
2477 schedule_block (). */
2478 rm_other_notes (head, tail);
2480 target_bb = bb;
2482 current_sched_info->queue_must_finish_empty
2483 = current_nr_blocks > 1 && !flag_schedule_interblock;
2485 schedule_block (b, rgn_n_insns);
2486 sched_rgn_n_insns += sched_n_insns;
2488 /* Update target block boundaries. */
2489 if (head == BB_HEAD (BASIC_BLOCK (b)))
2490 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2491 if (tail == BB_END (BASIC_BLOCK (b)))
2492 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2494 /* Clean up. */
2495 if (current_nr_blocks > 1)
2497 free (candidate_table);
2498 free (bblst_table);
2499 free (bitlst_table);
2503 /* Sanity check: verify that all region insns were scheduled. */
2504 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2506 /* Restore line notes. */
2507 if (write_symbols != NO_DEBUG)
2509 for (bb = 0; bb < current_nr_blocks; bb++)
2511 rtx head, tail;
2512 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2513 restore_line_notes (head, tail);
2517 /* Done with this region. */
2518 free_pending_lists ();
2520 finish_deps_global ();
2522 free (bb_deps);
2524 if (current_nr_blocks > 1)
2526 free (prob);
2527 sbitmap_vector_free (dom);
2528 sbitmap_vector_free (pot_split);
2529 sbitmap_vector_free (ancestor_edges);
2530 free (edge_to_bit);
2531 free (rgn_edges);
2535 /* Indexed by region, holds the number of death notes found in that region.
2536 Used for consistency checks. */
2537 static int *deaths_in_region;
2539 /* Initialize data structures for region scheduling. */
2541 static void
2542 init_regions (void)
2544 sbitmap blocks;
2545 int rgn;
2547 nr_regions = 0;
2548 rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
2549 rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
2550 block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
2551 containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
2553 /* Compute regions for scheduling. */
2554 if (reload_completed
2555 || n_basic_blocks == 1
2556 || !flag_schedule_interblock)
2558 find_single_block_region ();
2560 else
2562 /* Verify that a 'good' control flow graph can be built. */
2563 if (is_cfg_nonregular ())
2565 find_single_block_region ();
2567 else
2569 struct edge_list *edge_list;
2571 /* The scheduler runs after estimate_probabilities; therefore, we
2572 can't blindly call back into find_basic_blocks since doing so
2573 could invalidate the branch probability info. We could,
2574 however, call cleanup_cfg. */
2575 edge_list = create_edge_list ();
2577 /* Compute the dominators and post dominators. */
2578 calculate_dominance_info (CDI_DOMINATORS);
2580 /* build_control_flow will return nonzero if it detects unreachable
2581 blocks or any other irregularity with the cfg which prevents
2582 cross block scheduling. */
2583 if (build_control_flow (edge_list) != 0)
2584 find_single_block_region ();
2585 else
2586 find_rgns (edge_list);
2588 if (sched_verbose >= 3)
2589 debug_regions ();
2591 /* We are done with flow's edge list. */
2592 free_edge_list (edge_list);
2594 /* For now. This will move as more and more of haifa is converted
2595 to using the cfg code in flow.c. */
2596 free_dominance_info (CDI_DOMINATORS);
2601 if (CHECK_DEAD_NOTES)
2603 blocks = sbitmap_alloc (last_basic_block);
2604 deaths_in_region = xmalloc (sizeof (int) * nr_regions);
2605 /* Remove all death notes from the subroutine. */
2606 for (rgn = 0; rgn < nr_regions; rgn++)
2608 int b;
2610 sbitmap_zero (blocks);
2611 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2612 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2614 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2616 sbitmap_free (blocks);
2618 else
2619 count_or_remove_death_notes (NULL, 1);
2622 /* The one entry point in this file. DUMP_FILE is the dump file for
2623 this pass. */
2625 void
2626 schedule_insns (FILE *dump_file)
2628 sbitmap large_region_blocks, blocks;
2629 int rgn;
2630 int any_large_regions;
2631 basic_block bb;
2633 /* Taking care of this degenerate case makes the rest of
2634 this code simpler. */
2635 if (n_basic_blocks == 0)
2636 return;
2638 nr_inter = 0;
2639 nr_spec = 0;
2641 sched_init (dump_file);
2643 init_regions ();
2645 current_sched_info = &region_sched_info;
2647 /* Schedule every region in the subroutine. */
2648 for (rgn = 0; rgn < nr_regions; rgn++)
2649 schedule_region (rgn);
2651 /* Update life analysis for the subroutine. Do single block regions
2652 first so that we can verify that live_at_start didn't change. Then
2653 do all other blocks. */
2654 /* ??? There is an outside possibility that update_life_info, or more
2655 to the point propagate_block, could get called with nonzero flags
2656 more than once for one basic block. This would be kinda bad if it
2657 were to happen, since REG_INFO would be accumulated twice for the
2658 block, and we'd have twice the REG_DEAD notes.
2660 I'm fairly certain that this _shouldn't_ happen, since I don't think
2661 that live_at_start should change at region heads. Not sure what the
2662 best way to test for this kind of thing... */
2664 allocate_reg_life_data ();
2665 compute_bb_for_insn ();
2667 any_large_regions = 0;
2668 large_region_blocks = sbitmap_alloc (last_basic_block);
2669 sbitmap_zero (large_region_blocks);
2670 FOR_EACH_BB (bb)
2671 SET_BIT (large_region_blocks, bb->index);
2673 blocks = sbitmap_alloc (last_basic_block);
2674 sbitmap_zero (blocks);
2676 /* Update life information. For regions consisting of multiple blocks
2677 we've possibly done interblock scheduling that affects global liveness.
2678 For regions consisting of single blocks we need to do only local
2679 liveness. */
2680 for (rgn = 0; rgn < nr_regions; rgn++)
2681 if (RGN_NR_BLOCKS (rgn) > 1)
2682 any_large_regions = 1;
2683 else
2685 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2686 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2689 /* Don't update reg info after reload, since that affects
2690 regs_ever_live, which should not change after reload. */
2691 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2692 (reload_completed ? PROP_DEATH_NOTES
2693 : PROP_DEATH_NOTES | PROP_REG_INFO));
2694 if (any_large_regions)
2696 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2697 PROP_DEATH_NOTES | PROP_REG_INFO);
2700 if (CHECK_DEAD_NOTES)
2702 /* Verify the counts of basic block notes in single the basic block
2703 regions. */
2704 for (rgn = 0; rgn < nr_regions; rgn++)
2705 if (RGN_NR_BLOCKS (rgn) == 1)
2707 sbitmap_zero (blocks);
2708 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2710 gcc_assert (deaths_in_region[rgn]
2711 == count_or_remove_death_notes (blocks, 0));
2713 free (deaths_in_region);
2716 /* Reposition the prologue and epilogue notes in case we moved the
2717 prologue/epilogue insns. */
2718 if (reload_completed)
2719 reposition_prologue_and_epilogue_notes (get_insns ());
2721 /* Delete redundant line notes. */
2722 if (write_symbols != NO_DEBUG)
2723 rm_redundant_line_notes ();
2725 if (sched_verbose)
2727 if (reload_completed == 0 && flag_schedule_interblock)
2729 fprintf (sched_dump,
2730 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2731 nr_inter, nr_spec);
2733 else
2734 gcc_assert (nr_inter <= 0);
2735 fprintf (sched_dump, "\n\n");
2738 /* Clean up. */
2739 free (rgn_table);
2740 free (rgn_bb_table);
2741 free (block_to_bb);
2742 free (containing_rgn);
2744 sched_finish ();
2746 if (edge_table)
2748 free (edge_table);
2749 edge_table = NULL;
2752 if (in_edges)
2754 free (in_edges);
2755 in_edges = NULL;
2757 if (out_edges)
2759 free (out_edges);
2760 out_edges = NULL;
2763 sbitmap_free (blocks);
2764 sbitmap_free (large_region_blocks);
2766 #endif