1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "basic-block.h"
57 #include "regs.h"
58 #include "function.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "insn-attr.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "recog.h"
65 #include "cfglayout.h"
66 #include "params.h"
67 #include "sched-int.h"
68 #include "target.h"
70 /* Define when we want to count REG_DEAD notes before and after scheduling
71 for sanity checking. We can't do that when conditional execution is used,
72 as REG_DEAD notes exist only for unconditional deaths. */
74 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
75 #define CHECK_DEAD_NOTES 1
76 #else
77 #define CHECK_DEAD_NOTES 0
78 #endif
81 #ifdef INSN_SCHEDULING
82 /* Some accessor macros for h_i_d members only used within this file. */
83 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
84 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
85 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
87 /* nr_inter/spec counts interblock/speculative motion for the function. */
88 static int nr_inter, nr_spec;
90 /* Control flow graph edges are kept in circular lists. */
91 typedef struct
93 int from_block;
94 int to_block;
95 int next_in;
96 int next_out;
98 haifa_edge;
99 static haifa_edge *edge_table;
101 #define NEXT_IN(edge) (edge_table[edge].next_in)
102 #define NEXT_OUT(edge) (edge_table[edge].next_out)
103 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
104 #define TO_BLOCK(edge) (edge_table[edge].to_block)
106 /* Number of edges in the control flow graph. (In fact, larger than
107 that by 1, since edge 0 is unused.) */
108 static int nr_edges;
110 /* Circular list of incoming/outgoing edges of a block. */
111 static int *in_edges;
112 static int *out_edges;
114 #define IN_EDGES(block) (in_edges[block])
115 #define OUT_EDGES(block) (out_edges[block])
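/* A minimal sketch (hypothetical helper, not part of the pass), assuming the
   edge tables above have already been filled in by build_control_flow: walk
   the circular NEXT_OUT chain of BLOCK's outgoing edges, the same traversal
   pattern used below in new_edge, compute_dom_prob_ps and compute_trg_info.
   Edge 0 is unused, so OUT_EDGES (block) == 0 means "no outgoing edges".  */

static int
count_out_edges_sketch (int block)
{
  int fst_edge, nxt_edge, n = 0;

  fst_edge = nxt_edge = OUT_EDGES (block);
  if (fst_edge == 0)
    return 0;

  do
    {
      n++;
      nxt_edge = NEXT_OUT (nxt_edge);
    }
  while (nxt_edge != fst_edge);

  return n;
}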
117 static int is_cfg_nonregular (void);
118 static int build_control_flow (struct edge_list *);
119 static void new_edge (int, int);
120 static bool sched_is_disabled_for_current_region_p (void);
122 /* A region is the main entity for interblock scheduling: insns
123 are allowed to move between blocks in the same region, along
124 control flow graph edges, in the 'up' direction. */
125 typedef struct
127 int rgn_nr_blocks; /* Number of blocks in region. */
128 int rgn_blocks; /* Index of the region's first block in rgn_bb_table. */
130 region;
132 /* Number of regions in the procedure. */
133 static int nr_regions;
135 /* Table of region descriptions. */
136 static region *rgn_table;
138 /* Array of lists of regions' blocks. */
139 static int *rgn_bb_table;
141 /* Topological order of blocks in the region (if b2 is reachable from
142 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
143 always referred to by either block or b, while its topological
144 order name (in the region) is referred to by bb. */
145 static int *block_to_bb;
147 /* The number of the region containing a block. */
148 static int *containing_rgn;
150 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
151 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
152 #define BLOCK_TO_BB(block) (block_to_bb[block])
153 #define CONTAINING_RGN(block) (containing_rgn[block])
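/* A minimal sketch (hypothetical, for illustration only), assuming the region
   tables above have been filled in by find_rgns or find_single_block_region:
   visit the blocks of region RGN in the order stored in rgn_bb_table and
   check the invariants relied upon elsewhere in this file.  */

static void
walk_region_blocks_sketch (int rgn)
{
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      /* The bb-th block of the region, in topological order.  */
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      gcc_assert (CONTAINING_RGN (block) == rgn);
      gcc_assert (BLOCK_TO_BB (block) == bb);
    }
}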
155 void debug_regions (void);
156 static void find_single_block_region (void);
157 static void find_rgns (struct edge_list *);
158 static bool too_large (int, int *, int *);
160 extern void debug_live (int, int);
162 /* Blocks of the current region being scheduled. */
163 static int current_nr_blocks;
164 static int current_blocks;
166 /* The mapping from bb to block. */
167 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
169 typedef struct
171 int *first_member; /* Pointer to the list start in bitlst_table. */
172 int nr_members; /* The number of members of the bit list. */
174 bitlst;
176 static int bitlst_table_last;
177 static int *bitlst_table;
179 static void extract_bitlst (sbitmap, bitlst *);
181 /* Target info declarations.
183 The block currently being scheduled is referred to as the "target" block,
184 while other blocks in the region from which insns can be moved to the
185 target are called "source" blocks. The candidate structure holds info
186 about such sources: are they valid? Speculative? Etc. */
187 typedef bitlst bblst;
188 typedef struct
190 char is_valid;
191 char is_speculative;
192 int src_prob;
193 bblst split_bbs;
194 bblst update_bbs;
196 candidate;
198 static candidate *candidate_table;
200 /* A speculative motion requires checking live information on the path
201 from 'source' to 'target'. The split blocks are those to be checked.
202 After a speculative motion, live information should be modified in
203 the 'update' blocks.
205 Lists of split and update blocks for each candidate of the current
206 target are in array bblst_table. */
207 static int *bblst_table, bblst_size, bblst_last;
209 #define IS_VALID(src) ( candidate_table[src].is_valid )
210 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
211 #define SRC_PROB(src) ( candidate_table[src].src_prob )
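/* A concrete (hypothetical) reading of the above: suppose source block S is a
   speculative candidate for the current target T and its split edges are e3
   and e7.  The split blocks of S are then TO_BLOCK (e3) and TO_BLOCK (e7);
   liveness must be checked there before an insn is moved up from S to T.  The
   update blocks are the TO blocks of every other outgoing edge of
   FROM_BLOCK (e3) and FROM_BLOCK (e7); liveness must be updated there after
   the move.  compute_trg_info below fills candidate_table this way.  */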
213 /* The bb being currently scheduled. */
214 static int target_bb;
216 /* List of edges. */
217 typedef bitlst edgelst;
219 /* Target info functions. */
220 static void split_edges (int, int, edgelst *);
221 static void compute_trg_info (int);
222 void debug_candidate (int);
223 void debug_candidates (int);
225 /* Dominators array: dom[i] contains the sbitmap of dominators of
226 bb i in the region. */
227 static sbitmap *dom;
229 /* bb 0 is the only region entry. */
230 #define IS_RGN_ENTRY(bb) (!bb)
232 /* Is bb_src dominated by bb_trg?  */
233 #define IS_DOMINATED(bb_src, bb_trg) \
234 ( TEST_BIT (dom[bb_src], bb_trg) )
236 /* Probability: Prob[i] is a float in [0, 1] which is the probability
237 of bb i relative to the region entry. */
238 static float *prob;
240 /* The probability of bb_src, relative to bb_trg. Note, that while the
241 'prob[bb]' is a float in [0, 1], this macro returns an integer
242 in [0, 100]. */
243 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 prob[bb_trg])))
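/* Worked example (illustrative numbers only): with prob[bb_src] == 0.3 and
   prob[bb_trg] == 0.6, GET_SRC_PROB (bb_src, bb_trg) evaluates to
   (int) (100.0 * (0.3 / 0.6)) == 50, i.e. bb_src is expected to execute on
   about half of the paths that reach bb_trg.  compute_trg_info compares this
   value against MIN_PROBABILITY when validating candidate source blocks.  */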
246 /* Bit-set of edges, where bit i stands for edge i. */
247 typedef sbitmap edgeset;
249 /* Number of edges in the region. */
250 static int rgn_nr_edges;
252 /* Array of size rgn_nr_edges. */
253 static int *rgn_edges;
256 /* Mapping from each edge in the graph to its number in the rgn. */
257 static int *edge_to_bit;
258 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
260 /* The split edges of a source bb is different for each target
261 bb. In order to compute this efficiently, the 'potential-split edges'
262 are computed for each bb prior to scheduling a region. This is actually
263 the split edges of each bb relative to the region entry.
265 pot_split[bb] is the set of potential split edges of bb. */
266 static edgeset *pot_split;
268 /* For every bb, a set of its ancestor edges. */
269 static edgeset *ancestor_edges;
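/* A small (hypothetical) example: if pot_split[src] == {e1, e2, e5} and
   pot_split[trg] == {e1, e5}, then the split edges of src relative to trg are
   {e2}; split_edges below computes exactly this set difference with
   sbitmap_difference.  */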
271 static void compute_dom_prob_ps (int);
273 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
274 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
275 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
277 /* Parameters affecting the decision of rank_for_schedule().
278 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
279 #define MIN_PROBABILITY 40
281 /* Speculative scheduling functions. */
282 static int check_live_1 (int, rtx);
283 static void update_live_1 (int, rtx);
284 static int check_live (rtx, int);
285 static void update_live (rtx, int);
286 static void set_spec_fed (rtx);
287 static int is_pfree (rtx, int, int);
288 static int find_conditional_protection (rtx, int);
289 static int is_conditionally_protected (rtx, int, int);
290 static int is_prisky (rtx, int, int);
291 static int is_exception_free (rtx, int, int);
293 static bool sets_likely_spilled (rtx);
294 static void sets_likely_spilled_1 (rtx, rtx, void *);
295 static void add_branch_dependences (rtx, rtx);
296 static void compute_block_backward_dependences (int);
297 void debug_dependencies (void);
299 static void init_regions (void);
300 static void schedule_region (int);
301 static rtx concat_INSN_LIST (rtx, rtx);
302 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
303 static void propagate_deps (int, struct deps *);
304 static void free_pending_lists (void);
306 /* Functions for construction of the control flow graph. */
308 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
310 We decide not to build the control flow graph if there is possibly more
311 than one entry to the function, if computed branches exist, or if we
312 have nonlocal gotos. */
314 static int
315 is_cfg_nonregular (void)
317 basic_block b;
318 rtx insn;
319 RTX_CODE code;
321 /* If we have a label that could be the target of a nonlocal goto, then
322 the cfg is not well structured. */
323 if (nonlocal_goto_handler_labels)
324 return 1;
326 /* If we have any forced labels, then the cfg is not well structured. */
327 if (forced_labels)
328 return 1;
330 /* If this function has a computed jump, then we consider the cfg
331 not well structured. */
332 if (current_function_has_computed_jump)
333 return 1;
335 /* If we have exception handlers, then we consider the cfg not well
336 structured. ?!? We should be able to handle this now that flow.c
337 computes an accurate cfg for EH. */
338 if (current_function_has_exception_handlers ())
339 return 1;
341 /* If we have non-jumping insns which refer to labels, then we consider
342 the cfg not well structured. */
343 /* Check for labels referred to by something other than jumps. */
344 FOR_EACH_BB (b)
345 for (insn = BB_HEAD (b); ; insn = NEXT_INSN (insn))
347 code = GET_CODE (insn);
348 if (INSN_P (insn) && code != JUMP_INSN)
350 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
352 if (note
353 && ! (JUMP_P (NEXT_INSN (insn))
354 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
355 XEXP (note, 0))))
356 return 1;
359 if (insn == BB_END (b))
360 break;
363 /* All the tests passed. Consider the cfg well structured. */
364 return 0;
367 /* Build the control flow graph and set nr_edges.
369 Instead of trying to build a cfg ourselves, we rely on flow to
370 do it for us. Stamp out useless code (and bug) duplication.
372 Return nonzero if an irregularity in the cfg is found which would
373 prevent cross block scheduling. */
375 static int
376 build_control_flow (struct edge_list *edge_list)
378 int i, unreachable, num_edges;
379 basic_block b;
381 /* This already accounts for entry/exit edges. */
382 num_edges = NUM_EDGES (edge_list);
384 /* Unreachable loops with more than one basic block are detected
385 during the DFS traversal in find_rgns.
387 Unreachable loops with a single block are detected here. This
388 test is redundant with the one in find_rgns, but it's much
389 cheaper to go ahead and catch the trivial case here. */
390 unreachable = 0;
391 FOR_EACH_BB (b)
393 if (b->pred == NULL
394 || (b->pred->src == b
395 && b->pred->pred_next == NULL))
396 unreachable = 1;
399 /* ??? We can kill these soon. */
400 in_edges = xcalloc (last_basic_block, sizeof (int));
401 out_edges = xcalloc (last_basic_block, sizeof (int));
402 edge_table = xcalloc (num_edges, sizeof (haifa_edge));
404 nr_edges = 0;
405 for (i = 0; i < num_edges; i++)
407 edge e = INDEX_EDGE (edge_list, i);
409 if (e->dest != EXIT_BLOCK_PTR
410 && e->src != ENTRY_BLOCK_PTR)
411 new_edge (e->src->index, e->dest->index);
414 /* Increment by 1, since edge 0 is unused. */
415 nr_edges++;
417 return unreachable;
420 /* Record an edge in the control flow graph from SOURCE to TARGET.
422 In theory, this is redundant with the s_succs computed above, but
423 we have not converted all of haifa to use information from the
424 integer lists. */
426 static void
427 new_edge (int source, int target)
429 int e, next_edge;
430 int curr_edge, fst_edge;
432 /* Check for duplicates. */
433 fst_edge = curr_edge = OUT_EDGES (source);
434 while (curr_edge)
436 if (FROM_BLOCK (curr_edge) == source
437 && TO_BLOCK (curr_edge) == target)
439 return;
442 curr_edge = NEXT_OUT (curr_edge);
444 if (fst_edge == curr_edge)
445 break;
448 e = ++nr_edges;
450 FROM_BLOCK (e) = source;
451 TO_BLOCK (e) = target;
453 if (OUT_EDGES (source))
455 next_edge = NEXT_OUT (OUT_EDGES (source));
456 NEXT_OUT (OUT_EDGES (source)) = e;
457 NEXT_OUT (e) = next_edge;
459 else
461 OUT_EDGES (source) = e;
462 NEXT_OUT (e) = e;
465 if (IN_EDGES (target))
467 next_edge = NEXT_IN (IN_EDGES (target));
468 NEXT_IN (IN_EDGES (target)) = e;
469 NEXT_IN (e) = next_edge;
471 else
473 IN_EDGES (target) = e;
474 NEXT_IN (e) = e;
478 /* Translate a bit-set SET to a list BL of the bit-set members. */
480 static void
481 extract_bitlst (sbitmap set, bitlst *bl)
483 int i;
485 /* bitlst_table space is reused in each call to extract_bitlst. */
486 bitlst_table_last = 0;
488 bl->first_member = &bitlst_table[bitlst_table_last];
489 bl->nr_members = 0;
491 /* Iterate over each word in the bitset. */
492 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
494 bitlst_table[bitlst_table_last++] = i;
495 (bl->nr_members)++;
500 /* Functions for the construction of regions. */
502 /* Print the regions, for debugging purposes. Callable from debugger. */
504 void
505 debug_regions (void)
507 int rgn, bb;
509 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
510 for (rgn = 0; rgn < nr_regions; rgn++)
512 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
513 rgn_table[rgn].rgn_nr_blocks);
514 fprintf (sched_dump, ";;\tbb/block: ");
516 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
518 current_blocks = RGN_BLOCKS (rgn);
520 gcc_assert (bb == BLOCK_TO_BB (BB_TO_BLOCK (bb)));
521 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
524 fprintf (sched_dump, "\n\n");
528 /* Build a single block region for each basic block in the function.
529 This allows for using the same code for interblock and basic block
530 scheduling. */
532 static void
533 find_single_block_region (void)
535 basic_block bb;
537 nr_regions = 0;
539 FOR_EACH_BB (bb)
541 rgn_bb_table[nr_regions] = bb->index;
542 RGN_NR_BLOCKS (nr_regions) = 1;
543 RGN_BLOCKS (nr_regions) = nr_regions;
544 CONTAINING_RGN (bb->index) = nr_regions;
545 BLOCK_TO_BB (bb->index) = 0;
546 nr_regions++;
550 /* Update number of blocks and the estimate for number of insns
551 in the region. Return true if the region is "too large" for interblock
552 scheduling (compile time considerations). */
554 static bool
555 too_large (int block, int *num_bbs, int *num_insns)
557 (*num_bbs)++;
558 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
559 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
561 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
562 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
565 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
566 is still an inner loop. Put in max_hdr[blk] the header of the most inner
567 loop containing blk. */
568 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
569 { \
570 if (max_hdr[blk] == -1) \
571 max_hdr[blk] = hdr; \
572 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
573 RESET_BIT (inner, hdr); \
574 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
575 { \
576 RESET_BIT (inner, max_hdr[blk]); \
577 max_hdr[blk] = hdr; \
578 } \
579 }
581 /* Find regions for interblock scheduling.
583 A region for scheduling can be:
585 * A loop-free procedure, or
587 * A reducible inner loop, or
589 * A basic block not contained in any other region.
591 ?!? In theory we could build other regions based on extended basic
592 blocks or reverse extended basic blocks. Is it worth the trouble?
594 Loop blocks that form a region are put into the region's block list
595 in topological order.
597 This procedure stores its results into the following global (ick) variables
599 * nr_regions
600 * rgn_table
601 * rgn_bb_table
602 * block_to_bb
603 * containing_rgn
605 We use dominator relationships to avoid making regions out of non-reducible
606 loops.
608 This procedure needs to be converted to work on pred/succ lists instead
609 of edge tables. That would simplify it somewhat. */
611 static void
612 find_rgns (struct edge_list *edge_list)
614 int *max_hdr, *dfs_nr, *stack, *degree;
615 char no_loops = 1;
616 int node, child, loop_head, i, head, tail;
617 int count = 0, sp, idx = 0;
618 int current_edge = out_edges[ENTRY_BLOCK_PTR->succ->dest->index];
619 int num_bbs, num_insns, unreachable;
620 int too_large_failure;
621 basic_block bb;
623 /* Note if an edge has been passed. */
624 sbitmap passed;
626 /* Note if a block is a natural loop header. */
627 sbitmap header;
629 /* Note if a block is a natural inner loop header. */
630 sbitmap inner;
632 /* Note if a block is in the block queue. */
633 sbitmap in_queue;
635 /* Note if a block is on the DFS stack. */
636 sbitmap in_stack;
638 int num_edges = NUM_EDGES (edge_list);
640 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
641 and a mapping from block to its loop header (if the block is contained
642 in a loop, else -1).
644 Store results in HEADER, INNER, and MAX_HDR respectively, these will
645 be used as inputs to the second traversal.
647 STACK, SP and DFS_NR are only used during the first traversal. */
649 /* Allocate and initialize variables for the first traversal. */
650 max_hdr = xmalloc (last_basic_block * sizeof (int));
651 dfs_nr = xcalloc (last_basic_block, sizeof (int));
652 stack = xmalloc (nr_edges * sizeof (int));
654 inner = sbitmap_alloc (last_basic_block);
655 sbitmap_ones (inner);
657 header = sbitmap_alloc (last_basic_block);
658 sbitmap_zero (header);
660 passed = sbitmap_alloc (nr_edges);
661 sbitmap_zero (passed);
663 in_queue = sbitmap_alloc (last_basic_block);
664 sbitmap_zero (in_queue);
666 in_stack = sbitmap_alloc (last_basic_block);
667 sbitmap_zero (in_stack);
669 for (i = 0; i < last_basic_block; i++)
670 max_hdr[i] = -1;
672 /* DFS traversal to find inner loops in the cfg. */
674 sp = -1;
675 while (1)
677 if (current_edge == 0 || TEST_BIT (passed, current_edge))
679 /* We have reached a leaf node or a node that was already
680 processed. Pop edges off the stack until we find
681 an edge that has not yet been processed. */
682 while (sp >= 0
683 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
685 /* Pop entry off the stack. */
686 current_edge = stack[sp--];
687 node = FROM_BLOCK (current_edge);
688 child = TO_BLOCK (current_edge);
689 RESET_BIT (in_stack, child);
690 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
691 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
692 current_edge = NEXT_OUT (current_edge);
695 /* See if we have finished the DFS tree traversal. */
696 if (sp < 0 && TEST_BIT (passed, current_edge))
697 break;
699 /* Nope, continue the traversal with the popped node. */
700 continue;
703 /* Process a node. */
704 node = FROM_BLOCK (current_edge);
705 child = TO_BLOCK (current_edge);
706 SET_BIT (in_stack, node);
707 dfs_nr[node] = ++count;
709 /* If the successor is in the stack, then we've found a loop.
710 Mark the loop, if it is not a natural loop, then it will
711 be rejected during the second traversal. */
712 if (TEST_BIT (in_stack, child))
714 no_loops = 0;
715 SET_BIT (header, child);
716 UPDATE_LOOP_RELATIONS (node, child);
717 SET_BIT (passed, current_edge);
718 current_edge = NEXT_OUT (current_edge);
719 continue;
722 /* If the child was already visited, then there is no need to visit
723 it again. Just update the loop relationships and restart
724 with a new edge. */
725 if (dfs_nr[child])
727 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
728 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
729 SET_BIT (passed, current_edge);
730 current_edge = NEXT_OUT (current_edge);
731 continue;
734 /* Push an entry on the stack and continue DFS traversal. */
735 stack[++sp] = current_edge;
736 SET_BIT (passed, current_edge);
737 current_edge = OUT_EDGES (child);
739 /* This is temporary until haifa is converted to use rth's new
740 cfg routines which have true entry/exit blocks and the
741 appropriate edges from/to those blocks.
743 Generally we update dfs_nr for a node when we process its
744 out edge. However, if the node has no out edge then we will
745 not set dfs_nr for that node. This can confuse the scheduler
746 into thinking that we have unreachable blocks, which in turn
747 disables cross block scheduling.
749 So, if we have a node with no out edges, go ahead and mark it
750 as reachable now. */
751 if (current_edge == 0)
752 dfs_nr[child] = ++count;
755 /* Another check for unreachable blocks. The earlier test in
756 is_cfg_nonregular only finds unreachable blocks that do not
757 form a loop.
759 The DFS traversal will mark every block that is reachable from
760 the entry node by placing a nonzero value in dfs_nr. Thus if
761 dfs_nr is zero for any block, then it must be unreachable. */
762 unreachable = 0;
763 FOR_EACH_BB (bb)
764 if (dfs_nr[bb->index] == 0)
766 unreachable = 1;
767 break;
770 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
771 to hold degree counts. */
772 degree = dfs_nr;
774 FOR_EACH_BB (bb)
775 degree[bb->index] = 0;
776 for (i = 0; i < num_edges; i++)
778 edge e = INDEX_EDGE (edge_list, i);
780 if (e->dest != EXIT_BLOCK_PTR)
781 degree[e->dest->index]++;
784 /* Do not perform region scheduling if there are any unreachable
785 blocks. */
786 if (!unreachable)
788 int *queue;
790 if (no_loops)
791 SET_BIT (header, 0);
793 /* Second traversal: find reducible inner loops and topologically sort
794 the blocks of each region. */
796 queue = xmalloc (n_basic_blocks * sizeof (int));
798 /* Find blocks which are inner loop headers. We still have non-reducible
799 loops to consider at this point. */
800 FOR_EACH_BB (bb)
802 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
804 edge e;
805 basic_block jbb;
807 /* Now check that the loop is reducible. We do this separately
808 from finding inner loops so that we do not find a reducible
809 loop which contains an inner non-reducible loop.
811 A simple way to find reducible/natural loops is to verify
812 that each block in the loop is dominated by the loop
813 header.
815 If there exists a block that is not dominated by the loop
816 header, then the block is reachable from outside the loop
817 and thus the loop is not a natural loop. */
818 FOR_EACH_BB (jbb)
820 /* First identify blocks in the loop, except for the loop
821 entry block. */
822 if (bb->index == max_hdr[jbb->index] && bb != jbb)
824 /* Now verify that the block is dominated by the loop
825 header. */
826 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
827 break;
831 /* If we exited the loop early, then I is the header of
832 a non-reducible loop and we should quit processing it
833 now. */
834 if (jbb != EXIT_BLOCK_PTR)
835 continue;
837 /* I is a header of an inner loop, or block 0 in a subroutine
838 with no loops at all. */
839 head = tail = -1;
840 too_large_failure = 0;
841 loop_head = max_hdr[bb->index];
843 /* Decrease degree of all I's successors for topological
844 ordering. */
845 for (e = bb->succ; e; e = e->succ_next)
846 if (e->dest != EXIT_BLOCK_PTR)
847 --degree[e->dest->index];
849 /* Estimate # insns, and count # blocks in the region. */
850 num_bbs = 1;
851 num_insns = (INSN_LUID (BB_END (bb))
852 - INSN_LUID (BB_HEAD (bb)));
854 /* Find all loop latches (blocks with back edges to the loop
855 header) or, if the cfg has no loops, all the leaf blocks.
857 Place those blocks into the queue. */
858 if (no_loops)
860 FOR_EACH_BB (jbb)
861 /* Leaf nodes have only a single successor which must
862 be EXIT_BLOCK. */
863 if (jbb->succ
864 && jbb->succ->dest == EXIT_BLOCK_PTR
865 && jbb->succ->succ_next == NULL)
867 queue[++tail] = jbb->index;
868 SET_BIT (in_queue, jbb->index);
870 if (too_large (jbb->index, &num_bbs, &num_insns))
872 too_large_failure = 1;
873 break;
877 else
879 edge e;
881 for (e = bb->pred; e; e = e->pred_next)
883 if (e->src == ENTRY_BLOCK_PTR)
884 continue;
886 node = e->src->index;
888 if (max_hdr[node] == loop_head && node != bb->index)
890 /* This is a loop latch. */
891 queue[++tail] = node;
892 SET_BIT (in_queue, node);
894 if (too_large (node, &num_bbs, &num_insns))
896 too_large_failure = 1;
897 break;
903 /* Now add all the blocks in the loop to the queue.
905 We know the loop is a natural loop; however the algorithm
906 above will not always mark certain blocks as being in the
907 loop. Consider:
908 node children
909 a b,c
910 b c
911 c a,d
912 d b
914 The algorithm in the DFS traversal may not mark B & D as part
915 of the loop (i.e. they will not have max_hdr set to A).
917 We know they can not be loop latches (else they would have
918 had max_hdr set since they'd have a backedge to a dominator
919 block). So we don't need them on the initial queue.
921 We know they are part of the loop because they are dominated
922 by the loop header and can be reached by a backwards walk of
923 the edges starting with nodes on the initial queue.
925 It is safe and desirable to include those nodes in the
926 loop/scheduling region. To do so we would need to decrease
927 the degree of a node if it is the target of a backedge
928 within the loop itself as the node is placed in the queue.
930 We do not do this because I'm not sure that the actual
931 scheduling code will properly handle this case. ?!? */
933 while (head < tail && !too_large_failure)
935 edge e;
936 child = queue[++head];
938 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
940 node = e->src->index;
942 /* See discussion above about nodes not marked as in
943 this loop during the initial DFS traversal. */
944 if (e->src == ENTRY_BLOCK_PTR
945 || max_hdr[node] != loop_head)
947 tail = -1;
948 break;
950 else if (!TEST_BIT (in_queue, node) && node != bb->index)
952 queue[++tail] = node;
953 SET_BIT (in_queue, node);
955 if (too_large (node, &num_bbs, &num_insns))
957 too_large_failure = 1;
958 break;
964 if (tail >= 0 && !too_large_failure)
966 /* Place the loop header into list of region blocks. */
967 degree[bb->index] = -1;
968 rgn_bb_table[idx] = bb->index;
969 RGN_NR_BLOCKS (nr_regions) = num_bbs;
970 RGN_BLOCKS (nr_regions) = idx++;
971 CONTAINING_RGN (bb->index) = nr_regions;
972 BLOCK_TO_BB (bb->index) = count = 0;
974 /* Remove blocks from queue[] when their in degree
975 becomes zero. Repeat until no blocks are left on the
976 list. This produces a topological list of blocks in
977 the region. */
978 while (tail >= 0)
980 if (head < 0)
981 head = tail;
982 child = queue[head];
983 if (degree[child] == 0)
985 edge e;
987 degree[child] = -1;
988 rgn_bb_table[idx++] = child;
989 BLOCK_TO_BB (child) = ++count;
990 CONTAINING_RGN (child) = nr_regions;
991 queue[head] = queue[tail--];
993 for (e = BASIC_BLOCK (child)->succ;
995 e = e->succ_next)
996 if (e->dest != EXIT_BLOCK_PTR)
997 --degree[e->dest->index];
999 else
1000 --head;
1002 ++nr_regions;
1006 free (queue);
1009 /* Any block that did not end up in a region is placed into a region
1010 by itself. */
1011 FOR_EACH_BB (bb)
1012 if (degree[bb->index] >= 0)
1014 rgn_bb_table[idx] = bb->index;
1015 RGN_NR_BLOCKS (nr_regions) = 1;
1016 RGN_BLOCKS (nr_regions) = idx++;
1017 CONTAINING_RGN (bb->index) = nr_regions++;
1018 BLOCK_TO_BB (bb->index) = 0;
1021 free (max_hdr);
1022 free (dfs_nr);
1023 free (stack);
1024 sbitmap_free (passed);
1025 sbitmap_free (header);
1026 sbitmap_free (inner);
1027 sbitmap_free (in_queue);
1028 sbitmap_free (in_stack);
1031 /* Functions for regions scheduling information. */
1033 /* Compute dominators, probability, and potential-split-edges of bb.
1034 Assume that these values were already computed for bb's predecessors. */
1036 static void
1037 compute_dom_prob_ps (int bb)
1039 int nxt_in_edge, fst_in_edge, pred;
1040 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1042 prob[bb] = 0.0;
1043 if (IS_RGN_ENTRY (bb))
1045 SET_BIT (dom[bb], 0);
1046 prob[bb] = 1.0;
1047 return;
1050 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1052 /* Initialize dom[bb] to '111..1'. */
1053 sbitmap_ones (dom[bb]);
1057 pred = FROM_BLOCK (nxt_in_edge);
1058 sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1059 sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1061 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1063 nr_out_edges = 1;
1064 nr_rgn_out_edges = 0;
1065 fst_out_edge = OUT_EDGES (pred);
1066 nxt_out_edge = NEXT_OUT (fst_out_edge);
1068 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1070 SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1072 /* The successor doesn't belong in the region? */
1073 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1074 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1075 ++nr_rgn_out_edges;
1077 while (fst_out_edge != nxt_out_edge)
1079 ++nr_out_edges;
1080 /* The successor doesn't belong in the region? */
1081 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1082 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1083 ++nr_rgn_out_edges;
1084 SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1085 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1089 /* Now nr_rgn_out_edges is the number of region-exit edges from
1090 pred, and nr_out_edges will be the number of pred out edges
1091 not leaving the region. */
1092 nr_out_edges -= nr_rgn_out_edges;
1093 if (nr_rgn_out_edges > 0)
1094 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1095 else
1096 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1097 nxt_in_edge = NEXT_IN (nxt_in_edge);
1099 while (fst_in_edge != nxt_in_edge);
1101 SET_BIT (dom[bb], bb);
1102 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1104 if (sched_verbose >= 2)
1105 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1106 (int) (100.0 * prob[bb]));
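/* Worked example for the probability update above (illustrative numbers
   only): if predecessor P has prob[P] == 0.8 and three outgoing edges, one of
   which leaves the region, then nr_out_edges ends up as 3 - 1 == 2 and P
   contributes 0.9 * 0.8 / 2 == 0.36 to prob[bb].  If none of P's edges left
   the region, the 0.9 penalty would not apply and the contribution would be
   0.8 / 3.  */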
1109 /* Functions for target info. */
1111 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1112 Note that bb_trg dominates bb_src. */
1114 static void
1115 split_edges (int bb_src, int bb_trg, edgelst *bl)
1117 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1118 sbitmap_copy (src, pot_split[bb_src]);
1120 sbitmap_difference (src, src, pot_split[bb_trg]);
1121 extract_bitlst (src, bl);
1122 sbitmap_free (src);
1125 /* Find the valid candidate-source-blocks for the target block TRG, compute
1126 their probability, and check if they are speculative or not.
1127 For speculative sources, compute their update-blocks and split-blocks. */
1129 static void
1130 compute_trg_info (int trg)
1132 candidate *sp;
1133 edgelst el;
1134 int check_block, update_idx;
1135 int i, j, k, fst_edge, nxt_edge;
1137 /* Define some of the fields for the target bb as well. */
1138 sp = candidate_table + trg;
1139 sp->is_valid = 1;
1140 sp->is_speculative = 0;
1141 sp->src_prob = 100;
1143 for (i = trg + 1; i < current_nr_blocks; i++)
1145 sp = candidate_table + i;
1147 sp->is_valid = IS_DOMINATED (i, trg);
1148 if (sp->is_valid)
1150 sp->src_prob = GET_SRC_PROB (i, trg);
1151 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1154 if (sp->is_valid)
1156 split_edges (i, trg, &el);
1157 sp->is_speculative = (el.nr_members) ? 1 : 0;
1158 if (sp->is_speculative && !flag_schedule_speculative)
1159 sp->is_valid = 0;
1162 if (sp->is_valid)
1164 char *update_blocks;
1166 /* Compute split blocks and store them in bblst_table.
1167 The TO block of every split edge is a split block. */
1168 sp->split_bbs.first_member = &bblst_table[bblst_last];
1169 sp->split_bbs.nr_members = el.nr_members;
1170 for (j = 0; j < el.nr_members; bblst_last++, j++)
1171 bblst_table[bblst_last] =
1172 TO_BLOCK (rgn_edges[el.first_member[j]]);
1173 sp->update_bbs.first_member = &bblst_table[bblst_last];
1175 /* Compute update blocks and store them in bblst_table.
1176 For every split edge, look at the FROM block, and check
1177 all out edges. For each out edge that is not a split edge,
1178 add the TO block to the update block list. This list can end
1179 up with a lot of duplicates. We need to weed them out to avoid
1180 overrunning the end of the bblst_table. */
1181 update_blocks = alloca (last_basic_block);
1182 memset (update_blocks, 0, last_basic_block);
1184 update_idx = 0;
1185 for (j = 0; j < el.nr_members; j++)
1187 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1188 fst_edge = nxt_edge = OUT_EDGES (check_block);
1191 if (! update_blocks[TO_BLOCK (nxt_edge)])
1193 for (k = 0; k < el.nr_members; k++)
1194 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1195 break;
1197 if (k >= el.nr_members)
1199 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1200 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1201 update_idx++;
1205 nxt_edge = NEXT_OUT (nxt_edge);
1207 while (fst_edge != nxt_edge);
1209 sp->update_bbs.nr_members = update_idx;
1211 /* Make sure we didn't overrun the end of bblst_table. */
1212 gcc_assert (bblst_last <= bblst_size);
1214 else
1216 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1218 sp->is_speculative = 0;
1219 sp->src_prob = 0;
1224 /* Print candidates info, for debugging purposes. Callable from debugger. */
1226 void
1227 debug_candidate (int i)
1229 if (!candidate_table[i].is_valid)
1230 return;
1232 if (candidate_table[i].is_speculative)
1234 int j;
1235 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1237 fprintf (sched_dump, "split path: ");
1238 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1240 int b = candidate_table[i].split_bbs.first_member[j];
1242 fprintf (sched_dump, " %d ", b);
1244 fprintf (sched_dump, "\n");
1246 fprintf (sched_dump, "update path: ");
1247 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1249 int b = candidate_table[i].update_bbs.first_member[j];
1251 fprintf (sched_dump, " %d ", b);
1253 fprintf (sched_dump, "\n");
1255 else
1257 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1261 /* Print candidates info, for debugging purposes. Callable from debugger. */
1263 void
1264 debug_candidates (int trg)
1266 int i;
1268 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1269 BB_TO_BLOCK (trg), trg);
1270 for (i = trg + 1; i < current_nr_blocks; i++)
1271 debug_candidate (i);
1274 /* Functions for speculative scheduling. */
1276 /* Return 0 if X sets a register that is live at the start of one
1277 of the split blocks of SRC; otherwise return 1. */
1279 static int
1280 check_live_1 (int src, rtx x)
1282 int i;
1283 int regno;
1284 rtx reg = SET_DEST (x);
1286 if (reg == 0)
1287 return 1;
1289 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1290 || GET_CODE (reg) == SIGN_EXTRACT
1291 || GET_CODE (reg) == STRICT_LOW_PART)
1292 reg = XEXP (reg, 0);
1294 if (GET_CODE (reg) == PARALLEL)
1296 int i;
1298 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1299 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1300 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1301 return 1;
1303 return 0;
1306 if (!REG_P (reg))
1307 return 1;
1309 regno = REGNO (reg);
1311 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1313 /* Global registers are assumed live. */
1314 return 0;
1316 else
1318 if (regno < FIRST_PSEUDO_REGISTER)
1320 /* Check for hard registers. */
1321 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1322 while (--j >= 0)
1324 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1326 int b = candidate_table[src].split_bbs.first_member[i];
1328 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1329 regno + j))
1331 return 0;
1336 else
1338 /* Check for pseudo registers. */
1339 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1341 int b = candidate_table[src].split_bbs.first_member[i];
1343 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1345 return 0;
1351 return 1;
1354 /* If X sets a register R, mark R as live at the start of every
1355 update block of SRC. */
1357 static void
1358 update_live_1 (int src, rtx x)
1360 int i;
1361 int regno;
1362 rtx reg = SET_DEST (x);
1364 if (reg == 0)
1365 return;
1367 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1368 || GET_CODE (reg) == SIGN_EXTRACT
1369 || GET_CODE (reg) == STRICT_LOW_PART)
1370 reg = XEXP (reg, 0);
1372 if (GET_CODE (reg) == PARALLEL)
1374 int i;
1376 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1377 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1378 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1380 return;
1383 if (!REG_P (reg))
1384 return;
1386 /* Global registers are always live, so the code below does not apply
1387 to them. */
1389 regno = REGNO (reg);
1391 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1393 if (regno < FIRST_PSEUDO_REGISTER)
1395 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1396 while (--j >= 0)
1398 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1400 int b = candidate_table[src].update_bbs.first_member[i];
1402 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1403 regno + j);
1407 else
1409 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1411 int b = candidate_table[src].update_bbs.first_member[i];
1413 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1419 /* Return 1 if insn can be speculatively moved from block src to trg,
1420 otherwise return 0. Called before first insertion of insn to
1421 ready-list or before the scheduling. */
1423 static int
1424 check_live (rtx insn, int src)
1426 /* Find the registers set by instruction. */
1427 if (GET_CODE (PATTERN (insn)) == SET
1428 || GET_CODE (PATTERN (insn)) == CLOBBER)
1429 return check_live_1 (src, PATTERN (insn));
1430 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1432 int j;
1433 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1434 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1435 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1436 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1437 return 0;
1439 return 1;
1442 return 1;
1445 /* Update the live registers info after insn was moved speculatively from
1446 block src to trg. */
1448 static void
1449 update_live (rtx insn, int src)
1451 /* Find the registers set by instruction. */
1452 if (GET_CODE (PATTERN (insn)) == SET
1453 || GET_CODE (PATTERN (insn)) == CLOBBER)
1454 update_live_1 (src, PATTERN (insn));
1455 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1457 int j;
1458 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1459 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1460 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1461 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1465 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1466 #define IS_REACHABLE(bb_from, bb_to) \
1467 (bb_from == bb_to \
1468 || IS_RGN_ENTRY (bb_from) \
1469 || (TEST_BIT (ancestor_edges[bb_to], \
1470 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
1472 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1474 static void
1475 set_spec_fed (rtx load_insn)
1477 rtx link;
1479 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1480 if (GET_MODE (link) == VOIDmode)
1481 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1482 } /* set_spec_fed */
1484 /* On the path from the insn to load_insn_bb, find a conditional
1485 branch depending on insn, that guards the speculative load. */
1487 static int
1488 find_conditional_protection (rtx insn, int load_insn_bb)
1490 rtx link;
1492 /* Iterate through DEF-USE forward dependences. */
1493 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1495 rtx next = XEXP (link, 0);
1496 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1497 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1498 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1499 && load_insn_bb != INSN_BB (next)
1500 && GET_MODE (link) == VOIDmode
1501 && (JUMP_P (next)
1502 || find_conditional_protection (next, load_insn_bb)))
1503 return 1;
1505 return 0;
1506 } /* find_conditional_protection */
1508 /* Returns 1 if the same insn1 that participates in the computation
1509 of load_insn's address is feeding a conditional branch that is
1510 guarding load_insn. This is true if we find the two DEF-USE
1511 chains:
1512 insn1 -> ... -> conditional-branch
1513 insn1 -> ... -> load_insn,
1514 and if a flow path exists:
1515 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1516 and if insn1 is on the path
1517 region-entry -> ... -> bb_trg -> ... load_insn.
1519 Locate insn1 by climbing on LOG_LINKS from load_insn.
1520 Locate the branch by following INSN_DEPEND from insn1. */
1522 static int
1523 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1525 rtx link;
1527 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1529 rtx insn1 = XEXP (link, 0);
1531 /* Must be a DEF-USE dependence upon non-branch. */
1532 if (GET_MODE (link) != VOIDmode
1533 || JUMP_P (insn1))
1534 continue;
1536 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1537 if (INSN_BB (insn1) == bb_src
1538 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1539 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1540 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1541 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1542 continue;
1544 /* Now search for the conditional-branch. */
1545 if (find_conditional_protection (insn1, bb_src))
1546 return 1;
1548 /* Recursive step: search another insn1, "above" current insn1. */
1549 return is_conditionally_protected (insn1, bb_src, bb_trg);
1552 /* The chain does not exist. */
1553 return 0;
1554 } /* is_conditionally_protected */
1556 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1557 load_insn can move speculatively from bb_src to bb_trg. All the
1558 following must hold:
1560 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1561 (2) load_insn and load1 have a def-use dependence upon
1562 the same insn 'insn1'.
1563 (3) either load2 is in bb_trg, or:
1564 - there's only one split-block, and
1565 - load1 is on the escape path, and
1567 From all these we can conclude that the two loads access memory
1568 addresses that differ at most by a constant, and hence if moving
1569 load_insn would cause an exception, it would have been caused by
1570 load2 anyhow. */
1572 static int
1573 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1575 rtx back_link;
1576 candidate *candp = candidate_table + bb_src;
1578 if (candp->split_bbs.nr_members != 1)
1579 /* Must have exactly one escape block. */
1580 return 0;
1582 for (back_link = LOG_LINKS (load_insn);
1583 back_link; back_link = XEXP (back_link, 1))
1585 rtx insn1 = XEXP (back_link, 0);
1587 if (GET_MODE (back_link) == VOIDmode)
1589 /* Found a DEF-USE dependence (insn1, load_insn). */
1590 rtx fore_link;
1592 for (fore_link = INSN_DEPEND (insn1);
1593 fore_link; fore_link = XEXP (fore_link, 1))
1595 rtx insn2 = XEXP (fore_link, 0);
1596 if (GET_MODE (fore_link) == VOIDmode)
1598 /* Found a DEF-USE dependence (insn1, insn2). */
1599 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1600 /* insn2 not guaranteed to be a 1 base reg load. */
1601 continue;
1603 if (INSN_BB (insn2) == bb_trg)
1604 /* insn2 is the similar load, in the target block. */
1605 return 1;
1607 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1608 /* insn2 is a similar load, in a split-block. */
1609 return 1;
1615 /* Couldn't find a similar load. */
1616 return 0;
1617 } /* is_pfree */
1619 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1620 a load moved speculatively, or if load_insn is protected by
1621 a compare on load_insn's address). */
1623 static int
1624 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1626 if (FED_BY_SPEC_LOAD (load_insn))
1627 return 1;
1629 if (LOG_LINKS (load_insn) == NULL)
1630 /* Dependence may 'hide' out of the region. */
1631 return 1;
1633 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1634 return 1;
1636 return 0;
1639 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1640 Return 1 if insn is exception-free (and the motion is valid)
1641 and 0 otherwise. */
1643 static int
1644 is_exception_free (rtx insn, int bb_src, int bb_trg)
1646 int insn_class = haifa_classify_insn (insn);
1648 /* Handle non-load insns. */
1649 switch (insn_class)
1651 case TRAP_FREE:
1652 return 1;
1653 case TRAP_RISKY:
1654 return 0;
1655 default:;
1658 /* Handle loads. */
1659 if (!flag_schedule_speculative_load)
1660 return 0;
1661 IS_LOAD_INSN (insn) = 1;
1662 switch (insn_class)
1664 case IFREE:
1665 return (1);
1666 case IRISKY:
1667 return 0;
1668 case PFREE_CANDIDATE:
1669 if (is_pfree (insn, bb_src, bb_trg))
1670 return 1;
1671 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1672 case PRISKY_CANDIDATE:
1673 if (!flag_schedule_speculative_load_dangerous
1674 || is_prisky (insn, bb_src, bb_trg))
1675 return 0;
1676 break;
1677 default:;
1680 return flag_schedule_speculative_load_dangerous;
1683 /* The number of insns from the current block scheduled so far. */
1684 static int sched_target_n_insns;
1685 /* The number of insns from the current block to be scheduled in total. */
1686 static int target_n_insns;
1687 /* The number of insns from the entire region scheduled so far. */
1688 static int sched_n_insns;
1689 /* Nonzero if the last scheduled insn was a jump. */
1690 static int last_was_jump;
1692 /* Implementations of the sched_info functions for region scheduling. */
1693 static void init_ready_list (struct ready_list *);
1694 static int can_schedule_ready_p (rtx);
1695 static int new_ready (rtx);
1696 static int schedule_more_p (void);
1697 static const char *rgn_print_insn (rtx, int);
1698 static int rgn_rank (rtx, rtx);
1699 static int contributes_to_priority (rtx, rtx);
1700 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1702 /* Return nonzero if there are more insns that should be scheduled. */
1704 static int
1705 schedule_more_p (void)
1707 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1710 /* Add all insns that are initially ready to the ready list READY. Called
1711 once before scheduling a set of insns. */
1713 static void
1714 init_ready_list (struct ready_list *ready)
1716 rtx prev_head = current_sched_info->prev_head;
1717 rtx next_tail = current_sched_info->next_tail;
1718 int bb_src;
1719 rtx insn;
1721 target_n_insns = 0;
1722 sched_target_n_insns = 0;
1723 sched_n_insns = 0;
1724 last_was_jump = 0;
1726 /* Print debugging information. */
1727 if (sched_verbose >= 5)
1728 debug_dependencies ();
1730 /* Prepare current target block info. */
1731 if (current_nr_blocks > 1)
1733 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1735 bblst_last = 0;
1736 /* bblst_table holds split blocks and update blocks for each block after
1737 the current one in the region. split blocks and update blocks are
1738 the TO blocks of region edges, so there can be at most rgn_nr_edges
1739 of them. */
1740 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1741 bblst_table = xmalloc (bblst_size * sizeof (int));
1743 bitlst_table_last = 0;
1744 bitlst_table = xmalloc (rgn_nr_edges * sizeof (int));
1746 compute_trg_info (target_bb);
1749 /* Initialize ready list with all 'ready' insns in target block.
1750 Count number of insns in the target block being scheduled. */
1751 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1753 if (INSN_DEP_COUNT (insn) == 0)
1755 ready_add (ready, insn);
1757 if (targetm.sched.adjust_priority)
1758 INSN_PRIORITY (insn) =
1759 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1761 target_n_insns++;
1764 /* Add to ready list all 'ready' insns in valid source blocks.
1765 For speculative insns, check-live, exception-free, and
1766 issue-delay. */
1767 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1768 if (IS_VALID (bb_src))
1770 rtx src_head;
1771 rtx src_next_tail;
1772 rtx tail, head;
1774 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1775 src_next_tail = NEXT_INSN (tail);
1776 src_head = head;
1778 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1780 if (! INSN_P (insn))
1781 continue;
1783 if (!CANT_MOVE (insn)
1784 && (!IS_SPECULATIVE_INSN (insn)
1785 || ((recog_memoized (insn) < 0
1786 || min_insn_conflict_delay (curr_state,
1787 insn, insn) <= 3)
1788 && check_live (insn, bb_src)
1789 && is_exception_free (insn, bb_src, target_bb))))
1790 if (INSN_DEP_COUNT (insn) == 0)
1792 ready_add (ready, insn);
1794 if (targetm.sched.adjust_priority)
1795 INSN_PRIORITY (insn) =
1796 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1802 /* Called after taking INSN from the ready list. Returns nonzero if this
1803 insn can be scheduled, zero if we should silently discard it. */
1805 static int
1806 can_schedule_ready_p (rtx insn)
1808 if (JUMP_P (insn))
1809 last_was_jump = 1;
1811 /* An interblock motion? */
1812 if (INSN_BB (insn) != target_bb)
1814 basic_block b1;
1816 if (IS_SPECULATIVE_INSN (insn))
1818 if (!check_live (insn, INSN_BB (insn)))
1819 return 0;
1820 update_live (insn, INSN_BB (insn));
1822 /* For speculative load, mark insns fed by it. */
1823 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1824 set_spec_fed (insn);
1826 nr_spec++;
1828 nr_inter++;
1830 /* Update source block boundaries. */
1831 b1 = BLOCK_FOR_INSN (insn);
1832 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1834 /* We moved all the insns in the basic block.
1835 Emit a note after the last insn and update the
1836 begin/end boundaries to point to the note. */
1837 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1838 BB_HEAD (b1) = note;
1839 BB_END (b1) = note;
1841 else if (insn == BB_END (b1))
1843 /* We took insns from the end of the basic block,
1844 so update the end of block boundary so that it
1845 points to the first insn we did not move. */
1846 BB_END (b1) = PREV_INSN (insn);
1848 else if (insn == BB_HEAD (b1))
1850 /* We took insns from the start of the basic block,
1851 so update the start of block boundary so that
1852 it points to the first insn we did not move. */
1853 BB_HEAD (b1) = NEXT_INSN (insn);
1856 else
1858 /* In block motion. */
1859 sched_target_n_insns++;
1861 sched_n_insns++;
1863 return 1;
1866 /* Called after INSN has all its dependencies resolved. Return nonzero
1867 if it should be moved to the ready list or the queue, or zero if we
1868 should silently discard it. */
1869 static int
1870 new_ready (rtx next)
1872 /* For speculative insns, before inserting to ready/queue,
1873 check live, exception-free, and issue-delay. */
1874 if (INSN_BB (next) != target_bb
1875 && (!IS_VALID (INSN_BB (next))
1876 || CANT_MOVE (next)
1877 || (IS_SPECULATIVE_INSN (next)
1878 && ((recog_memoized (next) >= 0
1879 && min_insn_conflict_delay (curr_state, next, next) > 3)
1880 || !check_live (next, INSN_BB (next))
1881 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1882 return 0;
1883 return 1;
1886 /* Return a string that contains the insn uid and optionally anything else
1887 necessary to identify this insn in an output. It's valid to use a
1888 static buffer for this. The ALIGNED parameter should cause the string
1889 to be formatted so that multiple output lines will line up nicely. */
1891 static const char *
1892 rgn_print_insn (rtx insn, int aligned)
1894 static char tmp[80];
1896 if (aligned)
1897 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1898 else
1900 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1901 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1902 else
1903 sprintf (tmp, "%d", INSN_UID (insn));
1905 return tmp;
1908 /* Compare priority of two insns. Return a positive number if the second
1909 insn is to be preferred for scheduling, and a negative one if the first
1910 is to be preferred. Zero if they are equally good. */
1912 static int
1913 rgn_rank (rtx insn1, rtx insn2)
1915 /* Some comparisons make sense in interblock scheduling only. */
1916 if (INSN_BB (insn1) != INSN_BB (insn2))
1918 int spec_val, prob_val;
1920 /* Prefer an inblock motion over an interblock motion. */
1921 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1922 return 1;
1923 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1924 return -1;
1926 /* Prefer a useful motion over a speculative one. */
1927 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1928 if (spec_val)
1929 return spec_val;
1931 /* Prefer a more probable (speculative) insn. */
1932 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1933 if (prob_val)
1934 return prob_val;
1936 return 0;
1939 /* NEXT is an instruction that depends on INSN (a backward dependence);
1940 return nonzero if we should include this dependence in priority
1941 calculations. */
1943 static int
1944 contributes_to_priority (rtx next, rtx insn)
1946 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1949 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1950 conditionally set before INSN. Store the set of registers that
1951 must be considered as used by this jump in USED and that of
1952 registers that must be considered as set in SET. */
1954 static void
1955 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1956 regset cond_exec ATTRIBUTE_UNUSED,
1957 regset used ATTRIBUTE_UNUSED,
1958 regset set ATTRIBUTE_UNUSED)
1960 /* Nothing to do here, since we postprocess jumps in
1961 add_branch_dependences. */
1964 /* Used in schedule_insns to initialize current_sched_info for scheduling
1965 regions (or single basic blocks). */
1967 static struct sched_info region_sched_info =
1969 init_ready_list,
1970 can_schedule_ready_p,
1971 schedule_more_p,
1972 new_ready,
1973 rgn_rank,
1974 rgn_print_insn,
1975 contributes_to_priority,
1976 compute_jump_reg_dependencies,
1978 NULL, NULL,
1979 NULL, NULL,
1980 0, 0, 0
1983 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1985 static bool
1986 sets_likely_spilled (rtx pat)
1988 bool ret = false;
1989 note_stores (pat, sets_likely_spilled_1, &ret);
1990 return ret;
1993 static void
1994 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1996 bool *ret = (bool *) data;
1998 if (GET_CODE (pat) == SET
1999 && REG_P (x)
2000 && REGNO (x) < FIRST_PSEUDO_REGISTER
2001 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2002 *ret = true;
2005 /* Add dependences so that branches are scheduled to run last in their
2006 block. */
2008 static void
2009 add_branch_dependences (rtx head, rtx tail)
2011 rtx insn, last;
2013 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2014 that can throw exceptions, force them to remain in order at the end of
2015 the block by adding dependencies and giving the last a high priority.
2016 There may be notes present, and prev_head may also be a note.
2018 Branches must obviously remain at the end. Calls should remain at the
2019 end since moving them results in worse register allocation. Uses remain
2020 at the end to ensure proper register allocation.
2022 cc0 setters remain at the end because they can't be moved away from
2023 their cc0 user.
2025 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2026 are not moved before reload because we can wind up with register
2027 allocation failures. */
2029 insn = tail;
2030 last = 0;
2031 while (CALL_P (insn)
2032 || JUMP_P (insn)
2033 || (NONJUMP_INSN_P (insn)
2034 && (GET_CODE (PATTERN (insn)) == USE
2035 || GET_CODE (PATTERN (insn)) == CLOBBER
2036 || can_throw_internal (insn)
2037 #ifdef HAVE_cc0
2038 || sets_cc0_p (PATTERN (insn))
2039 #endif
2040 || (!reload_completed
2041 && sets_likely_spilled (PATTERN (insn)))))
2042 || NOTE_P (insn))
2044 if (!NOTE_P (insn))
2046 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2048 add_dependence (last, insn, REG_DEP_ANTI);
2049 INSN_REF_COUNT (insn)++;
2052 CANT_MOVE (insn) = 1;
2054 last = insn;
2057 /* Don't overrun the bounds of the basic block. */
2058 if (insn == head)
2059 break;
2061 insn = PREV_INSN (insn);
2064 /* Make sure these insns are scheduled last in their block. */
2065 insn = last;
2066 if (insn != 0)
2067 while (insn != head)
2069 insn = prev_nonnote_insn (insn);
2071 if (INSN_REF_COUNT (insn) != 0)
2072 continue;
2074 add_dependence (last, insn, REG_DEP_ANTI);
2075 INSN_REF_COUNT (insn) = 1;
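/* Illustrative only: the effect of the two loops above, reduced to a
   self-contained toy.  Instructions are plain array indices, "pinned"
   marks the trailing branch/call/USE group, and each artificial edge
   printed below stands for one REG_DEP_ANTI dependence that keeps the
   group at the end of the block.  All names and values are made up for
   the example, and the block is guarded out.  */
#if 0
#include <stdio.h>

#define N 6

int
main (void)
{
  /* Insns 3..5 must stay at the block end (think: a USE of the return
     value, a cc0 setter, the final jump).  */
  int pinned[N] = { 0, 0, 0, 1, 1, 1 };
  int dep_from[N * N], dep_to[N * N];
  int n_deps = 0, last = -1, i;

  /* Walk backwards over the trailing pinned group, chaining each member
     to the one that follows it, as the first loop chains LAST to INSN.  */
  for (i = N - 1; i >= 0 && pinned[i]; i--)
    {
      if (last >= 0)
        {
          dep_from[n_deps] = i;
          dep_to[n_deps++] = last;
        }
      last = i;
    }

  /* Everything before the group must be scheduled before its first
     member, as the second loop above arranges.  */
  if (last >= 0)
    for (; i >= 0; i--)
      {
        dep_from[n_deps] = i;
        dep_to[n_deps++] = last;
      }

  for (i = 0; i < n_deps; i++)
    printf ("insn %d must precede insn %d\n", dep_from[i], dep_to[i]);
  return 0;
}
#endif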
2079 /* Data structures for the computation of data dependences in a region. We
2080 keep one `deps' structure for every basic block. Before analyzing the
2081 data dependences for a bb, its variables are initialized as a function of
2082 the variables of its predecessors. When the analysis for a bb completes,
2083 we save the contents to the corresponding bb_deps[bb] variable. */
2085 static struct deps *bb_deps;
2087 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2089 static rtx
2090 concat_INSN_LIST (rtx copy, rtx old)
2092 rtx new = old;
2093 for (; copy ; copy = XEXP (copy, 1))
2094 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2095 return new;
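/* Illustrative only: the same "prepend a copy of one cons list onto
   another" operation on a plain, self-contained list type.  The node
   struct and names below are hypothetical stand-ins, not GCC's
   INSN_LIST, and the block is guarded out.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct toy_node { int value; struct toy_node *next; };

static struct toy_node *
toy_cons (int value, struct toy_node *next)
{
  struct toy_node *n = malloc (sizeof *n);
  n->value = value;
  n->next = next;
  return n;
}

/* Prepend a copy of each element of COPY to OLD.  The copied elements
   end up in reverse of their original order, which is harmless for an
   order-insensitive dependence list.  */
static struct toy_node *
toy_concat (struct toy_node *copy, struct toy_node *old)
{
  struct toy_node *new_list = old;
  for (; copy; copy = copy->next)
    new_list = toy_cons (copy->value, new_list);
  return new_list;
}

int
main (void)
{
  struct toy_node *a = toy_cons (1, toy_cons (2, NULL));
  struct toy_node *b = toy_cons (3, NULL);
  struct toy_node *r = toy_concat (a, b);   /* yields 2 -> 1 -> 3 */

  for (; r; r = r->next)
    printf ("%d ", r->value);
  printf ("\n");
  return 0;
}
#endif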
2098 static void
2099 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2100 rtx *old_mems_p)
2102 rtx new_insns = *old_insns_p;
2103 rtx new_mems = *old_mems_p;
2105 while (copy_insns)
2107 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2108 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2109 copy_insns = XEXP (copy_insns, 1);
2110 copy_mems = XEXP (copy_mems, 1);
2113 *old_insns_p = new_insns;
2114 *old_mems_p = new_mems;
2117 /* After computing the dependencies for block BB, propagate the dependencies
2118 found in TMP_DEPS to the successors of the block. */
2119 static void
2120 propagate_deps (int bb, struct deps *pred_deps)
2122 int b = BB_TO_BLOCK (bb);
2123 int e, first_edge;
2125 /* bb's structures are inherited by its successors. */
2126 first_edge = e = OUT_EDGES (b);
2127 if (e > 0)
2130 int b_succ = TO_BLOCK (e);
2131 int bb_succ = BLOCK_TO_BB (b_succ);
2132 struct deps *succ_deps = bb_deps + bb_succ;
2133 int reg;
2135 /* Only bbs "below" bb, in the same region, are interesting. */
2136 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2137 || bb_succ <= bb)
2139 e = NEXT_OUT (e);
2140 continue;
2143 /* The reg_last lists are inherited by bb_succ. */
2144 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2146 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2147 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2149 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2150 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2151 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2152 succ_rl->clobbers);
2153 succ_rl->uses_length += pred_rl->uses_length;
2154 succ_rl->clobbers_length += pred_rl->clobbers_length;
2156 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2158 /* Mem read/write lists are inherited by bb_succ. */
2159 concat_insn_mem_list (pred_deps->pending_read_insns,
2160 pred_deps->pending_read_mems,
2161 &succ_deps->pending_read_insns,
2162 &succ_deps->pending_read_mems);
2163 concat_insn_mem_list (pred_deps->pending_write_insns,
2164 pred_deps->pending_write_mems,
2165 &succ_deps->pending_write_insns,
2166 &succ_deps->pending_write_mems);
2168 succ_deps->last_pending_memory_flush
2169 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2170 succ_deps->last_pending_memory_flush);
2172 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2173 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2175 /* last_function_call is inherited by bb_succ. */
2176 succ_deps->last_function_call
2177 = concat_INSN_LIST (pred_deps->last_function_call,
2178 succ_deps->last_function_call);
2180 /* sched_before_next_call is inherited by bb_succ. */
2181 succ_deps->sched_before_next_call
2182 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2183 succ_deps->sched_before_next_call);
2185 e = NEXT_OUT (e);
2187 while (e != first_edge);
2189 /* These lists should point to the right place, for correct
2190 freeing later. */
2191 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2192 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2193 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2194 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2196 /* Can't allow these to be freed twice. */
2197 pred_deps->pending_read_insns = 0;
2198 pred_deps->pending_read_mems = 0;
2199 pred_deps->pending_write_insns = 0;
2200 pred_deps->pending_write_mems = 0;
2203 /* Compute backward dependences inside bb. In a region of multiple blocks:
2204 (1) a bb is analyzed after its predecessors, and (2) the lists in
2205 effect at the end of bb (after analyzing for bb) are inherited by
2206 bb's successors.
2208 Specifically for reg-reg data dependences, the block insns are
2209 scanned by sched_analyze () top-to-bottom. Two lists are
2210 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2211 and reg_last[].uses for register USEs.
2213 When analysis is completed for bb, we update for its successors:
2214 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2215 ; - USES[succ] = Union (USES [succ], USES [bb])
2217 The mechanism for computing mem-mem data dependence is very
2218 similar, and the result is interblock dependences in the region. */
2220 static void
2221 compute_block_backward_dependences (int bb)
2223 rtx head, tail;
2224 struct deps tmp_deps;
2226 tmp_deps = bb_deps[bb];
2228 /* Do the analysis for this block. */
2229 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2230 sched_analyze (&tmp_deps, head, tail);
2231 add_branch_dependences (head, tail);
2233 if (current_nr_blocks > 1)
2234 propagate_deps (bb, &tmp_deps);
2236 /* Free up the INSN_LISTs. */
2237 free_deps (&tmp_deps);
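/* Illustrative only: the successor-update rule quoted in the comment
   above, worked on tiny bitmask register sets.  The real code keeps
   per-register INSN_LISTs rather than bitmasks; the values here are
   made up, and the block is guarded out.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* Bit r set means "register r appears in the set".  */
  unsigned defs_bb   = 0x03;   /* bb defines r0 and r1 */
  unsigned uses_bb   = 0x04;   /* bb uses r2 */
  unsigned defs_succ = 0x08;   /* succ already had a def of r3 */
  unsigned uses_succ = 0x01;   /* succ already had a use of r0 */

  /* DEFS[succ] = Union (DEFS[succ], DEFS[bb])
     USES[succ] = Union (USES[succ], USES[bb])  */
  defs_succ |= defs_bb;
  uses_succ |= uses_bb;

  printf ("DEFS[succ] = 0x%x, USES[succ] = 0x%x\n", defs_succ, uses_succ);
  /* Prints: DEFS[succ] = 0xb, USES[succ] = 0x5  */
  return 0;
}
#endif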
2240 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2241 them to the unused_*_list variables, so that they can be reused. */
2243 static void
2244 free_pending_lists (void)
2246 int bb;
2248 for (bb = 0; bb < current_nr_blocks; bb++)
2250 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2251 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2252 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2253 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2257 /* Print dependences for debugging, callable from the debugger. */
2259 void
2260 debug_dependencies (void)
2262 int bb;
2264 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2265 for (bb = 0; bb < current_nr_blocks; bb++)
2267 rtx head, tail;
2268 rtx next_tail;
2269 rtx insn;
2271 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2272 next_tail = NEXT_INSN (tail);
2273 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2274 BB_TO_BLOCK (bb), bb);
2276 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2277 "insn", "code", "bb", "dep", "prio", "cost",
2278 "reservation");
2279 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2280 "----", "----", "--", "---", "----", "----",
2281 "-----------");
2283 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2285 rtx link;
2287 if (! INSN_P (insn))
2289 int n;
2290 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2291 if (NOTE_P (insn))
2293 n = NOTE_LINE_NUMBER (insn);
2294 if (n < 0)
2295 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2296 else
2298 expanded_location xloc;
2299 NOTE_EXPANDED_LOCATION (xloc, insn);
2300 fprintf (sched_dump, "line %d, file %s\n",
2301 xloc.line, xloc.file);
2304 else
2305 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2306 continue;
2309 fprintf (sched_dump,
2310 ";; %s%5d%6d%6d%6d%6d%6d ",
2311 (SCHED_GROUP_P (insn) ? "+" : " "),
2312 INSN_UID (insn),
2313 INSN_CODE (insn),
2314 INSN_BB (insn),
2315 INSN_DEP_COUNT (insn),
2316 INSN_PRIORITY (insn),
2317 insn_cost (insn, 0, 0));
2319 if (recog_memoized (insn) < 0)
2320 fprintf (sched_dump, "nothing");
2321 else
2322 print_reservation (sched_dump, insn);
2324 fprintf (sched_dump, "\t: ");
2325 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2326 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2327 fprintf (sched_dump, "\n");
2330 fprintf (sched_dump, "\n");
2333 /* Returns true if all the basic blocks of the current region have
2334 NOTE_DISABLE_SCHED_OF_BLOCK, which means the region should not be scheduled. */
2335 static bool
2336 sched_is_disabled_for_current_region_p (void)
2338 int bb;
2340 for (bb = 0; bb < current_nr_blocks; bb++)
2341 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2342 return false;
2344 return true;
2347 /* Schedule a region. A region is either an inner loop, a loop-free
2348 subroutine, or a single basic block. Each bb in the region is
2349 scheduled after its flow predecessors. */
2351 static void
2352 schedule_region (int rgn)
2354 int bb;
2355 int rgn_n_insns = 0;
2356 int sched_rgn_n_insns = 0;
2358 /* Set variables for the current region. */
2359 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2360 current_blocks = RGN_BLOCKS (rgn);
2362 /* Don't schedule a region that is marked by
2363 NOTE_DISABLE_SCHED_OF_BLOCK. */
2364 if (sched_is_disabled_for_current_region_p ())
2365 return;
2367 init_deps_global ();
2369 /* Initializations for region data dependence analysis. */
2370 bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
2371 for (bb = 0; bb < current_nr_blocks; bb++)
2372 init_deps (bb_deps + bb);
2374 /* Compute LOG_LINKS. */
2375 for (bb = 0; bb < current_nr_blocks; bb++)
2376 compute_block_backward_dependences (bb);
2378 /* Compute INSN_DEPEND. */
2379 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2381 rtx head, tail;
2382 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2384 compute_forward_dependences (head, tail);
2386 if (targetm.sched.dependencies_evaluation_hook)
2387 targetm.sched.dependencies_evaluation_hook (head, tail);
2391 /* Set priorities. */
2392 for (bb = 0; bb < current_nr_blocks; bb++)
2394 rtx head, tail;
2395 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2397 rgn_n_insns += set_priorities (head, tail);
2400 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2401 if (current_nr_blocks > 1)
2403 int i;
2405 prob = xmalloc ((current_nr_blocks) * sizeof (float));
2407 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2408 sbitmap_vector_zero (dom, current_nr_blocks);
2409 /* Edge to bit. */
2410 rgn_nr_edges = 0;
2411 edge_to_bit = xmalloc (nr_edges * sizeof (int));
2412 for (i = 1; i < nr_edges; i++)
2413 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2414 EDGE_TO_BIT (i) = rgn_nr_edges++;
2415 rgn_edges = xmalloc (rgn_nr_edges * sizeof (int));
2417 rgn_nr_edges = 0;
2418 for (i = 1; i < nr_edges; i++)
2419 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2420 rgn_edges[rgn_nr_edges++] = i;
2422 /* Split edges. */
2423 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2424 sbitmap_vector_zero (pot_split, current_nr_blocks);
2425 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2426 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2428 /* Compute probabilities, dominators, split_edges. */
2429 for (bb = 0; bb < current_nr_blocks; bb++)
2430 compute_dom_prob_ps (bb);
2433 /* Now we can schedule all blocks. */
2434 for (bb = 0; bb < current_nr_blocks; bb++)
2436 rtx head, tail;
2437 int b = BB_TO_BLOCK (bb);
2439 get_block_head_tail (b, &head, &tail);
2441 if (no_real_insns_p (head, tail))
2442 continue;
2444 current_sched_info->prev_head = PREV_INSN (head);
2445 current_sched_info->next_tail = NEXT_INSN (tail);
2447 if (write_symbols != NO_DEBUG)
2449 save_line_notes (b, head, tail);
2450 rm_line_notes (head, tail);
2453 /* rm_other_notes only removes notes which are _inside_ the
2454 block---that is, it won't remove notes before the first real insn
2455 or after the last real insn of the block. So if the first insn
2456 has a REG_SAVE_NOTE which would otherwise be emitted before the
2457 insn, it is redundant with the note before the start of the
2458 block, and so we have to take it out. */
2459 if (INSN_P (head))
2461 rtx note;
2463 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2464 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2466 remove_note (head, note);
2467 note = XEXP (note, 1);
2468 remove_note (head, note);
2472 /* Remove remaining note insns from the block, save them in
2473 note_list. These notes are restored at the end of
2474 schedule_block (). */
2475 rm_other_notes (head, tail);
2477 target_bb = bb;
2479 current_sched_info->queue_must_finish_empty
2480 = current_nr_blocks > 1 && !flag_schedule_interblock;
2482 schedule_block (b, rgn_n_insns);
2483 sched_rgn_n_insns += sched_n_insns;
2485 /* Update target block boundaries. */
2486 if (head == BB_HEAD (BASIC_BLOCK (b)))
2487 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2488 if (tail == BB_END (BASIC_BLOCK (b)))
2489 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2491 /* Clean up. */
2492 if (current_nr_blocks > 1)
2494 free (candidate_table);
2495 free (bblst_table);
2496 free (bitlst_table);
2500 /* Sanity check: verify that all region insns were scheduled. */
2501 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2503 /* Restore line notes. */
2504 if (write_symbols != NO_DEBUG)
2506 for (bb = 0; bb < current_nr_blocks; bb++)
2508 rtx head, tail;
2509 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2510 restore_line_notes (head, tail);
2514 /* Done with this region. */
2515 free_pending_lists ();
2517 finish_deps_global ();
2519 free (bb_deps);
2521 if (current_nr_blocks > 1)
2523 free (prob);
2524 sbitmap_vector_free (dom);
2525 sbitmap_vector_free (pot_split);
2526 sbitmap_vector_free (ancestor_edges);
2527 free (edge_to_bit);
2528 free (rgn_edges);
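/* Illustrative only: "each bb in the region is scheduled after its flow
   predecessors" amounts to visiting the region's blocks so that every
   in-region predecessor along a forward edge is handled first.  The
   guarded-out sketch below runs Kahn's algorithm on a tiny hypothetical
   acyclic CFG; the adjacency matrix and names are made up for the
   example.  */
#if 0
#include <stdio.h>

#define NB 4

int
main (void)
{
  /* succ[i][j] nonzero: an edge from block i to block j inside the
     region.  Block 0 is the region head.  */
  int succ[NB][NB] = { { 0, 1, 1, 0 },
                       { 0, 0, 0, 1 },
                       { 0, 0, 0, 1 },
                       { 0, 0, 0, 0 } };
  int indeg[NB] = { 0 }, order[NB];
  int n = 0, i, j;

  for (i = 0; i < NB; i++)
    for (j = 0; j < NB; j++)
      if (succ[i][j])
        indeg[j]++;

  /* Repeatedly emit a block whose in-region predecessors are all done.  */
  while (n < NB)
    for (i = 0; i < NB; i++)
      if (indeg[i] == 0)
        {
          order[n++] = i;
          indeg[i] = -1;                /* mark as emitted */
          for (j = 0; j < NB; j++)
            if (succ[i][j])
              indeg[j]--;
          break;
        }

  for (i = 0; i < NB; i++)
    printf ("schedule block %d\n", order[i]);   /* 0, 1, 2, 3 */
  return 0;
}
#endif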
2532 /* Indexed by region, holds the number of death notes found in that region.
2533 Used for consistency checks. */
2534 static int *deaths_in_region;
2536 /* Initialize data structures for region scheduling. */
2538 static void
2539 init_regions (void)
2541 sbitmap blocks;
2542 int rgn;
2544 nr_regions = 0;
2545 rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
2546 rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
2547 block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
2548 containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
2550 /* Compute regions for scheduling. */
2551 if (reload_completed
2552 || n_basic_blocks == 1
2553 || !flag_schedule_interblock)
2555 find_single_block_region ();
2557 else
2559 /* Verify that a 'good' control flow graph can be built. */
2560 if (is_cfg_nonregular ())
2562 find_single_block_region ();
2564 else
2566 struct edge_list *edge_list;
2568 /* The scheduler runs after estimate_probabilities; therefore, we
2569 can't blindly call back into find_basic_blocks since doing so
2570 could invalidate the branch probability info. We could,
2571 however, call cleanup_cfg. */
2572 edge_list = create_edge_list ();
2574 /* Compute the dominators and post dominators. */
2575 calculate_dominance_info (CDI_DOMINATORS);
2577 /* build_control_flow will return nonzero if it detects unreachable
2578 blocks or any other irregularity with the cfg which prevents
2579 cross block scheduling. */
2580 if (build_control_flow (edge_list) != 0)
2581 find_single_block_region ();
2582 else
2583 find_rgns (edge_list);
2585 if (sched_verbose >= 3)
2586 debug_regions ();
2588 /* We are done with flow's edge list. */
2589 free_edge_list (edge_list);
2591 /* For now. This will move as more and more of haifa is converted
2592 to using the cfg code in flow.c. */
2593 free_dominance_info (CDI_DOMINATORS);
2598 if (CHECK_DEAD_NOTES)
2600 blocks = sbitmap_alloc (last_basic_block);
2601 deaths_in_region = xmalloc (sizeof (int) * nr_regions);
2602 /* Remove all death notes from the subroutine. */
2603 for (rgn = 0; rgn < nr_regions; rgn++)
2605 int b;
2607 sbitmap_zero (blocks);
2608 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2609 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2611 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2613 sbitmap_free (blocks);
2615 else
2616 count_or_remove_death_notes (NULL, 1);
2619 /* The one entry point in this file. DUMP_FILE is the dump file for
2620 this pass. */
2622 void
2623 schedule_insns (FILE *dump_file)
2625 sbitmap large_region_blocks, blocks;
2626 int rgn;
2627 int any_large_regions;
2628 basic_block bb;
2630 /* Taking care of this degenerate case makes the rest of
2631 this code simpler. */
2632 if (n_basic_blocks == 0)
2633 return;
2635 nr_inter = 0;
2636 nr_spec = 0;
2638 sched_init (dump_file);
2640 init_regions ();
2642 current_sched_info = &region_sched_info;
2644 /* Schedule every region in the subroutine. */
2645 for (rgn = 0; rgn < nr_regions; rgn++)
2646 schedule_region (rgn);
2648 /* Update life analysis for the subroutine. Do single block regions
2649 first so that we can verify that live_at_start didn't change. Then
2650 do all other blocks. */
2651 /* ??? There is an outside possibility that update_life_info, or more
2652 to the point propagate_block, could get called with nonzero flags
2653 more than once for one basic block. This would be kinda bad if it
2654 were to happen, since REG_INFO would be accumulated twice for the
2655 block, and we'd have twice the REG_DEAD notes.
2657 I'm fairly certain that this _shouldn't_ happen, since I don't think
2658 that live_at_start should change at region heads. Not sure what the
2659 best way to test for this kind of thing... */
2661 allocate_reg_life_data ();
2662 compute_bb_for_insn ();
2664 any_large_regions = 0;
2665 large_region_blocks = sbitmap_alloc (last_basic_block);
2666 sbitmap_zero (large_region_blocks);
2667 FOR_EACH_BB (bb)
2668 SET_BIT (large_region_blocks, bb->index);
2670 blocks = sbitmap_alloc (last_basic_block);
2671 sbitmap_zero (blocks);
2673 /* Update life information. For regions consisting of multiple blocks
2674 we've possibly done interblock scheduling that affects global liveness.
2675 For regions consisting of single blocks we need to do only local
2676 liveness. */
2677 for (rgn = 0; rgn < nr_regions; rgn++)
2678 if (RGN_NR_BLOCKS (rgn) > 1)
2679 any_large_regions = 1;
2680 else
2682 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2683 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2686 /* Don't update reg info after reload, since that affects
2687 regs_ever_live, which should not change after reload. */
2688 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2689 (reload_completed ? PROP_DEATH_NOTES
2690 : PROP_DEATH_NOTES | PROP_REG_INFO));
2691 if (any_large_regions)
2693 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2694 PROP_DEATH_NOTES | PROP_REG_INFO);
2697 if (CHECK_DEAD_NOTES)
2699 /* Verify the counts of death notes in single basic block
2700 regions. */
2701 for (rgn = 0; rgn < nr_regions; rgn++)
2702 if (RGN_NR_BLOCKS (rgn) == 1)
2704 sbitmap_zero (blocks);
2705 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2707 gcc_assert (deaths_in_region[rgn]
2708 == count_or_remove_death_notes (blocks, 0));
2710 free (deaths_in_region);
2713 /* Reposition the prologue and epilogue notes in case we moved the
2714 prologue/epilogue insns. */
2715 if (reload_completed)
2716 reposition_prologue_and_epilogue_notes (get_insns ());
2718 /* Delete redundant line notes. */
2719 if (write_symbols != NO_DEBUG)
2720 rm_redundant_line_notes ();
2722 if (sched_verbose)
2724 if (reload_completed == 0 && flag_schedule_interblock)
2726 fprintf (sched_dump,
2727 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2728 nr_inter, nr_spec);
2730 else
2731 gcc_assert (nr_inter <= 0);
2732 fprintf (sched_dump, "\n\n");
2735 /* Clean up. */
2736 free (rgn_table);
2737 free (rgn_bb_table);
2738 free (block_to_bb);
2739 free (containing_rgn);
2741 sched_finish ();
2743 if (edge_table)
2745 free (edge_table);
2746 edge_table = NULL;
2749 if (in_edges)
2751 free (in_edges);
2752 in_edges = NULL;
2754 if (out_edges)
2756 free (out_edges);
2757 out_edges = NULL;
2760 sbitmap_free (blocks);
2761 sbitmap_free (large_region_blocks);
2763 #endif