1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
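/* A rough sketch of how these levels nest (illustration only, not part of
   the pass).  The function-level driver is schedule_insns () below; the
   block level itself is handled by schedule_block () from haifa-sched.c:

       init_regions ();                          (function level)
       for (rgn = 0; rgn < nr_regions; rgn++)
         schedule_region (rgn);                  (region level, which in
                                                  turn schedules each block
                                                  of the region)             */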
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "basic-block.h"
57 #include "regs.h"
58 #include "function.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "insn-attr.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "recog.h"
65 #include "cfglayout.h"
66 #include "params.h"
67 #include "sched-int.h"
68 #include "target.h"
70 /* Define this when we want to count REG_DEAD notes before and after scheduling
71 for sanity checking. We can't do that when conditional execution is used,
72 as REG_DEAD notes exist only for unconditional deaths. */
74 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
75 #define CHECK_DEAD_NOTES 1
76 #else
77 #define CHECK_DEAD_NOTES 0
78 #endif
81 #ifdef INSN_SCHEDULING
82 /* Some accessor macros for h_i_d members only used within this file. */
83 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
84 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
85 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
87 /* nr_inter/spec counts interblock/speculative motion for the function. */
88 static int nr_inter, nr_spec;
90 /* Control flow graph edges are kept in circular lists. */
91 typedef struct
93 int from_block;
94 int to_block;
95 int next_in;
96 int next_out;
98 haifa_edge;
99 static haifa_edge *edge_table;
101 #define NEXT_IN(edge) (edge_table[edge].next_in)
102 #define NEXT_OUT(edge) (edge_table[edge].next_out)
103 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
104 #define TO_BLOCK(edge) (edge_table[edge].to_block)
106 /* Number of edges in the control flow graph. (In fact, larger than
107 that by 1, since edge 0 is unused.) */
108 static int nr_edges;
110 /* Circular list of incoming/outgoing edges of a block. */
111 static int *in_edges;
112 static int *out_edges;
114 #define IN_EDGES(block) (in_edges[block])
115 #define OUT_EDGES(block) (out_edges[block])
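/* Illustrative sketch (kept out of the build): walking the circular list
   of out-edges of BLOCK with the accessors above.  Edge number 0 means
   "no edge", and the list wraps back around to its first element.  */
#if 0
static void
print_out_edges (int block)
{
  int fst_edge = OUT_EDGES (block);
  int e = fst_edge;

  if (e == 0)
    return;				/* BLOCK has no out-edges.  */

  do
    {
      fprintf (stderr, "edge %d: %d -> %d\n",
	       e, FROM_BLOCK (e), TO_BLOCK (e));
      e = NEXT_OUT (e);
    }
  while (e != fst_edge);		/* Stop once the list wraps.  */
}
#endif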
117 static int is_cfg_nonregular (void);
118 static int build_control_flow (struct edge_list *);
119 static void new_edge (int, int);
120 static bool sched_is_disabled_for_current_region_p (void);
122 /* A region is the main entity for interblock scheduling: insns
123 are allowed to move between blocks in the same region, along
124 control flow graph edges, in the 'up' direction. */
125 typedef struct
127 int rgn_nr_blocks; /* Number of blocks in region. */
128 int rgn_blocks; /* Blocks in the region (actually index in rgn_bb_table). */
130 region;
132 /* Number of regions in the procedure. */
133 static int nr_regions;
135 /* Table of region descriptions. */
136 static region *rgn_table;
138 /* Array of lists of regions' blocks. */
139 static int *rgn_bb_table;
141 /* Topological order of blocks in the region (if b2 is reachable from
142 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
143 always referred to by either block or b, while its topological
144 order name (in the region) is referred to by bb. */
145 static int *block_to_bb;
147 /* The number of the region containing a block. */
148 static int *containing_rgn;
150 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
151 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
152 #define BLOCK_TO_BB(block) (block_to_bb[block])
153 #define CONTAINING_RGN(block) (containing_rgn[block])
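/* Illustrative sketch (kept out of the build): visiting the blocks of
   region RGN in the topological order recorded in rgn_bb_table, using
   the accessors above.  debug_regions below does essentially this.  */
#if 0
static void
print_region_blocks (int rgn)
{
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      /* BB is the topological position inside the region; BLOCK is the
	 basic block index it maps to.  */
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      fprintf (stderr, ";; rgn %d: bb %d is block %d (in rgn %d)\n",
	       rgn, bb, block, CONTAINING_RGN (block));
    }
}
#endif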
155 void debug_regions (void);
156 static void find_single_block_region (void);
157 static void find_rgns (struct edge_list *);
158 static bool too_large (int, int *, int *);
160 extern void debug_live (int, int);
162 /* Blocks of the current region being scheduled. */
163 static int current_nr_blocks;
164 static int current_blocks;
166 /* The mapping from bb to block. */
167 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
169 typedef struct
171 int *first_member; /* Pointer to the list start in bitlst_table. */
172 int nr_members; /* The number of members of the bit list. */
174 bitlst;
176 static int bitlst_table_last;
177 static int *bitlst_table;
179 static void extract_bitlst (sbitmap, bitlst *);
181 /* Target info declarations.
183 The block currently being scheduled is referred to as the "target" block,
184 while other blocks in the region from which insns can be moved to the
185 target are called "source" blocks. The candidate structure holds info
186 about such sources: are they valid? Speculative? Etc. */
187 typedef bitlst bblst;
188 typedef struct
190 char is_valid;
191 char is_speculative;
192 int src_prob;
193 bblst split_bbs;
194 bblst update_bbs;
196 candidate;
198 static candidate *candidate_table;
200 /* A speculative motion requires checking live information on the path
201 from 'source' to 'target'. The split blocks are those to be checked.
202 After a speculative motion, live information should be modified in
203 the 'update' blocks.
205 Lists of split and update blocks for each candidate of the current
206 target are in array bblst_table. */
207 static int *bblst_table, bblst_size, bblst_last;
209 #define IS_VALID(src) ( candidate_table[src].is_valid )
210 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
211 #define SRC_PROB(src) ( candidate_table[src].src_prob )
213 /* The bb being currently scheduled. */
214 static int target_bb;
216 /* List of edges. */
217 typedef bitlst edgelst;
219 /* Target info functions. */
220 static void split_edges (int, int, edgelst *);
221 static void compute_trg_info (int);
222 void debug_candidate (int);
223 void debug_candidates (int);
225 /* Dominators array: dom[i] contains the sbitmap of dominators of
226 bb i in the region. */
227 static sbitmap *dom;
229 /* bb 0 is the only region entry. */
230 #define IS_RGN_ENTRY(bb) (!bb)
232 /* Is bb_src dominated by bb_trg. */
233 #define IS_DOMINATED(bb_src, bb_trg) \
234 ( TEST_BIT (dom[bb_src], bb_trg) )
236 /* Probability: Prob[i] is a float in [0, 1] which is the probability
237 of bb i relative to the region entry. */
238 static float *prob;
240 /* The probability of bb_src, relative to bb_trg. Note that while the
241 'prob[bb]' is a float in [0, 1], this macro returns an integer
242 in [0, 100]. */
243 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
244 prob[bb_trg])))
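/* For example (numbers assumed for illustration): if prob[bb_src] is 0.3
   and prob[bb_trg] is 0.6, GET_SRC_PROB (bb_src, bb_trg) evaluates to
   (int) (100.0 * 0.5) = 50, i.e. bb_src is executed in about half of the
   passes through bb_trg.  */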
246 /* Bit-set of edges, where bit i stands for edge i. */
247 typedef sbitmap edgeset;
249 /* Number of edges in the region. */
250 static int rgn_nr_edges;
252 /* Array of size rgn_nr_edges. */
253 static int *rgn_edges;
256 /* Mapping from each edge in the graph to its number in the rgn. */
257 static int *edge_to_bit;
258 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
260 /* The split edges of a source bb are different for each target
261 bb. In order to compute this efficiently, the 'potential-split edges'
262 are computed for each bb prior to scheduling a region. This is actually
263 the split edges of each bb relative to the region entry.
265 pot_split[bb] is the set of potential split edges of bb. */
266 static edgeset *pot_split;
268 /* For every bb, a set of its ancestor edges. */
269 static edgeset *ancestor_edges;
271 static void compute_dom_prob_ps (int);
273 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
274 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
275 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
277 /* Parameters affecting the decision of rank_for_schedule().
278 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
279 #define MIN_PROBABILITY 40
281 /* Speculative scheduling functions. */
282 static int check_live_1 (int, rtx);
283 static void update_live_1 (int, rtx);
284 static int check_live (rtx, int);
285 static void update_live (rtx, int);
286 static void set_spec_fed (rtx);
287 static int is_pfree (rtx, int, int);
288 static int find_conditional_protection (rtx, int);
289 static int is_conditionally_protected (rtx, int, int);
290 static int is_prisky (rtx, int, int);
291 static int is_exception_free (rtx, int, int);
293 static bool sets_likely_spilled (rtx);
294 static void sets_likely_spilled_1 (rtx, rtx, void *);
295 static void add_branch_dependences (rtx, rtx);
296 static void compute_block_backward_dependences (int);
297 void debug_dependencies (void);
299 static void init_regions (void);
300 static void schedule_region (int);
301 static rtx concat_INSN_LIST (rtx, rtx);
302 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
303 static void propagate_deps (int, struct deps *);
304 static void free_pending_lists (void);
306 /* Functions for construction of the control flow graph. */
308 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
310 We decide not to build the control flow graph if there is possibly more
311 than one entry to the function, if computed branches exist, or if we
312 have nonlocal gotos. */
314 static int
315 is_cfg_nonregular (void)
317 basic_block b;
318 rtx insn;
319 RTX_CODE code;
321 /* If we have a label that could be the target of a nonlocal goto, then
322 the cfg is not well structured. */
323 if (nonlocal_goto_handler_labels)
324 return 1;
326 /* If we have any forced labels, then the cfg is not well structured. */
327 if (forced_labels)
328 return 1;
330 /* If this function has a computed jump, then we consider the cfg
331 not well structured. */
332 if (current_function_has_computed_jump)
333 return 1;
335 /* If we have exception handlers, then we consider the cfg not well
336 structured. ?!? We should be able to handle this now that flow.c
337 computes an accurate cfg for EH. */
338 if (current_function_has_exception_handlers ())
339 return 1;
341 /* If we have non-jumping insns which refer to labels, then we consider
342 the cfg not well structured. */
343 /* Check for labels referred to other than by jumps. */
344 FOR_EACH_BB (b)
345 for (insn = BB_HEAD (b); ; insn = NEXT_INSN (insn))
347 code = GET_CODE (insn);
348 if (INSN_P (insn) && code != JUMP_INSN)
350 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
352 if (note
353 && ! (JUMP_P (NEXT_INSN (insn))
354 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
355 XEXP (note, 0))))
356 return 1;
359 if (insn == BB_END (b))
360 break;
363 /* All the tests passed. Consider the cfg well structured. */
364 return 0;
367 /* Build the control flow graph and set nr_edges.
369 Instead of trying to build a cfg ourselves, we rely on flow to
370 do it for us. Stamp out useless code (and bug) duplication.
372 Return nonzero if an irregularity in the cfg is found which would
373 prevent cross block scheduling. */
375 static int
376 build_control_flow (struct edge_list *edge_list)
378 int i, unreachable, num_edges;
379 basic_block b;
381 /* This already accounts for entry/exit edges. */
382 num_edges = NUM_EDGES (edge_list);
384 /* Unreachable loops with more than one basic block are detected
385 during the DFS traversal in find_rgns.
387 Unreachable loops with a single block are detected here. This
388 test is redundant with the one in find_rgns, but it's much
389 cheaper to go ahead and catch the trivial case here. */
390 unreachable = 0;
391 FOR_EACH_BB (b)
393 if (b->pred == NULL
394 || (b->pred->src == b
395 && b->pred->pred_next == NULL))
396 unreachable = 1;
399 /* ??? We can kill these soon. */
400 in_edges = xcalloc (last_basic_block, sizeof (int));
401 out_edges = xcalloc (last_basic_block, sizeof (int));
402 edge_table = xcalloc (num_edges, sizeof (haifa_edge));
404 nr_edges = 0;
405 for (i = 0; i < num_edges; i++)
407 edge e = INDEX_EDGE (edge_list, i);
409 if (e->dest != EXIT_BLOCK_PTR
410 && e->src != ENTRY_BLOCK_PTR)
411 new_edge (e->src->index, e->dest->index);
414 /* Increment by 1, since edge 0 is unused. */
415 nr_edges++;
417 return unreachable;
420 /* Record an edge in the control flow graph from SOURCE to TARGET.
422 In theory, this is redundant with the s_succs computed above, but
423 we have not converted all of haifa to use information from the
424 integer lists. */
426 static void
427 new_edge (int source, int target)
429 int e, next_edge;
430 int curr_edge, fst_edge;
432 /* Check for duplicates. */
433 fst_edge = curr_edge = OUT_EDGES (source);
434 while (curr_edge)
436 if (FROM_BLOCK (curr_edge) == source
437 && TO_BLOCK (curr_edge) == target)
439 return;
442 curr_edge = NEXT_OUT (curr_edge);
444 if (fst_edge == curr_edge)
445 break;
448 e = ++nr_edges;
450 FROM_BLOCK (e) = source;
451 TO_BLOCK (e) = target;
453 if (OUT_EDGES (source))
455 next_edge = NEXT_OUT (OUT_EDGES (source));
456 NEXT_OUT (OUT_EDGES (source)) = e;
457 NEXT_OUT (e) = next_edge;
459 else
461 OUT_EDGES (source) = e;
462 NEXT_OUT (e) = e;
465 if (IN_EDGES (target))
467 next_edge = NEXT_IN (IN_EDGES (target));
468 NEXT_IN (IN_EDGES (target)) = e;
469 NEXT_IN (e) = next_edge;
471 else
473 IN_EDGES (target) = e;
474 NEXT_IN (e) = e;
478 /* Translate a bit-set SET to a list BL of the bit-set members. */
480 static void
481 extract_bitlst (sbitmap set, bitlst *bl)
483 int i;
485 /* bitlst table space is reused in each call to extract_bitlst. */
486 bitlst_table_last = 0;
488 bl->first_member = &bitlst_table[bitlst_table_last];
489 bl->nr_members = 0;
491 /* Iterate over each word in the bitset. */
492 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
494 bitlst_table[bitlst_table_last++] = i;
495 (bl->nr_members)++;
500 /* Functions for the construction of regions. */
502 /* Print the regions, for debugging purposes. Callable from debugger. */
504 void
505 debug_regions (void)
507 int rgn, bb;
509 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
510 for (rgn = 0; rgn < nr_regions; rgn++)
512 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
513 rgn_table[rgn].rgn_nr_blocks);
514 fprintf (sched_dump, ";;\tbb/block: ");
516 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
518 current_blocks = RGN_BLOCKS (rgn);
520 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
521 abort ();
523 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
526 fprintf (sched_dump, "\n\n");
530 /* Build a single block region for each basic block in the function.
531 This allows for using the same code for interblock and basic block
532 scheduling. */
534 static void
535 find_single_block_region (void)
537 basic_block bb;
539 nr_regions = 0;
541 FOR_EACH_BB (bb)
543 rgn_bb_table[nr_regions] = bb->index;
544 RGN_NR_BLOCKS (nr_regions) = 1;
545 RGN_BLOCKS (nr_regions) = nr_regions;
546 CONTAINING_RGN (bb->index) = nr_regions;
547 BLOCK_TO_BB (bb->index) = 0;
548 nr_regions++;
552 /* Update number of blocks and the estimate for number of insns
553 in the region. Return true if the region is "too large" for interblock
554 scheduling (compile time considerations). */
556 static bool
557 too_large (int block, int *num_bbs, int *num_insns)
559 (*num_bbs)++;
560 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
561 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
563 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
564 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
567 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
568 is still an inner loop. Put in max_hdr[blk] the header of the innermost
569 loop containing blk. */
570 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
572 if (max_hdr[blk] == -1) \
573 max_hdr[blk] = hdr; \
574 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
575 RESET_BIT (inner, hdr); \
576 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
578 RESET_BIT (inner,max_hdr[blk]); \
579 max_hdr[blk] = hdr; \
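/* For illustration (assumed numbers): suppose BLK was first recorded as
   belonging to a loop headed by H1 (max_hdr[blk] == H1) and is now also
   found inside a loop headed by H2.  If dfs_nr[H1] > dfs_nr[H2], then H2
   was visited earlier and encloses H1, so H2 cannot be an inner loop and
   its INNER bit is cleared.  If dfs_nr[H1] < dfs_nr[H2], then H2 is the
   more deeply nested header, so H1 loses its INNER bit and max_hdr[blk]
   becomes H2.  */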
583 /* Find regions for interblock scheduling.
585 A region for scheduling can be:
587 * A loop-free procedure, or
589 * A reducible inner loop, or
591 * A basic block not contained in any other region.
593 ?!? In theory we could build other regions based on extended basic
594 blocks or reverse extended basic blocks. Is it worth the trouble?
596 Loop blocks that form a region are put into the region's block list
597 in topological order.
599 This procedure stores its results into the following global (ick) variables
601 * nr_regions
602 * rgn_table
603 * rgn_bb_table
604 * block_to_bb
605 * containing_rgn
607 We use dominator relationships to avoid making regions out of non-reducible
608 loops.
610 This procedure needs to be converted to work on pred/succ lists instead
611 of edge tables. That would simplify it somewhat. */
613 static void
614 find_rgns (struct edge_list *edge_list)
616 int *max_hdr, *dfs_nr, *stack, *degree;
617 char no_loops = 1;
618 int node, child, loop_head, i, head, tail;
619 int count = 0, sp, idx = 0;
620 int current_edge = out_edges[ENTRY_BLOCK_PTR->succ->dest->index];
621 int num_bbs, num_insns, unreachable;
622 int too_large_failure;
623 basic_block bb;
625 /* Note if an edge has been passed. */
626 sbitmap passed;
628 /* Note if a block is a natural loop header. */
629 sbitmap header;
631 /* Note if a block is a natural inner loop header. */
632 sbitmap inner;
634 /* Note if a block is in the block queue. */
635 sbitmap in_queue;
637 /* Note if a block is on the DFS stack. */
638 sbitmap in_stack;
640 int num_edges = NUM_EDGES (edge_list);
642 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
643 and a mapping from block to its loop header (if the block is contained
644 in a loop, else -1).
646 Store results in HEADER, INNER, and MAX_HDR respectively, these will
647 be used as inputs to the second traversal.
649 STACK, SP and DFS_NR are only used during the first traversal. */
651 /* Allocate and initialize variables for the first traversal. */
652 max_hdr = xmalloc (last_basic_block * sizeof (int));
653 dfs_nr = xcalloc (last_basic_block, sizeof (int));
654 stack = xmalloc (nr_edges * sizeof (int));
656 inner = sbitmap_alloc (last_basic_block);
657 sbitmap_ones (inner);
659 header = sbitmap_alloc (last_basic_block);
660 sbitmap_zero (header);
662 passed = sbitmap_alloc (nr_edges);
663 sbitmap_zero (passed);
665 in_queue = sbitmap_alloc (last_basic_block);
666 sbitmap_zero (in_queue);
668 in_stack = sbitmap_alloc (last_basic_block);
669 sbitmap_zero (in_stack);
671 for (i = 0; i < last_basic_block; i++)
672 max_hdr[i] = -1;
674 /* DFS traversal to find inner loops in the cfg. */
676 sp = -1;
677 while (1)
679 if (current_edge == 0 || TEST_BIT (passed, current_edge))
681 /* We have reached a leaf node or a node that was already
682 processed. Pop edges off the stack until we find
683 an edge that has not yet been processed. */
684 while (sp >= 0
685 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
687 /* Pop entry off the stack. */
688 current_edge = stack[sp--];
689 node = FROM_BLOCK (current_edge);
690 child = TO_BLOCK (current_edge);
691 RESET_BIT (in_stack, child);
692 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
693 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
694 current_edge = NEXT_OUT (current_edge);
697 /* See if we have finished the DFS tree traversal. */
698 if (sp < 0 && TEST_BIT (passed, current_edge))
699 break;
701 /* Nope, continue the traversal with the popped node. */
702 continue;
705 /* Process a node. */
706 node = FROM_BLOCK (current_edge);
707 child = TO_BLOCK (current_edge);
708 SET_BIT (in_stack, node);
709 dfs_nr[node] = ++count;
711 /* If the successor is in the stack, then we've found a loop.
712 Mark the loop; if it is not a natural loop, it will
713 be rejected during the second traversal. */
714 if (TEST_BIT (in_stack, child))
716 no_loops = 0;
717 SET_BIT (header, child);
718 UPDATE_LOOP_RELATIONS (node, child);
719 SET_BIT (passed, current_edge);
720 current_edge = NEXT_OUT (current_edge);
721 continue;
724 /* If the child was already visited, then there is no need to visit
725 it again. Just update the loop relationships and restart
726 with a new edge. */
727 if (dfs_nr[child])
729 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
730 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
731 SET_BIT (passed, current_edge);
732 current_edge = NEXT_OUT (current_edge);
733 continue;
736 /* Push an entry on the stack and continue DFS traversal. */
737 stack[++sp] = current_edge;
738 SET_BIT (passed, current_edge);
739 current_edge = OUT_EDGES (child);
741 /* This is temporary until haifa is converted to use rth's new
742 cfg routines which have true entry/exit blocks and the
743 appropriate edges from/to those blocks.
745 Generally we update dfs_nr for a node when we process its
746 out edge. However, if the node has no out edge then we will
747 not set dfs_nr for that node. This can confuse the scheduler
748 into thinking that we have unreachable blocks, which in turn
749 disables cross block scheduling.
751 So, if we have a node with no out edges, go ahead and mark it
752 as reachable now. */
753 if (current_edge == 0)
754 dfs_nr[child] = ++count;
757 /* Another check for unreachable blocks. The earlier test in
758 is_cfg_nonregular only finds unreachable blocks that do not
759 form a loop.
761 The DFS traversal will mark every block that is reachable from
762 the entry node by placing a nonzero value in dfs_nr. Thus if
763 dfs_nr is zero for any block, then it must be unreachable. */
764 unreachable = 0;
765 FOR_EACH_BB (bb)
766 if (dfs_nr[bb->index] == 0)
768 unreachable = 1;
769 break;
772 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
773 to hold degree counts. */
774 degree = dfs_nr;
776 FOR_EACH_BB (bb)
777 degree[bb->index] = 0;
778 for (i = 0; i < num_edges; i++)
780 edge e = INDEX_EDGE (edge_list, i);
782 if (e->dest != EXIT_BLOCK_PTR)
783 degree[e->dest->index]++;
786 /* Do not perform region scheduling if there are any unreachable
787 blocks. */
788 if (!unreachable)
790 int *queue;
792 if (no_loops)
793 SET_BIT (header, 0);
795 /* Second traversal: find reducible inner loops and topologically sort
796 the blocks of each region. */
798 queue = xmalloc (n_basic_blocks * sizeof (int));
800 /* Find blocks which are inner loop headers. We still have non-reducible
801 loops to consider at this point. */
802 FOR_EACH_BB (bb)
804 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
806 edge e;
807 basic_block jbb;
809 /* Now check that the loop is reducible. We do this separately
810 from finding inner loops so that we do not find a reducible
811 loop which contains an inner non-reducible loop.
813 A simple way to find reducible/natural loops is to verify
814 that each block in the loop is dominated by the loop
815 header.
817 If there exists a block that is not dominated by the loop
818 header, then the block is reachable from outside the loop
819 and thus the loop is not a natural loop. */
820 FOR_EACH_BB (jbb)
822 /* First identify blocks in the loop, except for the loop
823 entry block. */
824 if (bb->index == max_hdr[jbb->index] && bb != jbb)
826 /* Now verify that the block is dominated by the loop
827 header. */
828 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
829 break;
833 /* If we exited the loop early, then BB is the header of
834 a non-reducible loop and we should quit processing it
835 now. */
836 if (jbb != EXIT_BLOCK_PTR)
837 continue;
839 /* BB is a header of an inner loop, or block 0 in a subroutine
840 with no loops at all. */
841 head = tail = -1;
842 too_large_failure = 0;
843 loop_head = max_hdr[bb->index];
845 /* Decrease degree of all BB's successors for topological
846 ordering. */
847 for (e = bb->succ; e; e = e->succ_next)
848 if (e->dest != EXIT_BLOCK_PTR)
849 --degree[e->dest->index];
851 /* Estimate # insns, and count # blocks in the region. */
852 num_bbs = 1;
853 num_insns = (INSN_LUID (BB_END (bb))
854 - INSN_LUID (BB_HEAD (bb)));
856 /* Find all loop latches (blocks with back edges to the loop
857 header) or all the leaf blocks if the cfg has no loops.
859 Place those blocks into the queue. */
860 if (no_loops)
862 FOR_EACH_BB (jbb)
863 /* Leaf nodes have only a single successor which must
864 be EXIT_BLOCK. */
865 if (jbb->succ
866 && jbb->succ->dest == EXIT_BLOCK_PTR
867 && jbb->succ->succ_next == NULL)
869 queue[++tail] = jbb->index;
870 SET_BIT (in_queue, jbb->index);
872 if (too_large (jbb->index, &num_bbs, &num_insns))
874 too_large_failure = 1;
875 break;
879 else
881 edge e;
883 for (e = bb->pred; e; e = e->pred_next)
885 if (e->src == ENTRY_BLOCK_PTR)
886 continue;
888 node = e->src->index;
890 if (max_hdr[node] == loop_head && node != bb->index)
892 /* This is a loop latch. */
893 queue[++tail] = node;
894 SET_BIT (in_queue, node);
896 if (too_large (node, &num_bbs, &num_insns))
898 too_large_failure = 1;
899 break;
905 /* Now add all the blocks in the loop to the queue.
907 We know the loop is a natural loop; however the algorithm
908 above will not always mark certain blocks as being in the
909 loop. Consider:
910 node children
911 a b,c
913 c a,d
916 The algorithm in the DFS traversal may not mark B & D as part
917 of the loop (ie they will not have max_hdr set to A).
919 We know they can not be loop latches (else they would have
920 had max_hdr set since they'd have a backedge to a dominator
921 block). So we don't need them on the initial queue.
923 We know they are part of the loop because they are dominated
924 by the loop header and can be reached by a backwards walk of
925 the edges starting with nodes on the initial queue.
927 It is safe and desirable to include those nodes in the
928 loop/scheduling region. To do so we would need to decrease
929 the degree of a node if it is the target of a backedge
930 within the loop itself as the node is placed in the queue.
932 We do not do this because I'm not sure that the actual
933 scheduling code will properly handle this case. ?!? */
935 while (head < tail && !too_large_failure)
937 edge e;
938 child = queue[++head];
940 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
942 node = e->src->index;
944 /* See discussion above about nodes not marked as in
945 this loop during the initial DFS traversal. */
946 if (e->src == ENTRY_BLOCK_PTR
947 || max_hdr[node] != loop_head)
949 tail = -1;
950 break;
952 else if (!TEST_BIT (in_queue, node) && node != bb->index)
954 queue[++tail] = node;
955 SET_BIT (in_queue, node);
957 if (too_large (node, &num_bbs, &num_insns))
959 too_large_failure = 1;
960 break;
966 if (tail >= 0 && !too_large_failure)
968 /* Place the loop header into list of region blocks. */
969 degree[bb->index] = -1;
970 rgn_bb_table[idx] = bb->index;
971 RGN_NR_BLOCKS (nr_regions) = num_bbs;
972 RGN_BLOCKS (nr_regions) = idx++;
973 CONTAINING_RGN (bb->index) = nr_regions;
974 BLOCK_TO_BB (bb->index) = count = 0;
976 /* Remove blocks from queue[] when their in degree
977 becomes zero. Repeat until no blocks are left on the
978 list. This produces a topological list of blocks in
979 the region. */
980 while (tail >= 0)
982 if (head < 0)
983 head = tail;
984 child = queue[head];
985 if (degree[child] == 0)
987 edge e;
989 degree[child] = -1;
990 rgn_bb_table[idx++] = child;
991 BLOCK_TO_BB (child) = ++count;
992 CONTAINING_RGN (child) = nr_regions;
993 queue[head] = queue[tail--];
995 for (e = BASIC_BLOCK (child)->succ;
997 e = e->succ_next)
998 if (e->dest != EXIT_BLOCK_PTR)
999 --degree[e->dest->index];
1001 else
1002 --head;
1004 ++nr_regions;
1008 free (queue);
1011 /* Any block that did not end up in a region is placed into a region
1012 by itself. */
1013 FOR_EACH_BB (bb)
1014 if (degree[bb->index] >= 0)
1016 rgn_bb_table[idx] = bb->index;
1017 RGN_NR_BLOCKS (nr_regions) = 1;
1018 RGN_BLOCKS (nr_regions) = idx++;
1019 CONTAINING_RGN (bb->index) = nr_regions++;
1020 BLOCK_TO_BB (bb->index) = 0;
1023 free (max_hdr);
1024 free (dfs_nr);
1025 free (stack);
1026 sbitmap_free (passed);
1027 sbitmap_free (header);
1028 sbitmap_free (inner);
1029 sbitmap_free (in_queue);
1030 sbitmap_free (in_stack);
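/* A small example of the partitioning computed above (illustrative only).
   For the CFG

	ENTRY -> A -> B -> C -> EXIT
		      ^____/		(C branches back to B)

   the DFS marks B as a natural inner loop header, so {B, C} becomes one
   region with B as its entry, and A is picked up by the final loop as a
   single-block region, giving nr_regions == 2.  ENTRY and EXIT never
   appear in rgn_bb_table.  */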
1033 /* Functions for regions scheduling information. */
1035 /* Compute dominators, probability, and potential-split-edges of bb.
1036 Assume that these values were already computed for bb's predecessors. */
1038 static void
1039 compute_dom_prob_ps (int bb)
1041 int nxt_in_edge, fst_in_edge, pred;
1042 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1044 prob[bb] = 0.0;
1045 if (IS_RGN_ENTRY (bb))
1047 SET_BIT (dom[bb], 0);
1048 prob[bb] = 1.0;
1049 return;
1052 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1054 /* Initialize dom[bb] to '111..1'. */
1055 sbitmap_ones (dom[bb]);
1059 pred = FROM_BLOCK (nxt_in_edge);
1060 sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1061 sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1063 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1065 nr_out_edges = 1;
1066 nr_rgn_out_edges = 0;
1067 fst_out_edge = OUT_EDGES (pred);
1068 nxt_out_edge = NEXT_OUT (fst_out_edge);
1070 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1072 SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1074 /* The successor doesn't belong in the region? */
1075 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1076 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1077 ++nr_rgn_out_edges;
1079 while (fst_out_edge != nxt_out_edge)
1081 ++nr_out_edges;
1082 /* The successor doesn't belong in the region? */
1083 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1084 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1085 ++nr_rgn_out_edges;
1086 SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1087 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1091 /* Now nr_rgn_out_edges is the number of region-exit edges from
1092 pred, and nr_out_edges will be the number of pred out edges
1093 not leaving the region. */
1094 nr_out_edges -= nr_rgn_out_edges;
1095 if (nr_rgn_out_edges > 0)
1096 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1097 else
1098 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1099 nxt_in_edge = NEXT_IN (nxt_in_edge);
1101 while (fst_in_edge != nxt_in_edge);
1103 SET_BIT (dom[bb], bb);
1104 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1106 if (sched_verbose >= 2)
1107 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1108 (int) (100.0 * prob[bb]));
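/* A worked example of the probability computation above (numbers assumed
   for illustration): let the region entry E have prob 1.0 and two
   out-edges, both staying in the region, leading to A and B; then A and B
   each receive 1.0 / 2 = 0.5 from E.  If A additionally has one edge to B
   inside the region and one edge leaving the region, B also receives
   0.9 * 0.5 / 1 = 0.45 from A, so prob[B] = 0.95 and
   GET_SRC_PROB (B, E) = 95.  */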
1111 /* Functions for target info. */
1113 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1114 Note that bb_trg dominates bb_src. */
1116 static void
1117 split_edges (int bb_src, int bb_trg, edgelst *bl)
1119 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1120 sbitmap_copy (src, pot_split[bb_src]);
1122 sbitmap_difference (src, src, pot_split[bb_trg]);
1123 extract_bitlst (src, bl);
1124 sbitmap_free (src);
1127 /* Find the valid candidate-source-blocks for the target block TRG, compute
1128 their probability, and check if they are speculative or not.
1129 For speculative sources, compute their update-blocks and split-blocks. */
1131 static void
1132 compute_trg_info (int trg)
1134 candidate *sp;
1135 edgelst el;
1136 int check_block, update_idx;
1137 int i, j, k, fst_edge, nxt_edge;
1139 /* Define some of the fields for the target bb as well. */
1140 sp = candidate_table + trg;
1141 sp->is_valid = 1;
1142 sp->is_speculative = 0;
1143 sp->src_prob = 100;
1145 for (i = trg + 1; i < current_nr_blocks; i++)
1147 sp = candidate_table + i;
1149 sp->is_valid = IS_DOMINATED (i, trg);
1150 if (sp->is_valid)
1152 sp->src_prob = GET_SRC_PROB (i, trg);
1153 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1156 if (sp->is_valid)
1158 split_edges (i, trg, &el);
1159 sp->is_speculative = (el.nr_members) ? 1 : 0;
1160 if (sp->is_speculative && !flag_schedule_speculative)
1161 sp->is_valid = 0;
1164 if (sp->is_valid)
1166 char *update_blocks;
1168 /* Compute split blocks and store them in bblst_table.
1169 The TO block of every split edge is a split block. */
1170 sp->split_bbs.first_member = &bblst_table[bblst_last];
1171 sp->split_bbs.nr_members = el.nr_members;
1172 for (j = 0; j < el.nr_members; bblst_last++, j++)
1173 bblst_table[bblst_last] =
1174 TO_BLOCK (rgn_edges[el.first_member[j]]);
1175 sp->update_bbs.first_member = &bblst_table[bblst_last];
1177 /* Compute update blocks and store them in bblst_table.
1178 For every split edge, look at the FROM block, and check
1179 all out edges. For each out edge that is not a split edge,
1180 add the TO block to the update block list. This list can end
1181 up with a lot of duplicates. We need to weed them out to avoid
1182 overrunning the end of the bblst_table. */
1183 update_blocks = alloca (last_basic_block);
1184 memset (update_blocks, 0, last_basic_block);
1186 update_idx = 0;
1187 for (j = 0; j < el.nr_members; j++)
1189 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1190 fst_edge = nxt_edge = OUT_EDGES (check_block);
1193 if (! update_blocks[TO_BLOCK (nxt_edge)])
1195 for (k = 0; k < el.nr_members; k++)
1196 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1197 break;
1199 if (k >= el.nr_members)
1201 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1202 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1203 update_idx++;
1207 nxt_edge = NEXT_OUT (nxt_edge);
1209 while (fst_edge != nxt_edge);
1211 sp->update_bbs.nr_members = update_idx;
1213 /* Make sure we didn't overrun the end of bblst_table. */
1214 if (bblst_last > bblst_size)
1215 abort ();
1217 else
1219 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1221 sp->is_speculative = 0;
1222 sp->src_prob = 0;
1227 /* Print candidates info, for debugging purposes. Callable from debugger. */
1229 void
1230 debug_candidate (int i)
1232 if (!candidate_table[i].is_valid)
1233 return;
1235 if (candidate_table[i].is_speculative)
1237 int j;
1238 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1240 fprintf (sched_dump, "split path: ");
1241 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1243 int b = candidate_table[i].split_bbs.first_member[j];
1245 fprintf (sched_dump, " %d ", b);
1247 fprintf (sched_dump, "\n");
1249 fprintf (sched_dump, "update path: ");
1250 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1252 int b = candidate_table[i].update_bbs.first_member[j];
1254 fprintf (sched_dump, " %d ", b);
1256 fprintf (sched_dump, "\n");
1258 else
1260 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1264 /* Print candidates info, for debugging purposes. Callable from debugger. */
1266 void
1267 debug_candidates (int trg)
1269 int i;
1271 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1272 BB_TO_BLOCK (trg), trg);
1273 for (i = trg + 1; i < current_nr_blocks; i++)
1274 debug_candidate (i);
1277 /* Functions for speculative scheduling. */
1279 /* Return 0 if x is a set of a register alive in the beginning of one
1280 of the split-blocks of src, otherwise return 1. */
1282 static int
1283 check_live_1 (int src, rtx x)
1285 int i;
1286 int regno;
1287 rtx reg = SET_DEST (x);
1289 if (reg == 0)
1290 return 1;
1292 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1293 || GET_CODE (reg) == SIGN_EXTRACT
1294 || GET_CODE (reg) == STRICT_LOW_PART)
1295 reg = XEXP (reg, 0);
1297 if (GET_CODE (reg) == PARALLEL)
1299 int i;
1301 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1302 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1303 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1304 return 1;
1306 return 0;
1309 if (!REG_P (reg))
1310 return 1;
1312 regno = REGNO (reg);
1314 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1316 /* Global registers are assumed live. */
1317 return 0;
1319 else
1321 if (regno < FIRST_PSEUDO_REGISTER)
1323 /* Check for hard registers. */
1324 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1325 while (--j >= 0)
1327 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1329 int b = candidate_table[src].split_bbs.first_member[i];
1331 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1332 regno + j))
1334 return 0;
1339 else
1341 /* Check for pseudo registers. */
1342 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1344 int b = candidate_table[src].split_bbs.first_member[i];
1346 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1348 return 0;
1354 return 1;
1357 /* If x is a set of a register R, mark that R is alive in the beginning
1358 of every update-block of src. */
1360 static void
1361 update_live_1 (int src, rtx x)
1363 int i;
1364 int regno;
1365 rtx reg = SET_DEST (x);
1367 if (reg == 0)
1368 return;
1370 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1371 || GET_CODE (reg) == SIGN_EXTRACT
1372 || GET_CODE (reg) == STRICT_LOW_PART)
1373 reg = XEXP (reg, 0);
1375 if (GET_CODE (reg) == PARALLEL)
1377 int i;
1379 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1380 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1381 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1383 return;
1386 if (!REG_P (reg))
1387 return;
1389 /* Global registers are always live, so the code below does not apply
1390 to them. */
1392 regno = REGNO (reg);
1394 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1396 if (regno < FIRST_PSEUDO_REGISTER)
1398 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1399 while (--j >= 0)
1401 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1403 int b = candidate_table[src].update_bbs.first_member[i];
1405 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1406 regno + j);
1410 else
1412 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1414 int b = candidate_table[src].update_bbs.first_member[i];
1416 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1422 /* Return 1 if insn can be speculatively moved from block src to trg,
1423 otherwise return 0. Called before first insertion of insn to
1424 ready-list or before the scheduling. */
1426 static int
1427 check_live (rtx insn, int src)
1429 /* Find the registers set by instruction. */
1430 if (GET_CODE (PATTERN (insn)) == SET
1431 || GET_CODE (PATTERN (insn)) == CLOBBER)
1432 return check_live_1 (src, PATTERN (insn));
1433 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1435 int j;
1436 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1437 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1438 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1439 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1440 return 0;
1442 return 1;
1445 return 1;
1448 /* Update the live registers info after insn was moved speculatively from
1449 block src to trg. */
1451 static void
1452 update_live (rtx insn, int src)
1454 /* Find the registers set by instruction. */
1455 if (GET_CODE (PATTERN (insn)) == SET
1456 || GET_CODE (PATTERN (insn)) == CLOBBER)
1457 update_live_1 (src, PATTERN (insn));
1458 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1460 int j;
1461 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1462 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1463 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1464 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1468 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1469 #define IS_REACHABLE(bb_from, bb_to) \
1470 (bb_from == bb_to \
1471 || IS_RGN_ENTRY (bb_from) \
1472 || (TEST_BIT (ancestor_edges[bb_to], \
1473 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
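/* For example, find_conditional_protection below uses
   IS_REACHABLE (INSN_BB (next), load_insn_bb) to check that the block of
   the speculative load can be reached, inside the region, from the block
   of the conditional branch NEXT.  */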
1475 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1477 static void
1478 set_spec_fed (rtx load_insn)
1480 rtx link;
1482 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1483 if (GET_MODE (link) == VOIDmode)
1484 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1485 } /* set_spec_fed */
1487 /* On the path from the insn to load_insn_bb, find a conditional
1488 branch depending on insn, that guards the speculative load. */
1490 static int
1491 find_conditional_protection (rtx insn, int load_insn_bb)
1493 rtx link;
1495 /* Iterate through DEF-USE forward dependences. */
1496 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1498 rtx next = XEXP (link, 0);
1499 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1500 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1501 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1502 && load_insn_bb != INSN_BB (next)
1503 && GET_MODE (link) == VOIDmode
1504 && (JUMP_P (next)
1505 || find_conditional_protection (next, load_insn_bb)))
1506 return 1;
1508 return 0;
1509 } /* find_conditional_protection */
1511 /* Returns 1 if the same insn1 that participates in the computation
1512 of load_insn's address is feeding a conditional branch that is
1513 guarding load_insn. This is true if we find the two DEF-USE
1514 chains:
1515 insn1 -> ... -> conditional-branch
1516 insn1 -> ... -> load_insn,
1517 and if a flow path exists:
1518 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1519 and if insn1 is on the path
1520 region-entry -> ... -> bb_trg -> ... load_insn.
1522 Locate insn1 by climbing on LOG_LINKS from load_insn.
1523 Locate the branch by following INSN_DEPEND from insn1. */
1525 static int
1526 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1528 rtx link;
1530 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1532 rtx insn1 = XEXP (link, 0);
1534 /* Must be a DEF-USE dependence upon non-branch. */
1535 if (GET_MODE (link) != VOIDmode
1536 || JUMP_P (insn1))
1537 continue;
1539 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1540 if (INSN_BB (insn1) == bb_src
1541 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1542 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1543 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1544 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1545 continue;
1547 /* Now search for the conditional-branch. */
1548 if (find_conditional_protection (insn1, bb_src))
1549 return 1;
1551 /* Recursive step: search another insn1, "above" current insn1. */
1552 return is_conditionally_protected (insn1, bb_src, bb_trg);
1555 /* The chain does not exist. */
1556 return 0;
1557 } /* is_conditionally_protected */
1559 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1560 load_insn can move speculatively from bb_src to bb_trg. All the
1561 following must hold:
1563 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1564 (2) load_insn and load1 have a def-use dependence upon
1565 the same insn 'insn1'.
1566 (3) either load2 is in bb_trg, or:
1567 - there's only one split-block, and
1568 - load1 is on the escape path, and
1570 From all these we can conclude that the two loads access memory
1571 addresses that differ at most by a constant, and hence if moving
1572 load_insn would cause an exception, it would have been caused by
1573 load2 anyhow. */
1575 static int
1576 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1578 rtx back_link;
1579 candidate *candp = candidate_table + bb_src;
1581 if (candp->split_bbs.nr_members != 1)
1582 /* Must have exactly one escape block. */
1583 return 0;
1585 for (back_link = LOG_LINKS (load_insn);
1586 back_link; back_link = XEXP (back_link, 1))
1588 rtx insn1 = XEXP (back_link, 0);
1590 if (GET_MODE (back_link) == VOIDmode)
1592 /* Found a DEF-USE dependence (insn1, load_insn). */
1593 rtx fore_link;
1595 for (fore_link = INSN_DEPEND (insn1);
1596 fore_link; fore_link = XEXP (fore_link, 1))
1598 rtx insn2 = XEXP (fore_link, 0);
1599 if (GET_MODE (fore_link) == VOIDmode)
1601 /* Found a DEF-USE dependence (insn1, insn2). */
1602 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1603 /* insn2 not guaranteed to be a 1 base reg load. */
1604 continue;
1606 if (INSN_BB (insn2) == bb_trg)
1607 /* insn2 is the similar load, in the target block. */
1608 return 1;
1610 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1611 /* insn2 is a similar load, in a split-block. */
1612 return 1;
1618 /* Couldn't find a similar load. */
1619 return 0;
1620 } /* is_pfree */
1622 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1623 a load moved speculatively, or if load_insn is protected by
1624 a compare on load_insn's address). */
1626 static int
1627 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1629 if (FED_BY_SPEC_LOAD (load_insn))
1630 return 1;
1632 if (LOG_LINKS (load_insn) == NULL)
1633 /* Dependence may 'hide' out of the region. */
1634 return 1;
1636 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1637 return 1;
1639 return 0;
1642 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1643 Return 1 if insn is exception-free (and the motion is valid)
1644 and 0 otherwise. */
1646 static int
1647 is_exception_free (rtx insn, int bb_src, int bb_trg)
1649 int insn_class = haifa_classify_insn (insn);
1651 /* Handle non-load insns. */
1652 switch (insn_class)
1654 case TRAP_FREE:
1655 return 1;
1656 case TRAP_RISKY:
1657 return 0;
1658 default:;
1661 /* Handle loads. */
1662 if (!flag_schedule_speculative_load)
1663 return 0;
1664 IS_LOAD_INSN (insn) = 1;
1665 switch (insn_class)
1667 case IFREE:
1668 return (1);
1669 case IRISKY:
1670 return 0;
1671 case PFREE_CANDIDATE:
1672 if (is_pfree (insn, bb_src, bb_trg))
1673 return 1;
1674 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1675 case PRISKY_CANDIDATE:
1676 if (!flag_schedule_speculative_load_dangerous
1677 || is_prisky (insn, bb_src, bb_trg))
1678 return 0;
1679 break;
1680 default:;
1683 return flag_schedule_speculative_load_dangerous;
1686 /* The number of insns from the current block scheduled so far. */
1687 static int sched_target_n_insns;
1688 /* The number of insns from the current block to be scheduled in total. */
1689 static int target_n_insns;
1690 /* The number of insns from the entire region scheduled so far. */
1691 static int sched_n_insns;
1692 /* Nonzero if the last scheduled insn was a jump. */
1693 static int last_was_jump;
1695 /* Implementations of the sched_info functions for region scheduling. */
1696 static void init_ready_list (struct ready_list *);
1697 static int can_schedule_ready_p (rtx);
1698 static int new_ready (rtx);
1699 static int schedule_more_p (void);
1700 static const char *rgn_print_insn (rtx, int);
1701 static int rgn_rank (rtx, rtx);
1702 static int contributes_to_priority (rtx, rtx);
1703 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1705 /* Return nonzero if there are more insns that should be scheduled. */
1707 static int
1708 schedule_more_p (void)
1710 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1713 /* Add all insns that are initially ready to the ready list READY. Called
1714 once before scheduling a set of insns. */
1716 static void
1717 init_ready_list (struct ready_list *ready)
1719 rtx prev_head = current_sched_info->prev_head;
1720 rtx next_tail = current_sched_info->next_tail;
1721 int bb_src;
1722 rtx insn;
1724 target_n_insns = 0;
1725 sched_target_n_insns = 0;
1726 sched_n_insns = 0;
1727 last_was_jump = 0;
1729 /* Print debugging information. */
1730 if (sched_verbose >= 5)
1731 debug_dependencies ();
1733 /* Prepare current target block info. */
1734 if (current_nr_blocks > 1)
1736 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1738 bblst_last = 0;
1739 /* bblst_table holds split blocks and update blocks for each block after
1740 the current one in the region. split blocks and update blocks are
1741 the TO blocks of region edges, so there can be at most rgn_nr_edges
1742 of them. */
1743 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1744 bblst_table = xmalloc (bblst_size * sizeof (int));
1746 bitlst_table_last = 0;
1747 bitlst_table = xmalloc (rgn_nr_edges * sizeof (int));
1749 compute_trg_info (target_bb);
1752 /* Initialize ready list with all 'ready' insns in target block.
1753 Count number of insns in the target block being scheduled. */
1754 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1756 if (INSN_DEP_COUNT (insn) == 0)
1758 ready_add (ready, insn);
1760 if (targetm.sched.adjust_priority)
1761 INSN_PRIORITY (insn) =
1762 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1764 target_n_insns++;
1767 /* Add to ready list all 'ready' insns in valid source blocks.
1768 For speculative insns, check-live, exception-free, and
1769 issue-delay. */
1770 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1771 if (IS_VALID (bb_src))
1773 rtx src_head;
1774 rtx src_next_tail;
1775 rtx tail, head;
1777 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1778 src_next_tail = NEXT_INSN (tail);
1779 src_head = head;
1781 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1783 if (! INSN_P (insn))
1784 continue;
1786 if (!CANT_MOVE (insn)
1787 && (!IS_SPECULATIVE_INSN (insn)
1788 || ((recog_memoized (insn) < 0
1789 || min_insn_conflict_delay (curr_state,
1790 insn, insn) <= 3)
1791 && check_live (insn, bb_src)
1792 && is_exception_free (insn, bb_src, target_bb))))
1793 if (INSN_DEP_COUNT (insn) == 0)
1795 ready_add (ready, insn);
1797 if (targetm.sched.adjust_priority)
1798 INSN_PRIORITY (insn) =
1799 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1805 /* Called after taking INSN from the ready list. Returns nonzero if this
1806 insn can be scheduled, zero if we should silently discard it. */
1808 static int
1809 can_schedule_ready_p (rtx insn)
1811 if (JUMP_P (insn))
1812 last_was_jump = 1;
1814 /* An interblock motion? */
1815 if (INSN_BB (insn) != target_bb)
1817 basic_block b1;
1819 if (IS_SPECULATIVE_INSN (insn))
1821 if (!check_live (insn, INSN_BB (insn)))
1822 return 0;
1823 update_live (insn, INSN_BB (insn));
1825 /* For speculative load, mark insns fed by it. */
1826 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1827 set_spec_fed (insn);
1829 nr_spec++;
1831 nr_inter++;
1833 /* Update source block boundaries. */
1834 b1 = BLOCK_FOR_INSN (insn);
1835 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1837 /* We moved all the insns in the basic block.
1838 Emit a note after the last insn and update the
1839 begin/end boundaries to point to the note. */
1840 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1841 BB_HEAD (b1) = note;
1842 BB_END (b1) = note;
1844 else if (insn == BB_END (b1))
1846 /* We took insns from the end of the basic block,
1847 so update the end of block boundary so that it
1848 points to the first insn we did not move. */
1849 BB_END (b1) = PREV_INSN (insn);
1851 else if (insn == BB_HEAD (b1))
1853 /* We took insns from the start of the basic block,
1854 so update the start of block boundary so that
1855 it points to the first insn we did not move. */
1856 BB_HEAD (b1) = NEXT_INSN (insn);
1859 else
1861 /* In block motion. */
1862 sched_target_n_insns++;
1864 sched_n_insns++;
1866 return 1;
1869 /* Called after INSN has all its dependencies resolved. Return nonzero
1870 if it should be moved to the ready list or the queue, or zero if we
1871 should silently discard it. */
1872 static int
1873 new_ready (rtx next)
1875 /* For speculative insns, before inserting to ready/queue,
1876 check live, exception-free, and issue-delay. */
1877 if (INSN_BB (next) != target_bb
1878 && (!IS_VALID (INSN_BB (next))
1879 || CANT_MOVE (next)
1880 || (IS_SPECULATIVE_INSN (next)
1881 && ((recog_memoized (next) >= 0
1882 && min_insn_conflict_delay (curr_state, next, next) > 3)
1883 || !check_live (next, INSN_BB (next))
1884 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1885 return 0;
1886 return 1;
1889 /* Return a string that contains the insn uid and optionally anything else
1890 necessary to identify this insn in an output. It's valid to use a
1891 static buffer for this. The ALIGNED parameter should cause the string
1892 to be formatted so that multiple output lines will line up nicely. */
1894 static const char *
1895 rgn_print_insn (rtx insn, int aligned)
1897 static char tmp[80];
1899 if (aligned)
1900 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1901 else
1903 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1904 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1905 else
1906 sprintf (tmp, "%d", INSN_UID (insn));
1908 return tmp;
1911 /* Compare priority of two insns. Return a positive number if the second
1912 insn is to be preferred for scheduling, and a negative one if the first
1913 is to be preferred. Zero if they are equally good. */
1915 static int
1916 rgn_rank (rtx insn1, rtx insn2)
1918 /* Some comparisons make sense in interblock scheduling only. */
1919 if (INSN_BB (insn1) != INSN_BB (insn2))
1921 int spec_val, prob_val;
1923 /* Prefer an inblock motion over an interblock motion. */
1924 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1925 return 1;
1926 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1927 return -1;
1929 /* Prefer a useful motion over a speculative one. */
1930 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1931 if (spec_val)
1932 return spec_val;
1934 /* Prefer a more probable (speculative) insn. */
1935 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1936 if (prob_val)
1937 return prob_val;
1939 return 0;
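/* Two worked examples of the ranking above (numbers assumed for
   illustration): if insn2 is in the target block and insn1 is an
   interblock candidate, the first test returns 1 and insn2 is preferred.
   If both are speculative candidates from other blocks with
   INSN_PROBABILITY 80 (insn1) and 40 (insn2), the comparison falls
   through to the probability test and returns 40 - 80 = -40, preferring
   insn1.  */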
1942 /* NEXT is an instruction that depends on INSN (a backward dependence);
1943 return nonzero if we should include this dependence in priority
1944 calculations. */
1946 static int
1947 contributes_to_priority (rtx next, rtx insn)
1949 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1952 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1953 conditionally set before INSN. Store the set of registers that
1954 must be considered as used by this jump in USED and that of
1955 registers that must be considered as set in SET. */
1957 static void
1958 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1959 regset cond_exec ATTRIBUTE_UNUSED,
1960 regset used ATTRIBUTE_UNUSED,
1961 regset set ATTRIBUTE_UNUSED)
1963 /* Nothing to do here, since we postprocess jumps in
1964 add_branch_dependences. */
1967 /* Used in schedule_insns to initialize current_sched_info for scheduling
1968 regions (or single basic blocks). */
1970 static struct sched_info region_sched_info =
1972 init_ready_list,
1973 can_schedule_ready_p,
1974 schedule_more_p,
1975 new_ready,
1976 rgn_rank,
1977 rgn_print_insn,
1978 contributes_to_priority,
1979 compute_jump_reg_dependencies,
1981 NULL, NULL,
1982 NULL, NULL,
1983 0, 0, 0
1986 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1988 static bool
1989 sets_likely_spilled (rtx pat)
1991 bool ret = false;
1992 note_stores (pat, sets_likely_spilled_1, &ret);
1993 return ret;
1996 static void
1997 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1999 bool *ret = (bool *) data;
2001 if (GET_CODE (pat) == SET
2002 && REG_P (x)
2003 && REGNO (x) < FIRST_PSEUDO_REGISTER
2004 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2005 *ret = true;
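/* Illustrative sketch of the same note_stores idiom (hypothetical helpers,
   not used by this pass): count the hard registers written by a pattern.  */
#if 0
static void
count_hard_reg_sets_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
{
  int *count = (int *) data;

  if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER)
    (*count)++;
}

static int
count_hard_reg_sets (rtx pat)
{
  int count = 0;
  note_stores (pat, count_hard_reg_sets_1, &count);
  return count;
}
#endif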
2008 /* Add dependences so that branches are scheduled to run last in their
2009 block. */
2011 static void
2012 add_branch_dependences (rtx head, rtx tail)
2014 rtx insn, last;
2016 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2017 that can throw exceptions, force them to remain in order at the end of
2018 the block by adding dependencies and giving the last a high priority.
2019 There may be notes present, and prev_head may also be a note.
2021 Branches must obviously remain at the end. Calls should remain at the
2022 end since moving them results in worse register allocation. Uses remain
2023 at the end to ensure proper register allocation.
2025 cc0 setters remain at the end because they can't be moved away from
2026 their cc0 user.
2028 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2029 are not moved before reload because we can wind up with register
2030 allocation failures. */
2032 insn = tail;
2033 last = 0;
2034 while (CALL_P (insn)
2035 || JUMP_P (insn)
2036 || (NONJUMP_INSN_P (insn)
2037 && (GET_CODE (PATTERN (insn)) == USE
2038 || GET_CODE (PATTERN (insn)) == CLOBBER
2039 || can_throw_internal (insn)
2040 #ifdef HAVE_cc0
2041 || sets_cc0_p (PATTERN (insn))
2042 #endif
2043 || (!reload_completed
2044 && sets_likely_spilled (PATTERN (insn)))))
2045 || NOTE_P (insn))
2047 if (!NOTE_P (insn))
2049 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2051 add_dependence (last, insn, REG_DEP_ANTI);
2052 INSN_REF_COUNT (insn)++;
2055 CANT_MOVE (insn) = 1;
2057 last = insn;
2060 /* Don't overrun the bounds of the basic block. */
2061 if (insn == head)
2062 break;
2064 insn = PREV_INSN (insn);
2067 /* Make sure these insns are scheduled last in their block. */
2068 insn = last;
2069 if (insn != 0)
2070 while (insn != head)
2072 insn = prev_nonnote_insn (insn);
2074 if (INSN_REF_COUNT (insn) != 0)
2075 continue;
2077 add_dependence (last, insn, REG_DEP_ANTI);
2078 INSN_REF_COUNT (insn) = 1;
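/* Standalone toy model (hypothetical, plain C; it ignores notes and the
   ref-count bookkeeping) of the two passes above: walk the block tail
   backwards, chain the members of the "end group" together, then make
   every remaining insn a predecessor of the group's first member.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* 1 marks insns that must stay at the end (branch, USE, ...).  */
  int end_group[6] = { 0, 0, 0, 0, 1, 1 };
  int i, last = -1;

  /* Pass 1: preserve the relative order of the end group.  */
  for (i = 5; i >= 0 && end_group[i]; i--)
    {
      if (last >= 0)
        printf ("anti-dep: insn %d before insn %d\n", i, last);
      last = i;
    }

  /* Pass 2: everything else must come before the first group member.  */
  if (last >= 0)
    for (; i >= 0; i--)
      printf ("anti-dep: insn %d before insn %d\n", i, last);

  return 0;
}
#endif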
2082 /* Data structures for the computation of data dependences in a region. We
2083 keep one `deps' structure for every basic block. Before analyzing the
2084 data dependences for a bb, its variables are initialized as a function of
2085 the variables of its predecessors. When the analysis for a bb completes,
2086 we save the contents to the corresponding bb_deps[bb] variable. */
2088 static struct deps *bb_deps;
2090 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2092 static rtx
2093 concat_INSN_LIST (rtx copy, rtx old)
2095 rtx new = old;
2096 for (; copy ; copy = XEXP (copy, 1))
2097 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2098 return new;
2101 static void
2102 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2103 rtx *old_mems_p)
2105 rtx new_insns = *old_insns_p;
2106 rtx new_mems = *old_mems_p;
2108 while (copy_insns)
2110 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2111 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2112 copy_insns = XEXP (copy_insns, 1);
2113 copy_mems = XEXP (copy_mems, 1);
2116 *old_insns_p = new_insns;
2117 *old_mems_p = new_mems;
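/* Illustrative sketch (hypothetical insns A, B, C): the copied elements get
   fresh list nodes pushed on the front, so their relative order is reversed
   (the dependence code only walks these lists as sets, so that is harmless),
   while OLD's nodes are shared untouched at the tail.  */
#if 0
  rtx old  = alloc_INSN_LIST (insn_a, NULL_RTX);                   /* (A)     */
  rtx copy = alloc_INSN_LIST (insn_b,
                              alloc_INSN_LIST (insn_c, NULL_RTX)); /* (B C)   */
  rtx new  = concat_INSN_LIST (copy, old);                         /* (C B A) */
#endif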
2120 /* After computing the dependencies for block BB, propagate the dependencies
2121 found in PRED_DEPS to the successors of the block. */
2122 static void
2123 propagate_deps (int bb, struct deps *pred_deps)
2125 int b = BB_TO_BLOCK (bb);
2126 int e, first_edge;
2128 /* bb's structures are inherited by its successors. */
2129 first_edge = e = OUT_EDGES (b);
2130 if (e > 0)
2133 int b_succ = TO_BLOCK (e);
2134 int bb_succ = BLOCK_TO_BB (b_succ);
2135 struct deps *succ_deps = bb_deps + bb_succ;
2136 int reg;
2138 /* Only bbs "below" bb, in the same region, are interesting. */
2139 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2140 || bb_succ <= bb)
2142 e = NEXT_OUT (e);
2143 continue;
2146 /* The reg_last lists are inherited by bb_succ. */
2147 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2149 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2150 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2152 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2153 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2154 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2155 succ_rl->clobbers);
2156 succ_rl->uses_length += pred_rl->uses_length;
2157 succ_rl->clobbers_length += pred_rl->clobbers_length;
2159 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2161 /* Mem read/write lists are inherited by bb_succ. */
2162 concat_insn_mem_list (pred_deps->pending_read_insns,
2163 pred_deps->pending_read_mems,
2164 &succ_deps->pending_read_insns,
2165 &succ_deps->pending_read_mems);
2166 concat_insn_mem_list (pred_deps->pending_write_insns,
2167 pred_deps->pending_write_mems,
2168 &succ_deps->pending_write_insns,
2169 &succ_deps->pending_write_mems);
2171 succ_deps->last_pending_memory_flush
2172 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2173 succ_deps->last_pending_memory_flush);
2175 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2176 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2178 /* last_function_call is inherited by bb_succ. */
2179 succ_deps->last_function_call
2180 = concat_INSN_LIST (pred_deps->last_function_call,
2181 succ_deps->last_function_call);
2183 /* sched_before_next_call is inherited by bb_succ. */
2184 succ_deps->sched_before_next_call
2185 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2186 succ_deps->sched_before_next_call);
2188 e = NEXT_OUT (e);
2190 while (e != first_edge);
2192 /* These lists should point to the right place, for correct
2193 freeing later. */
2194 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2195 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2196 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2197 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2199 /* Can't allow these to be freed twice. */
2200 pred_deps->pending_read_insns = 0;
2201 pred_deps->pending_read_mems = 0;
2202 pred_deps->pending_write_insns = 0;
2203 pred_deps->pending_write_mems = 0;
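/* Skeleton of the circular out-edge walk used above (hypothetical helper;
   the body is elided).  OUT_EDGES gives the first edge leaving block B,
   NEXT_OUT the next one, and the walk stops when it wraps around.  */
#if 0
static void
visit_region_successors (int b)
{
  int e, first_edge;

  first_edge = e = OUT_EDGES (b);
  if (e > 0)
    do
      {
        int b_succ = TO_BLOCK (e);
        /* ... inspect B_SUCC here ...  */
        e = NEXT_OUT (e);
      }
    while (e != first_edge);
}
#endif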
2206 /* Compute backward dependences inside bb. In a multi-block region:
2207 (1) a bb is analyzed after its predecessors, and (2) the lists in
2208 effect at the end of bb (after analyzing for bb) are inherited by
2209 bb's successors.
2211 Specifically for reg-reg data dependences, the block insns are
2212 scanned by sched_analyze () top-to-bottom. Two lists are
2213 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2214 and reg_last[].uses for register USEs.
2216 When analysis is completed for bb, we update for its successors:
2217 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2218 ; - USES[succ] = Union (USES [succ], USES [bb])
2220 The mechanism for computing mem-mem data dependence is very
2221 similar, and the result is interblock dependences in the region. */
2223 static void
2224 compute_block_backward_dependences (int bb)
2226 rtx head, tail;
2227 struct deps tmp_deps;
2229 tmp_deps = bb_deps[bb];
2231 /* Do the analysis for this block. */
2232 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2233 sched_analyze (&tmp_deps, head, tail);
2234 add_branch_dependences (head, tail);
2236 if (current_nr_blocks > 1)
2237 propagate_deps (bb, &tmp_deps);
2239 /* Free up the INSN_LISTs. */
2240 free_deps (&tmp_deps);
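/* Worked example (hypothetical insns): with bb0 a predecessor of bb1 in the
   same region,

     bb0:  insn 20:  r7 = ...        recorded in reg_last[r7].sets
     bb1:  insn 30:  ... = r7 ...    sched_analyze () finds insn 20 in the
                                     inherited reg_last[r7].sets and adds a
                                     LOG_LINK from insn 30 back to insn 20

   so the use in bb1 ends up with an interblock dependence on the def in bb0,
   exactly as described by the unions above.  */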
2243 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2244 them to the unused_*_list variables, so that they can be reused. */
2246 static void
2247 free_pending_lists (void)
2249 int bb;
2251 for (bb = 0; bb < current_nr_blocks; bb++)
2253 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2254 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2255 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2256 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2262 /* Print dependences for debugging; callable from the debugger. */
2262 void
2263 debug_dependencies (void)
2265 int bb;
2267 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2268 for (bb = 0; bb < current_nr_blocks; bb++)
2270 rtx head, tail;
2271 rtx next_tail;
2272 rtx insn;
2274 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2275 next_tail = NEXT_INSN (tail);
2276 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2277 BB_TO_BLOCK (bb), bb);
2279 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2280 "insn", "code", "bb", "dep", "prio", "cost",
2281 "reservation");
2282 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2283 "----", "----", "--", "---", "----", "----",
2284 "-----------");
2286 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2288 rtx link;
2290 if (! INSN_P (insn))
2292 int n;
2293 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2294 if (NOTE_P (insn))
2296 n = NOTE_LINE_NUMBER (insn);
2297 if (n < 0)
2298 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2299 else
2301 expanded_location xloc;
2302 NOTE_EXPANDED_LOCATION (xloc, insn);
2303 fprintf (sched_dump, "line %d, file %s\n",
2304 xloc.line, xloc.file);
2307 else
2308 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2309 continue;
2312 fprintf (sched_dump,
2313 ";; %s%5d%6d%6d%6d%6d%6d ",
2314 (SCHED_GROUP_P (insn) ? "+" : " "),
2315 INSN_UID (insn),
2316 INSN_CODE (insn),
2317 INSN_BB (insn),
2318 INSN_DEP_COUNT (insn),
2319 INSN_PRIORITY (insn),
2320 insn_cost (insn, 0, 0));
2322 if (recog_memoized (insn) < 0)
2323 fprintf (sched_dump, "nothing");
2324 else
2325 print_reservation (sched_dump, insn);
2327 fprintf (sched_dump, "\t: ");
2328 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2329 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2330 fprintf (sched_dump, "\n");
2333 fprintf (sched_dump, "\n");
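/* Usage sketch (values and spacing approximate): from gdb, while stopped
   inside the scheduler, the table can be dumped with

     (gdb) call debug_dependencies ()

   and a data row produced by the format above reads roughly as

     ;;     40    12     1     2     3     1   nothing  : 45 47

   i.e. uid, insn code, region bb, dependence count, priority, cost,
   reservation, then the uids of the insns this one feeds.  */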
2336 /* Returns true if all the basic blocks of the current region have
2337 NOTE_DISABLE_SCHED_OF_BLOCK, which means that the region should not be scheduled. */
2338 static bool
2339 sched_is_disabled_for_current_region_p (void)
2341 int bb;
2343 for (bb = 0; bb < current_nr_blocks; bb++)
2344 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2345 return false;
2347 return true;
2350 /* Schedule a region. A region is either an inner loop, a loop-free
2351 subroutine, or a single basic block. Each bb in the region is
2352 scheduled after its flow predecessors. */
2354 static void
2355 schedule_region (int rgn)
2357 int bb;
2358 int rgn_n_insns = 0;
2359 int sched_rgn_n_insns = 0;
2361 /* Set variables for the current region. */
2362 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2363 current_blocks = RGN_BLOCKS (rgn);
2365 /* Don't schedule a region that is marked by
2366 NOTE_DISABLE_SCHED_OF_BLOCK. */
2367 if (sched_is_disabled_for_current_region_p ())
2368 return;
2370 init_deps_global ();
2372 /* Initializations for region data dependence analysis. */
2373 bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
2374 for (bb = 0; bb < current_nr_blocks; bb++)
2375 init_deps (bb_deps + bb);
2377 /* Compute LOG_LINKS. */
2378 for (bb = 0; bb < current_nr_blocks; bb++)
2379 compute_block_backward_dependences (bb);
2381 /* Compute INSN_DEPEND. */
2382 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2384 rtx head, tail;
2385 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2387 compute_forward_dependences (head, tail);
2389 if (targetm.sched.dependencies_evaluation_hook)
2390 targetm.sched.dependencies_evaluation_hook (head, tail);
2394 /* Set priorities. */
2395 for (bb = 0; bb < current_nr_blocks; bb++)
2397 rtx head, tail;
2398 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2400 rgn_n_insns += set_priorities (head, tail);
2403 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2404 if (current_nr_blocks > 1)
2406 int i;
2408 prob = xmalloc ((current_nr_blocks) * sizeof (float));
2410 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2411 sbitmap_vector_zero (dom, current_nr_blocks);
2412 /* Map each edge with a source block in this region to a region-local bit position. */
2413 rgn_nr_edges = 0;
2414 edge_to_bit = xmalloc (nr_edges * sizeof (int));
2415 for (i = 1; i < nr_edges; i++)
2416 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2417 EDGE_TO_BIT (i) = rgn_nr_edges++;
2418 rgn_edges = xmalloc (rgn_nr_edges * sizeof (int));
2420 rgn_nr_edges = 0;
2421 for (i = 1; i < nr_edges; i++)
2422 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2423 rgn_edges[rgn_nr_edges++] = i;
2425 /* Split edges. */
2426 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2427 sbitmap_vector_zero (pot_split, current_nr_blocks);
2428 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2429 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2431 /* Compute probabilities, dominators, split_edges. */
2432 for (bb = 0; bb < current_nr_blocks; bb++)
2433 compute_dom_prob_ps (bb);
2436 /* Now we can schedule all blocks. */
2437 for (bb = 0; bb < current_nr_blocks; bb++)
2439 rtx head, tail;
2440 int b = BB_TO_BLOCK (bb);
2442 get_block_head_tail (b, &head, &tail);
2444 if (no_real_insns_p (head, tail))
2445 continue;
2447 current_sched_info->prev_head = PREV_INSN (head);
2448 current_sched_info->next_tail = NEXT_INSN (tail);
2450 if (write_symbols != NO_DEBUG)
2452 save_line_notes (b, head, tail);
2453 rm_line_notes (head, tail);
2456 /* rm_other_notes only removes notes which are _inside_ the
2457 block---that is, it won't remove notes before the first real insn
2458 or after the last real insn of the block. So if the first insn
2459 has a REG_SAVE_NOTE which would otherwise be emitted before the
2460 insn, it is redundant with the note before the start of the
2461 block, and so we have to take it out. */
2462 if (INSN_P (head))
2464 rtx note;
2466 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2467 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2469 remove_note (head, note);
2470 note = XEXP (note, 1);
2471 remove_note (head, note);
2475 /* Remove remaining note insns from the block, save them in
2476 note_list. These notes are restored at the end of
2477 schedule_block (). */
2478 rm_other_notes (head, tail);
2480 target_bb = bb;
2482 current_sched_info->queue_must_finish_empty
2483 = current_nr_blocks > 1 && !flag_schedule_interblock;
2485 schedule_block (b, rgn_n_insns);
2486 sched_rgn_n_insns += sched_n_insns;
2488 /* Update target block boundaries. */
2489 if (head == BB_HEAD (BASIC_BLOCK (b)))
2490 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2491 if (tail == BB_END (BASIC_BLOCK (b)))
2492 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2494 /* Clean up. */
2495 if (current_nr_blocks > 1)
2497 free (candidate_table);
2498 free (bblst_table);
2499 free (bitlst_table);
2503 /* Sanity check: verify that all region insns were scheduled. */
2504 if (sched_rgn_n_insns != rgn_n_insns)
2505 abort ();
2507 /* Restore line notes. */
2508 if (write_symbols != NO_DEBUG)
2510 for (bb = 0; bb < current_nr_blocks; bb++)
2512 rtx head, tail;
2513 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2514 restore_line_notes (head, tail);
2518 /* Done with this region. */
2519 free_pending_lists ();
2521 finish_deps_global ();
2523 free (bb_deps);
2525 if (current_nr_blocks > 1)
2527 free (prob);
2528 sbitmap_vector_free (dom);
2529 sbitmap_vector_free (pot_split);
2530 sbitmap_vector_free (ancestor_edges);
2531 free (edge_to_bit);
2532 free (rgn_edges);
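/* Illustrative numbers (hypothetical): the interblock setup above compresses
   the global edge numbering into region-local bit positions.  If the region
   owns global edges 4, 7 and 9, the two loops produce

     EDGE_TO_BIT (4) = 0   EDGE_TO_BIT (7) = 1   EDGE_TO_BIT (9) = 2
     rgn_edges[] = { 4, 7, 9 },  rgn_nr_edges = 3

   so pot_split and ancestor_edges need only rgn_nr_edges bits per block
   instead of nr_edges.  */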
2536 /* Indexed by region, holds the number of death notes found in that region.
2537 Used for consistency checks. */
2538 static int *deaths_in_region;
2540 /* Initialize data structures for region scheduling. */
2542 static void
2543 init_regions (void)
2545 sbitmap blocks;
2546 int rgn;
2548 nr_regions = 0;
2549 rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
2550 rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
2551 block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
2552 containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
2554 /* Compute regions for scheduling. */
2555 if (reload_completed
2556 || n_basic_blocks == 1
2557 || !flag_schedule_interblock)
2559 find_single_block_region ();
2561 else
2563 /* Verify that a 'good' control flow graph can be built. */
2564 if (is_cfg_nonregular ())
2566 find_single_block_region ();
2568 else
2570 struct edge_list *edge_list;
2572 /* The scheduler runs after estimate_probabilities; therefore, we
2573 can't blindly call back into find_basic_blocks since doing so
2574 could invalidate the branch probability info. We could,
2575 however, call cleanup_cfg. */
2576 edge_list = create_edge_list ();
2578 /* Compute the dominators and post dominators. */
2579 calculate_dominance_info (CDI_DOMINATORS);
2581 /* build_control_flow will return nonzero if it detects unreachable
2582 blocks or any other irregularity with the cfg which prevents
2583 cross block scheduling. */
2584 if (build_control_flow (edge_list) != 0)
2585 find_single_block_region ();
2586 else
2587 find_rgns (edge_list);
2589 if (sched_verbose >= 3)
2590 debug_regions ();
2592 /* We are done with flow's edge list. */
2593 free_edge_list (edge_list);
2595 /* For now. This will move as more and more of haifa is converted
2596 to using the cfg code in flow.c. */
2597 free_dominance_info (CDI_DOMINATORS);
2602 if (CHECK_DEAD_NOTES)
2604 blocks = sbitmap_alloc (last_basic_block);
2605 deaths_in_region = xmalloc (sizeof (int) * nr_regions);
2606 /* Remove all death notes from the subroutine. */
2607 for (rgn = 0; rgn < nr_regions; rgn++)
2609 int b;
2611 sbitmap_zero (blocks);
2612 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2613 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2615 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2617 sbitmap_free (blocks);
2619 else
2620 count_or_remove_death_notes (NULL, 1);
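/* Note on the CHECK_DEAD_NOTES bookkeeping: the death notes removed here are
   counted per region in deaths_in_region[]; after scheduling,
   schedule_insns () recounts them for every single-block region and aborts
   if the totals differ, which catches liveness-update bugs.  */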
2623 /* The one entry point in this file. DUMP_FILE is the dump file for
2624 this pass. */
2626 void
2627 schedule_insns (FILE *dump_file)
2629 sbitmap large_region_blocks, blocks;
2630 int rgn;
2631 int any_large_regions;
2632 basic_block bb;
2634 /* Taking care of this degenerate case makes the rest of
2635 this code simpler. */
2636 if (n_basic_blocks == 0)
2637 return;
2639 nr_inter = 0;
2640 nr_spec = 0;
2642 sched_init (dump_file);
2644 init_regions ();
2646 current_sched_info = &region_sched_info;
2648 /* Schedule every region in the subroutine. */
2649 for (rgn = 0; rgn < nr_regions; rgn++)
2650 schedule_region (rgn);
2652 /* Update life analysis for the subroutine. Do single block regions
2653 first so that we can verify that live_at_start didn't change. Then
2654 do all other blocks. */
2655 /* ??? There is an outside possibility that update_life_info, or more
2656 to the point propagate_block, could get called with nonzero flags
2657 more than once for one basic block. This would be kinda bad if it
2658 were to happen, since REG_INFO would be accumulated twice for the
2659 block, and we'd have twice the REG_DEAD notes.
2661 I'm fairly certain that this _shouldn't_ happen, since I don't think
2662 that live_at_start should change at region heads. I'm not sure what the
2663 best way to test for this kind of thing would be. */
2665 allocate_reg_life_data ();
2666 compute_bb_for_insn ();
2668 any_large_regions = 0;
2669 large_region_blocks = sbitmap_alloc (last_basic_block);
2670 sbitmap_zero (large_region_blocks);
2671 FOR_EACH_BB (bb)
2672 SET_BIT (large_region_blocks, bb->index);
2674 blocks = sbitmap_alloc (last_basic_block);
2675 sbitmap_zero (blocks);
2677 /* Update life information. For regions consisting of multiple blocks
2678 we've possibly done interblock scheduling that affects global liveness.
2679 For regions consisting of single blocks we need to do only local
2680 liveness. */
2681 for (rgn = 0; rgn < nr_regions; rgn++)
2682 if (RGN_NR_BLOCKS (rgn) > 1)
2683 any_large_regions = 1;
2684 else
2686 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2687 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2690 /* Don't update reg info after reload, since that affects
2691 regs_ever_live, which should not change after reload. */
2692 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2693 (reload_completed ? PROP_DEATH_NOTES
2694 : PROP_DEATH_NOTES | PROP_REG_INFO));
2695 if (any_large_regions)
2697 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2698 PROP_DEATH_NOTES | PROP_REG_INFO);
2701 if (CHECK_DEAD_NOTES)
2703 /* Verify the counts of death notes in the single basic block
2704 regions. */
2705 for (rgn = 0; rgn < nr_regions; rgn++)
2706 if (RGN_NR_BLOCKS (rgn) == 1)
2708 sbitmap_zero (blocks);
2709 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2711 if (deaths_in_region[rgn]
2712 != count_or_remove_death_notes (blocks, 0))
2713 abort ();
2715 free (deaths_in_region);
2718 /* Reposition the prologue and epilogue notes in case we moved the
2719 prologue/epilogue insns. */
2720 if (reload_completed)
2721 reposition_prologue_and_epilogue_notes (get_insns ());
2723 /* Delete redundant line notes. */
2724 if (write_symbols != NO_DEBUG)
2725 rm_redundant_line_notes ();
2727 if (sched_verbose)
2729 if (reload_completed == 0 && flag_schedule_interblock)
2731 fprintf (sched_dump,
2732 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2733 nr_inter, nr_spec);
2735 else
2737 if (nr_inter > 0)
2738 abort ();
2740 fprintf (sched_dump, "\n\n");
2743 /* Clean up. */
2744 free (rgn_table);
2745 free (rgn_bb_table);
2746 free (block_to_bb);
2747 free (containing_rgn);
2749 sched_finish ();
2751 if (edge_table)
2753 free (edge_table);
2754 edge_table = NULL;
2757 if (in_edges)
2759 free (in_edges);
2760 in_edges = NULL;
2762 if (out_edges)
2764 free (out_edges);
2765 out_edges = NULL;
2768 sbitmap_free (blocks);
2769 sbitmap_free (large_region_blocks);
2771 #endif