1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com). Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "basic-block.h"
57 #include "regs.h"
58 #include "function.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "insn-attr.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "recog.h"
65 #include "cfglayout.h"
66 #include "sched-int.h"
67 #include "target.h"
69 /* Define this when we want to count REG_DEAD notes before and after scheduling
70 for sanity checking. We can't do that when conditional execution is used,
71 as REG_DEAD notes exist only for unconditional deaths. */
73 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
74 #define CHECK_DEAD_NOTES 1
75 #else
76 #define CHECK_DEAD_NOTES 0
77 #endif
80 #ifdef INSN_SCHEDULING
81 /* Some accessor macros for h_i_d members only used within this file. */
82 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
83 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
84 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
86 #define MAX_RGN_BLOCKS 10
87 #define MAX_RGN_INSNS 100
89 /* nr_inter/spec counts interblock/speculative motion for the function. */
90 static int nr_inter, nr_spec;
92 /* Control flow graph edges are kept in circular lists. */
93 typedef struct
95 int from_block;
96 int to_block;
97 int next_in;
98 int next_out;
100 haifa_edge;
101 static haifa_edge *edge_table;
103 #define NEXT_IN(edge) (edge_table[edge].next_in)
104 #define NEXT_OUT(edge) (edge_table[edge].next_out)
105 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
106 #define TO_BLOCK(edge) (edge_table[edge].to_block)
108 /* Number of edges in the control flow graph. (In fact, larger than
109 that by 1, since edge 0 is unused.) */
110 static int nr_edges;
112 /* Circular list of incoming/outgoing edges of a block. */
113 static int *in_edges;
114 static int *out_edges;
116 #define IN_EDGES(block) (in_edges[block])
117 #define OUT_EDGES(block) (out_edges[block])
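/* Illustrative sketch, not part of the original sources: a hypothetical
   helper that walks the circular out-edge list of BLOCK using the
   conventions above.  Edge number 0 is unused, so a zero OUT_EDGES entry
   means no out edges have been recorded for the block.  new_edge below
   walks its list the same way when checking for duplicates.  */
#if 0
static int
count_out_edges_example (block)
     int block;
{
  int fst_edge, curr_edge, n = 0;

  fst_edge = curr_edge = OUT_EDGES (block);
  while (curr_edge)
    {
      n++;
      curr_edge = NEXT_OUT (curr_edge);
      if (curr_edge == fst_edge)
	break;
    }
  return n;
}
#endif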
119 static int is_cfg_nonregular PARAMS ((void));
120 static int build_control_flow PARAMS ((struct edge_list *));
121 static void new_edge PARAMS ((int, int));
123 /* A region is the main entity for interblock scheduling: insns
124 are allowed to move between blocks in the same region, along
125 control flow graph edges, in the 'up' direction. */
126 typedef struct
128 int rgn_nr_blocks; /* Number of blocks in region. */
129 int rgn_blocks; /* Blocks in the region (actually index in rgn_bb_table). */
131 region;
133 /* Number of regions in the procedure. */
134 static int nr_regions;
136 /* Table of region descriptions. */
137 static region *rgn_table;
139 /* Array of lists of regions' blocks. */
140 static int *rgn_bb_table;
142 /* Topological order of blocks in the region (if b2 is reachable from
143 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
144 always referred to by either block or b, while its topological
145 order name (in the region) is referred to by bb. */
146 static int *block_to_bb;
148 /* The number of the region containing a block. */
149 static int *containing_rgn;
151 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
152 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
153 #define BLOCK_TO_BB(block) (block_to_bb[block])
154 #define CONTAINING_RGN(block) (containing_rgn[block])
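/* Illustrative sketch, not part of the original sources: a hypothetical
   helper that enumerates the blocks of region RGN in the topological order
   recorded by the region finder.  E.g. if RGN_BLOCKS (rgn) == 5 and
   RGN_NR_BLOCKS (rgn) == 3, the blocks are rgn_bb_table[5], rgn_bb_table[6]
   and rgn_bb_table[7].  debug_regions below performs essentially the same
   walk with extra checking.  */
#if 0
static void
walk_region_blocks_example (rgn)
     int rgn;
{
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      /* Every block records its region and its topological position.  */
      if (CONTAINING_RGN (block) != rgn || BLOCK_TO_BB (block) != bb)
	abort ();
    }
}
#endif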
156 void debug_regions PARAMS ((void));
157 static void find_single_block_region PARAMS ((void));
158 static void find_rgns PARAMS ((struct edge_list *, dominance_info));
159 static int too_large PARAMS ((int, int *, int *));
161 extern void debug_live PARAMS ((int, int));
163 /* Blocks of the current region being scheduled. */
164 static int current_nr_blocks;
165 static int current_blocks;
167 /* The mapping from bb to block. */
168 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
170 typedef struct
172 int *first_member; /* Pointer to the list start in bitlst_table. */
173 int nr_members; /* The number of members of the bit list. */
175 bitlst;
177 static int bitlst_table_last;
178 static int *bitlst_table;
180 static void extract_bitlst PARAMS ((sbitmap, bitlst *));
182 /* Target info declarations.
184 The block currently being scheduled is referred to as the "target" block,
185 while other blocks in the region from which insns can be moved to the
186 target are called "source" blocks. The candidate structure holds info
187 about such sources: are they valid? Speculative? Etc. */
188 typedef bitlst bblst;
189 typedef struct
191 char is_valid;
192 char is_speculative;
193 int src_prob;
194 bblst split_bbs;
195 bblst update_bbs;
197 candidate;
199 static candidate *candidate_table;
201 /* A speculative motion requires checking live information on the path
202 from 'source' to 'target'. The split blocks are those to be checked.
203 After a speculative motion, live information should be modified in
204 the 'update' blocks.
206 Lists of split and update blocks for each candidate of the current
207 target are in array bblst_table. */
208 static int *bblst_table, bblst_size, bblst_last;
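/* Illustrative example, not part of the original sources: suppose a
   candidate source block S has the target block T as its only predecessor,
   and T has exactly one other successor X inside the region.  The only
   split edge of S relative to T is T->X, so X is a split block: an insn
   moved speculatively from S up into T also executes when control goes to
   X, so the register it sets must not be live at the start of X
   (check_live).  S itself is the update block: after the motion the moved
   insn's result is computed before S is entered, so it must be added to
   S's live-at-start set (update_live).  */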
210 #define IS_VALID(src) ( candidate_table[src].is_valid )
211 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
212 #define SRC_PROB(src) ( candidate_table[src].src_prob )
214 /* The bb being currently scheduled. */
215 static int target_bb;
217 /* List of edges. */
218 typedef bitlst edgelst;
220 /* Target info functions. */
221 static void split_edges PARAMS ((int, int, edgelst *));
222 static void compute_trg_info PARAMS ((int));
223 void debug_candidate PARAMS ((int));
224 void debug_candidates PARAMS ((int));
226 /* Dominators array: dom[i] contains the sbitmap of dominators of
227 bb i in the region. */
228 static sbitmap *dom;
230 /* bb 0 is the only region entry. */
231 #define IS_RGN_ENTRY(bb) (!bb)
233 /* Is bb_src dominated by bb_trg? */
234 #define IS_DOMINATED(bb_src, bb_trg) \
235 ( TEST_BIT (dom[bb_src], bb_trg) )
237 /* Probability: Prob[i] is a float in [0, 1] which is the probability
238 of bb i relative to the region entry. */
239 static float *prob;
241 /* The probability of bb_src, relative to bb_trg. Note that while the
242 'prob[bb]' is a float in [0, 1], this macro returns an integer
243 in [0, 100]. */
244 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
245 prob[bb_trg])))
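/* For example, with prob[bb_src] == 0.3 and prob[bb_trg] == 0.6,
   GET_SRC_PROB (bb_src, bb_trg) evaluates to 50: bb_src is expected to
   execute in about half of the runs that reach bb_trg.  */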
247 /* Bit-set of edges, where bit i stands for edge i. */
248 typedef sbitmap edgeset;
250 /* Number of edges in the region. */
251 static int rgn_nr_edges;
253 /* Array of size rgn_nr_edges. */
254 static int *rgn_edges;
257 /* Mapping from each edge in the graph to its number in the rgn. */
258 static int *edge_to_bit;
259 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
261 /* The split edges of a source bb are different for each target
262 bb. In order to compute this efficiently, the 'potential-split edges'
263 are computed for each bb prior to scheduling a region. This is actually
264 the split edges of each bb relative to the region entry.
266 pot_split[bb] is the set of potential split edges of bb. */
267 static edgeset *pot_split;
269 /* For every bb, a set of its ancestor edges. */
270 static edgeset *ancestor_edges;
272 static void compute_dom_prob_ps PARAMS ((int));
274 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
275 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
276 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
278 /* Parameters affecting the decision of rank_for_schedule().
279 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
280 #define MIN_PROBABILITY 40
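/* That is, a candidate source block whose GET_SRC_PROB relative to the
   target falls below 40 is not considered valid for interblock motion;
   see the is_valid test in compute_trg_info.  */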
282 /* Speculative scheduling functions. */
283 static int check_live_1 PARAMS ((int, rtx));
284 static void update_live_1 PARAMS ((int, rtx));
285 static int check_live PARAMS ((rtx, int));
286 static void update_live PARAMS ((rtx, int));
287 static void set_spec_fed PARAMS ((rtx));
288 static int is_pfree PARAMS ((rtx, int, int));
289 static int find_conditional_protection PARAMS ((rtx, int));
290 static int is_conditionally_protected PARAMS ((rtx, int, int));
291 static int is_prisky PARAMS ((rtx, int, int));
292 static int is_exception_free PARAMS ((rtx, int, int));
294 static bool sets_likely_spilled PARAMS ((rtx));
295 static void sets_likely_spilled_1 PARAMS ((rtx, rtx, void *));
296 static void add_branch_dependences PARAMS ((rtx, rtx));
297 static void compute_block_backward_dependences PARAMS ((int));
298 void debug_dependencies PARAMS ((void));
300 static void init_regions PARAMS ((void));
301 static void schedule_region PARAMS ((int));
302 static rtx concat_INSN_LIST PARAMS ((rtx, rtx));
303 static void concat_insn_mem_list PARAMS ((rtx, rtx, rtx *, rtx *));
304 static void propagate_deps PARAMS ((int, struct deps *));
305 static void free_pending_lists PARAMS ((void));
307 /* Functions for construction of the control flow graph. */
309 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
311 We decide not to build the control flow graph if there is possibly more
312 than one entry to the function, if computed branches exist, or if we
313 have nonlocal gotos. */
315 static int
316 is_cfg_nonregular ()
318 basic_block b;
319 rtx insn;
320 RTX_CODE code;
322 /* If we have a label that could be the target of a nonlocal goto, then
323 the cfg is not well structured. */
324 if (nonlocal_goto_handler_labels)
325 return 1;
327 /* If we have any forced labels, then the cfg is not well structured. */
328 if (forced_labels)
329 return 1;
331 /* If this function has a computed jump, then we consider the cfg
332 not well structured. */
333 if (current_function_has_computed_jump)
334 return 1;
336 /* If we have exception handlers, then we consider the cfg not well
337 structured. ?!? We should be able to handle this now that flow.c
338 computes an accurate cfg for EH. */
339 if (current_function_has_exception_handlers ())
340 return 1;
342 /* If we have non-jumping insns which refer to labels, then we consider
343 the cfg not well structured. */
344 /* Check for labels referred to other than by jumps. */
345 FOR_EACH_BB (b)
346 for (insn = b->head;; insn = NEXT_INSN (insn))
348 code = GET_CODE (insn);
349 if (GET_RTX_CLASS (code) == 'i' && code != JUMP_INSN)
351 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
353 if (note
354 && ! (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
355 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
356 XEXP (note, 0))))
357 return 1;
360 if (insn == b->end)
361 break;
364 /* All the tests passed. Consider the cfg well structured. */
365 return 0;
368 /* Build the control flow graph and set nr_edges.
370 Instead of trying to build a cfg ourselves, we rely on flow to
371 do it for us. Stamp out useless code (and bug) duplication.
373 Return nonzero if an irregularity in the cfg is found which would
374 prevent cross block scheduling. */
376 static int
377 build_control_flow (edge_list)
378 struct edge_list *edge_list;
380 int i, unreachable, num_edges;
381 basic_block b;
383 /* This already accounts for entry/exit edges. */
384 num_edges = NUM_EDGES (edge_list);
386 /* Unreachable loops with more than one basic block are detected
387 during the DFS traversal in find_rgns.
389 Unreachable loops with a single block are detected here. This
390 test is redundant with the one in find_rgns, but it's much
391 cheaper to go ahead and catch the trivial case here. */
392 unreachable = 0;
393 FOR_EACH_BB (b)
395 if (b->pred == NULL
396 || (b->pred->src == b
397 && b->pred->pred_next == NULL))
398 unreachable = 1;
401 /* ??? We can kill these soon. */
402 in_edges = (int *) xcalloc (last_basic_block, sizeof (int));
403 out_edges = (int *) xcalloc (last_basic_block, sizeof (int));
404 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
406 nr_edges = 0;
407 for (i = 0; i < num_edges; i++)
409 edge e = INDEX_EDGE (edge_list, i);
411 if (e->dest != EXIT_BLOCK_PTR
412 && e->src != ENTRY_BLOCK_PTR)
413 new_edge (e->src->index, e->dest->index);
416 /* Increment by 1, since edge 0 is unused. */
417 nr_edges++;
419 return unreachable;
422 /* Record an edge in the control flow graph from SOURCE to TARGET.
424 In theory, this is redundant with the s_succs computed above, but
425 we have not converted all of haifa to use information from the
426 integer lists. */
428 static void
429 new_edge (source, target)
430 int source, target;
432 int e, next_edge;
433 int curr_edge, fst_edge;
435 /* Check for duplicates. */
436 fst_edge = curr_edge = OUT_EDGES (source);
437 while (curr_edge)
439 if (FROM_BLOCK (curr_edge) == source
440 && TO_BLOCK (curr_edge) == target)
442 return;
445 curr_edge = NEXT_OUT (curr_edge);
447 if (fst_edge == curr_edge)
448 break;
451 e = ++nr_edges;
453 FROM_BLOCK (e) = source;
454 TO_BLOCK (e) = target;
456 if (OUT_EDGES (source))
458 next_edge = NEXT_OUT (OUT_EDGES (source));
459 NEXT_OUT (OUT_EDGES (source)) = e;
460 NEXT_OUT (e) = next_edge;
462 else
464 OUT_EDGES (source) = e;
465 NEXT_OUT (e) = e;
468 if (IN_EDGES (target))
470 next_edge = NEXT_IN (IN_EDGES (target));
471 NEXT_IN (IN_EDGES (target)) = e;
472 NEXT_IN (e) = next_edge;
474 else
476 IN_EDGES (target) = e;
477 NEXT_IN (e) = e;
481 /* Translate a bit-set SET to a list BL of the bit-set members. */
483 static void
484 extract_bitlst (set, bl)
485 sbitmap set;
486 bitlst *bl;
488 int i;
490 /* bitlst table space is reused in each call to extract_bitlst. */
491 bitlst_table_last = 0;
493 bl->first_member = &bitlst_table[bitlst_table_last];
494 bl->nr_members = 0;
496 /* Iterate over each set bit in the bitset. */
497 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
499 bitlst_table[bitlst_table_last++] = i;
500 (bl->nr_members)++;
505 /* Functions for the construction of regions. */
507 /* Print the regions, for debugging purposes. Callable from debugger. */
509 void
510 debug_regions ()
512 int rgn, bb;
514 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
515 for (rgn = 0; rgn < nr_regions; rgn++)
517 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
518 rgn_table[rgn].rgn_nr_blocks);
519 fprintf (sched_dump, ";;\tbb/block: ");
521 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
523 current_blocks = RGN_BLOCKS (rgn);
525 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
526 abort ();
528 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
531 fprintf (sched_dump, "\n\n");
535 /* Build a single block region for each basic block in the function.
536 This allows for using the same code for interblock and basic block
537 scheduling. */
539 static void
540 find_single_block_region ()
542 basic_block bb;
544 nr_regions = 0;
546 FOR_EACH_BB (bb)
548 rgn_bb_table[nr_regions] = bb->index;
549 RGN_NR_BLOCKS (nr_regions) = 1;
550 RGN_BLOCKS (nr_regions) = nr_regions;
551 CONTAINING_RGN (bb->index) = nr_regions;
552 BLOCK_TO_BB (bb->index) = 0;
553 nr_regions++;
557 /* Update number of blocks and the estimate for number of insns
558 in the region. Return 1 if the region is "too large" for interblock
559 scheduling (compile time considerations), otherwise return 0. */
561 static int
562 too_large (block, num_bbs, num_insns)
563 int block, *num_bbs, *num_insns;
565 (*num_bbs)++;
566 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
567 INSN_LUID (BLOCK_HEAD (block)));
568 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
569 return 1;
570 else
571 return 0;
574 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
575 is still an inner loop. Put in max_hdr[blk] the header of the innermost
576 loop containing blk. */
577 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
579 if (max_hdr[blk] == -1) \
580 max_hdr[blk] = hdr; \
581 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
582 RESET_BIT (inner, hdr); \
583 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
585 RESET_BIT (inner,max_hdr[blk]); \
586 max_hdr[blk] = hdr; \
590 /* Find regions for interblock scheduling.
592 A region for scheduling can be:
594 * A loop-free procedure, or
596 * A reducible inner loop, or
598 * A basic block not contained in any other region.
600 ?!? In theory we could build other regions based on extended basic
601 blocks or reverse extended basic blocks. Is it worth the trouble?
603 Loop blocks that form a region are put into the region's block list
604 in topological order.
606 This procedure stores its results into the following global (ick) variables:
608 * nr_regions
609 * rgn_table
610 * rgn_bb_table
611 * block_to_bb
612 * containing_rgn
614 We use dominator relationships to avoid making regions out of non-reducible
615 loops.
617 This procedure needs to be converted to work on pred/succ lists instead
618 of edge tables. That would simplify it somewhat. */
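/* Illustrative example, not part of the original sources: for a loop-free
   function whose body is a small diamond A -> {B, C} -> D, no natural loop
   is found, so block 0 (A) is treated as the header, the single leaf block
   D (its only successor is EXIT) seeds the queue, and the backwards walk
   pulls in B and C.  The whole diamond becomes one region, with A first and
   D last in topological order.  Bodies exceeding MAX_RGN_BLOCKS or
   MAX_RGN_INSNS are rejected by too_large and fall back to single-block
   regions.  */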
620 static void
621 find_rgns (edge_list, dom)
622 struct edge_list *edge_list;
623 dominance_info dom;
625 int *max_hdr, *dfs_nr, *stack, *degree;
626 char no_loops = 1;
627 int node, child, loop_head, i, head, tail;
628 int count = 0, sp, idx = 0, current_edge = out_edges[0];
629 int num_bbs, num_insns, unreachable;
630 int too_large_failure;
631 basic_block bb;
633 /* Note if an edge has been passed. */
634 sbitmap passed;
636 /* Note if a block is a natural loop header. */
637 sbitmap header;
639 /* Note if a block is a natural inner loop header. */
640 sbitmap inner;
642 /* Note if a block is in the block queue. */
643 sbitmap in_queue;
645 /* Note if a block is on the DFS stack. */
646 sbitmap in_stack;
648 int num_edges = NUM_EDGES (edge_list);
650 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
651 and a mapping from block to its loop header (if the block is contained
652 in a loop, else -1).
654 Store results in HEADER, INNER, and MAX_HDR respectively, these will
655 be used as inputs to the second traversal.
657 STACK, SP and DFS_NR are only used during the first traversal. */
659 /* Allocate and initialize variables for the first traversal. */
660 max_hdr = (int *) xmalloc (last_basic_block * sizeof (int));
661 dfs_nr = (int *) xcalloc (last_basic_block, sizeof (int));
662 stack = (int *) xmalloc (nr_edges * sizeof (int));
664 inner = sbitmap_alloc (last_basic_block);
665 sbitmap_ones (inner);
667 header = sbitmap_alloc (last_basic_block);
668 sbitmap_zero (header);
670 passed = sbitmap_alloc (nr_edges);
671 sbitmap_zero (passed);
673 in_queue = sbitmap_alloc (last_basic_block);
674 sbitmap_zero (in_queue);
676 in_stack = sbitmap_alloc (last_basic_block);
677 sbitmap_zero (in_stack);
679 for (i = 0; i < last_basic_block; i++)
680 max_hdr[i] = -1;
682 /* DFS traversal to find inner loops in the cfg. */
684 sp = -1;
685 while (1)
687 if (current_edge == 0 || TEST_BIT (passed, current_edge))
689 /* We have reached a leaf node or a node that was already
690 processed. Pop edges off the stack until we find
691 an edge that has not yet been processed. */
692 while (sp >= 0
693 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
695 /* Pop entry off the stack. */
696 current_edge = stack[sp--];
697 node = FROM_BLOCK (current_edge);
698 child = TO_BLOCK (current_edge);
699 RESET_BIT (in_stack, child);
700 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
701 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
702 current_edge = NEXT_OUT (current_edge);
705 /* See if we have finished the DFS tree traversal. */
706 if (sp < 0 && TEST_BIT (passed, current_edge))
707 break;
709 /* Nope, continue the traversal with the popped node. */
710 continue;
713 /* Process a node. */
714 node = FROM_BLOCK (current_edge);
715 child = TO_BLOCK (current_edge);
716 SET_BIT (in_stack, node);
717 dfs_nr[node] = ++count;
719 /* If the successor is in the stack, then we've found a loop.
720 Mark the loop, if it is not a natural loop, then it will
721 be rejected during the second traversal. */
722 if (TEST_BIT (in_stack, child))
724 no_loops = 0;
725 SET_BIT (header, child);
726 UPDATE_LOOP_RELATIONS (node, child);
727 SET_BIT (passed, current_edge);
728 current_edge = NEXT_OUT (current_edge);
729 continue;
732 /* If the child was already visited, then there is no need to visit
733 it again. Just update the loop relationships and restart
734 with a new edge. */
735 if (dfs_nr[child])
737 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
738 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
739 SET_BIT (passed, current_edge);
740 current_edge = NEXT_OUT (current_edge);
741 continue;
744 /* Push an entry on the stack and continue DFS traversal. */
745 stack[++sp] = current_edge;
746 SET_BIT (passed, current_edge);
747 current_edge = OUT_EDGES (child);
749 /* This is temporary until haifa is converted to use rth's new
750 cfg routines which have true entry/exit blocks and the
751 appropriate edges from/to those blocks.
753 Generally we update dfs_nr for a node when we process its
754 out edge. However, if the node has no out edge then we will
755 not set dfs_nr for that node. This can confuse the scheduler
756 into thinking that we have unreachable blocks, which in turn
757 disables cross block scheduling.
759 So, if we have a node with no out edges, go ahead and mark it
760 as reachable now. */
761 if (current_edge == 0)
762 dfs_nr[child] = ++count;
765 /* Another check for unreachable blocks. The earlier test in
766 is_cfg_nonregular only finds unreachable blocks that do not
767 form a loop.
769 The DFS traversal will mark every block that is reachable from
770 the entry node by placing a nonzero value in dfs_nr. Thus if
771 dfs_nr is zero for any block, then it must be unreachable. */
772 unreachable = 0;
773 FOR_EACH_BB (bb)
774 if (dfs_nr[bb->index] == 0)
776 unreachable = 1;
777 break;
780 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
781 to hold degree counts. */
782 degree = dfs_nr;
784 FOR_EACH_BB (bb)
785 degree[bb->index] = 0;
786 for (i = 0; i < num_edges; i++)
788 edge e = INDEX_EDGE (edge_list, i);
790 if (e->dest != EXIT_BLOCK_PTR)
791 degree[e->dest->index]++;
794 /* Do not perform region scheduling if there are any unreachable
795 blocks. */
796 if (!unreachable)
798 int *queue;
800 if (no_loops)
801 SET_BIT (header, 0);
803 /* Second traversal: find reducible inner loops and topologically sort
804 the blocks of each region. */
806 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
808 /* Find blocks which are inner loop headers. We still have non-reducible
809 loops to consider at this point. */
810 FOR_EACH_BB (bb)
812 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
814 edge e;
815 basic_block jbb;
817 /* Now check that the loop is reducible. We do this separately
818 from finding inner loops so that we do not find a reducible
819 loop which contains an inner non-reducible loop.
821 A simple way to find reducible/natural loops is to verify
822 that each block in the loop is dominated by the loop
823 header.
825 If there exists a block that is not dominated by the loop
826 header, then the block is reachable from outside the loop
827 and thus the loop is not a natural loop. */
828 FOR_EACH_BB (jbb)
830 /* First identify blocks in the loop, except for the loop
831 entry block. */
832 if (bb->index == max_hdr[jbb->index] && bb != jbb)
834 /* Now verify that the block is dominated by the loop
835 header. */
836 if (!dominated_by_p (dom, jbb, bb))
837 break;
841 /* If we exited the loop early, then BB is the header of
842 a non-reducible loop and we should quit processing it
843 now. */
844 if (jbb != EXIT_BLOCK_PTR)
845 continue;
847 /* BB is a header of an inner loop, or block 0 in a subroutine
848 with no loops at all. */
849 head = tail = -1;
850 too_large_failure = 0;
851 loop_head = max_hdr[bb->index];
853 /* Decrease degree of all BB's successors for topological
854 ordering. */
855 for (e = bb->succ; e; e = e->succ_next)
856 if (e->dest != EXIT_BLOCK_PTR)
857 --degree[e->dest->index];
859 /* Estimate # insns, and count # blocks in the region. */
860 num_bbs = 1;
861 num_insns = (INSN_LUID (bb->end)
862 - INSN_LUID (bb->head));
864 /* Find all loop latches (blocks with back edges to the loop
865 header) or, if the cfg has no loops, all the leaf blocks in the cfg.
867 Place those blocks into the queue. */
868 if (no_loops)
870 FOR_EACH_BB (jbb)
871 /* Leaf nodes have only a single successor which must
872 be EXIT_BLOCK. */
873 if (jbb->succ
874 && jbb->succ->dest == EXIT_BLOCK_PTR
875 && jbb->succ->succ_next == NULL)
877 queue[++tail] = jbb->index;
878 SET_BIT (in_queue, jbb->index);
880 if (too_large (jbb->index, &num_bbs, &num_insns))
882 too_large_failure = 1;
883 break;
887 else
889 edge e;
891 for (e = bb->pred; e; e = e->pred_next)
893 if (e->src == ENTRY_BLOCK_PTR)
894 continue;
896 node = e->src->index;
898 if (max_hdr[node] == loop_head && node != bb->index)
900 /* This is a loop latch. */
901 queue[++tail] = node;
902 SET_BIT (in_queue, node);
904 if (too_large (node, &num_bbs, &num_insns))
906 too_large_failure = 1;
907 break;
913 /* Now add all the blocks in the loop to the queue.
915 We know the loop is a natural loop; however the algorithm
916 above will not always mark certain blocks as being in the
917 loop. Consider:
918 node children
919 a b,c
920 b c
921 c a,d
922 d b
924 The algorithm in the DFS traversal may not mark B & D as part
925 of the loop (i.e. they will not have max_hdr set to A).
927 We know they cannot be loop latches (else they would have
928 had max_hdr set since they'd have a backedge to a dominator
929 block). So we don't need them on the initial queue.
931 We know they are part of the loop because they are dominated
932 by the loop header and can be reached by a backwards walk of
933 the edges starting with nodes on the initial queue.
935 It is safe and desirable to include those nodes in the
936 loop/scheduling region. To do so we would need to decrease
937 the degree of a node if it is the target of a backedge
938 within the loop itself as the node is placed in the queue.
940 We do not do this because I'm not sure that the actual
941 scheduling code will properly handle this case. ?!? */
943 while (head < tail && !too_large_failure)
945 edge e;
946 child = queue[++head];
948 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
950 node = e->src->index;
952 /* See discussion above about nodes not marked as in
953 this loop during the initial DFS traversal. */
954 if (e->src == ENTRY_BLOCK_PTR
955 || max_hdr[node] != loop_head)
957 tail = -1;
958 break;
960 else if (!TEST_BIT (in_queue, node) && node != bb->index)
962 queue[++tail] = node;
963 SET_BIT (in_queue, node);
965 if (too_large (node, &num_bbs, &num_insns))
967 too_large_failure = 1;
968 break;
974 if (tail >= 0 && !too_large_failure)
976 /* Place the loop header into list of region blocks. */
977 degree[bb->index] = -1;
978 rgn_bb_table[idx] = bb->index;
979 RGN_NR_BLOCKS (nr_regions) = num_bbs;
980 RGN_BLOCKS (nr_regions) = idx++;
981 CONTAINING_RGN (bb->index) = nr_regions;
982 BLOCK_TO_BB (bb->index) = count = 0;
984 /* Remove blocks from queue[] when their in degree
985 becomes zero. Repeat until no blocks are left on the
986 list. This produces a topological list of blocks in
987 the region. */
988 while (tail >= 0)
990 if (head < 0)
991 head = tail;
992 child = queue[head];
993 if (degree[child] == 0)
995 edge e;
997 degree[child] = -1;
998 rgn_bb_table[idx++] = child;
999 BLOCK_TO_BB (child) = ++count;
1000 CONTAINING_RGN (child) = nr_regions;
1001 queue[head] = queue[tail--];
1003 for (e = BASIC_BLOCK (child)->succ;
1005 e = e->succ_next)
1006 if (e->dest != EXIT_BLOCK_PTR)
1007 --degree[e->dest->index];
1009 else
1010 --head;
1012 ++nr_regions;
1016 free (queue);
1019 /* Any block that did not end up in a region is placed into a region
1020 by itself. */
1021 FOR_EACH_BB (bb)
1022 if (degree[bb->index] >= 0)
1024 rgn_bb_table[idx] = bb->index;
1025 RGN_NR_BLOCKS (nr_regions) = 1;
1026 RGN_BLOCKS (nr_regions) = idx++;
1027 CONTAINING_RGN (bb->index) = nr_regions++;
1028 BLOCK_TO_BB (bb->index) = 0;
1031 free (max_hdr);
1032 free (dfs_nr);
1033 free (stack);
1034 sbitmap_free (passed);
1035 sbitmap_free (header);
1036 sbitmap_free (inner);
1037 sbitmap_free (in_queue);
1038 sbitmap_free (in_stack);
1041 /* Functions for regions scheduling information. */
1043 /* Compute dominators, probability, and potential-split-edges of bb.
1044 Assume that these values were already computed for bb's predecessors. */
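/* Illustrative note, not part of the original sources: prob[bb] is
   accumulated over bb's incoming region edges.  Each predecessor PRED
   contributes prob[PRED] divided by the number of PRED's out edges that
   stay inside the region, damped by a factor of 0.9 when PRED also has
   edges leaving the region.  E.g. a block whose only predecessor has
   prob 0.8 and two in-region successors starts with 0.8 / 2 = 0.4.  */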
1046 static void
1047 compute_dom_prob_ps (bb)
1048 int bb;
1050 int nxt_in_edge, fst_in_edge, pred;
1051 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1053 prob[bb] = 0.0;
1054 if (IS_RGN_ENTRY (bb))
1056 SET_BIT (dom[bb], 0);
1057 prob[bb] = 1.0;
1058 return;
1061 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1063 /* Initialize dom[bb] to '111..1'. */
1064 sbitmap_ones (dom[bb]);
1068 pred = FROM_BLOCK (nxt_in_edge);
1069 sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1070 sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1072 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1074 nr_out_edges = 1;
1075 nr_rgn_out_edges = 0;
1076 fst_out_edge = OUT_EDGES (pred);
1077 nxt_out_edge = NEXT_OUT (fst_out_edge);
1079 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1081 SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1083 /* The successor doesn't belong in the region? */
1084 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1085 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1086 ++nr_rgn_out_edges;
1088 while (fst_out_edge != nxt_out_edge)
1090 ++nr_out_edges;
1091 /* The successor doesn't belong in the region? */
1092 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1093 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1094 ++nr_rgn_out_edges;
1095 SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1096 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1100 /* Now nr_rgn_out_edges is the number of region-exit edges from
1101 pred, and nr_out_edges will be the number of pred out edges
1102 not leaving the region. */
1103 nr_out_edges -= nr_rgn_out_edges;
1104 if (nr_rgn_out_edges > 0)
1105 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1106 else
1107 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1108 nxt_in_edge = NEXT_IN (nxt_in_edge);
1110 while (fst_in_edge != nxt_in_edge);
1112 SET_BIT (dom[bb], bb);
1113 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1115 if (sched_verbose >= 2)
1116 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1117 (int) (100.0 * prob[bb]));
1120 /* Functions for target info. */
1122 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1123 Note that bb_trg dominates bb_src. */
1125 static void
1126 split_edges (bb_src, bb_trg, bl)
1127 int bb_src;
1128 int bb_trg;
1129 edgelst *bl;
1131 sbitmap src = (edgeset) sbitmap_alloc (pot_split[bb_src]->n_bits);
1132 sbitmap_copy (src, pot_split[bb_src]);
1134 sbitmap_difference (src, src, pot_split[bb_trg]);
1135 extract_bitlst (src, bl);
1136 sbitmap_free (src);
1139 /* Find the valid candidate-source-blocks for the target block TRG, compute
1140 their probability, and check if they are speculative or not.
1141 For speculative sources, compute their update-blocks and split-blocks. */
1143 static void
1144 compute_trg_info (trg)
1145 int trg;
1147 candidate *sp;
1148 edgelst el;
1149 int check_block, update_idx;
1150 int i, j, k, fst_edge, nxt_edge;
1152 /* Define some of the fields for the target bb as well. */
1153 sp = candidate_table + trg;
1154 sp->is_valid = 1;
1155 sp->is_speculative = 0;
1156 sp->src_prob = 100;
1158 for (i = trg + 1; i < current_nr_blocks; i++)
1160 sp = candidate_table + i;
1162 sp->is_valid = IS_DOMINATED (i, trg);
1163 if (sp->is_valid)
1165 sp->src_prob = GET_SRC_PROB (i, trg);
1166 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1169 if (sp->is_valid)
1171 split_edges (i, trg, &el);
1172 sp->is_speculative = (el.nr_members) ? 1 : 0;
1173 if (sp->is_speculative && !flag_schedule_speculative)
1174 sp->is_valid = 0;
1177 if (sp->is_valid)
1179 char *update_blocks;
1181 /* Compute split blocks and store them in bblst_table.
1182 The TO block of every split edge is a split block. */
1183 sp->split_bbs.first_member = &bblst_table[bblst_last];
1184 sp->split_bbs.nr_members = el.nr_members;
1185 for (j = 0; j < el.nr_members; bblst_last++, j++)
1186 bblst_table[bblst_last] =
1187 TO_BLOCK (rgn_edges[el.first_member[j]]);
1188 sp->update_bbs.first_member = &bblst_table[bblst_last];
1190 /* Compute update blocks and store them in bblst_table.
1191 For every split edge, look at the FROM block, and check
1192 all out edges. For each out edge that is not a split edge,
1193 add the TO block to the update block list. This list can end
1194 up with a lot of duplicates. We need to weed them out to avoid
1195 overrunning the end of the bblst_table. */
1196 update_blocks = (char *) alloca (last_basic_block);
1197 memset (update_blocks, 0, last_basic_block);
1199 update_idx = 0;
1200 for (j = 0; j < el.nr_members; j++)
1202 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1203 fst_edge = nxt_edge = OUT_EDGES (check_block);
1206 if (! update_blocks[TO_BLOCK (nxt_edge)])
1208 for (k = 0; k < el.nr_members; k++)
1209 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1210 break;
1212 if (k >= el.nr_members)
1214 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1215 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1216 update_idx++;
1220 nxt_edge = NEXT_OUT (nxt_edge);
1222 while (fst_edge != nxt_edge);
1224 sp->update_bbs.nr_members = update_idx;
1226 /* Make sure we didn't overrun the end of bblst_table. */
1227 if (bblst_last > bblst_size)
1228 abort ();
1230 else
1232 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1234 sp->is_speculative = 0;
1235 sp->src_prob = 0;
1240 /* Print candidates info, for debugging purposes. Callable from debugger. */
1242 void
1243 debug_candidate (i)
1244 int i;
1246 if (!candidate_table[i].is_valid)
1247 return;
1249 if (candidate_table[i].is_speculative)
1251 int j;
1252 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1254 fprintf (sched_dump, "split path: ");
1255 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1257 int b = candidate_table[i].split_bbs.first_member[j];
1259 fprintf (sched_dump, " %d ", b);
1261 fprintf (sched_dump, "\n");
1263 fprintf (sched_dump, "update path: ");
1264 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1266 int b = candidate_table[i].update_bbs.first_member[j];
1268 fprintf (sched_dump, " %d ", b);
1270 fprintf (sched_dump, "\n");
1272 else
1274 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1278 /* Print candidates info, for debugging purposes. Callable from debugger. */
1280 void
1281 debug_candidates (trg)
1282 int trg;
1284 int i;
1286 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1287 BB_TO_BLOCK (trg), trg);
1288 for (i = trg + 1; i < current_nr_blocks; i++)
1289 debug_candidate (i);
1292 /* Functions for speculative scheduling. */
1294 /* Return 0 if x is a set of a register alive in the beginning of one
1295 of the split-blocks of src, otherwise return 1. */
1297 static int
1298 check_live_1 (src, x)
1299 int src;
1300 rtx x;
1302 int i;
1303 int regno;
1304 rtx reg = SET_DEST (x);
1306 if (reg == 0)
1307 return 1;
1309 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1310 || GET_CODE (reg) == SIGN_EXTRACT
1311 || GET_CODE (reg) == STRICT_LOW_PART)
1312 reg = XEXP (reg, 0);
1314 if (GET_CODE (reg) == PARALLEL)
1316 int i;
1318 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1319 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1320 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1321 return 1;
1323 return 0;
1326 if (GET_CODE (reg) != REG)
1327 return 1;
1329 regno = REGNO (reg);
1331 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1333 /* Global registers are assumed live. */
1334 return 0;
1336 else
1338 if (regno < FIRST_PSEUDO_REGISTER)
1340 /* Check for hard registers. */
1341 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1342 while (--j >= 0)
1344 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1346 int b = candidate_table[src].split_bbs.first_member[i];
1348 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1349 regno + j))
1351 return 0;
1356 else
1358 /* Check for pseudo registers. */
1359 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1361 int b = candidate_table[src].split_bbs.first_member[i];
1363 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1365 return 0;
1371 return 1;
1374 /* If x is a set of a register R, mark that R is alive in the beginning
1375 of every update-block of src. */
1377 static void
1378 update_live_1 (src, x)
1379 int src;
1380 rtx x;
1382 int i;
1383 int regno;
1384 rtx reg = SET_DEST (x);
1386 if (reg == 0)
1387 return;
1389 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1390 || GET_CODE (reg) == SIGN_EXTRACT
1391 || GET_CODE (reg) == STRICT_LOW_PART)
1392 reg = XEXP (reg, 0);
1394 if (GET_CODE (reg) == PARALLEL)
1396 int i;
1398 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1399 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1400 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1402 return;
1405 if (GET_CODE (reg) != REG)
1406 return;
1408 /* Global registers are always live, so the code below does not apply
1409 to them. */
1411 regno = REGNO (reg);
1413 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1415 if (regno < FIRST_PSEUDO_REGISTER)
1417 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
1418 while (--j >= 0)
1420 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1422 int b = candidate_table[src].update_bbs.first_member[i];
1424 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1425 regno + j);
1429 else
1431 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1433 int b = candidate_table[src].update_bbs.first_member[i];
1435 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1441 /* Return 1 if insn can be speculatively moved from block src to trg,
1442 otherwise return 0. Called before first insertion of insn to
1443 ready-list or before the scheduling. */
1445 static int
1446 check_live (insn, src)
1447 rtx insn;
1448 int src;
1450 /* Find the registers set by instruction. */
1451 if (GET_CODE (PATTERN (insn)) == SET
1452 || GET_CODE (PATTERN (insn)) == CLOBBER)
1453 return check_live_1 (src, PATTERN (insn));
1454 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1456 int j;
1457 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1458 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1459 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1460 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1461 return 0;
1463 return 1;
1466 return 1;
1469 /* Update the live registers info after insn was moved speculatively from
1470 block src to trg. */
1472 static void
1473 update_live (insn, src)
1474 rtx insn;
1475 int src;
1477 /* Find the registers set by instruction. */
1478 if (GET_CODE (PATTERN (insn)) == SET
1479 || GET_CODE (PATTERN (insn)) == CLOBBER)
1480 update_live_1 (src, PATTERN (insn));
1481 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1483 int j;
1484 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1485 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1486 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1487 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1491 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1492 #define IS_REACHABLE(bb_from, bb_to) \
1493 (bb_from == bb_to \
1494 || IS_RGN_ENTRY (bb_from) \
1495 || (TEST_BIT (ancestor_edges[bb_to], \
1496 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
1498 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1500 static void
1501 set_spec_fed (load_insn)
1502 rtx load_insn;
1504 rtx link;
1506 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1507 if (GET_MODE (link) == VOIDmode)
1508 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1509 } /* set_spec_fed */
1511 /* On the path from the insn to load_insn_bb, find a conditional
1512 branch depending on insn, that guards the speculative load. */
1514 static int
1515 find_conditional_protection (insn, load_insn_bb)
1516 rtx insn;
1517 int load_insn_bb;
1519 rtx link;
1521 /* Iterate through DEF-USE forward dependences. */
1522 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1524 rtx next = XEXP (link, 0);
1525 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1526 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1527 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1528 && load_insn_bb != INSN_BB (next)
1529 && GET_MODE (link) == VOIDmode
1530 && (GET_CODE (next) == JUMP_INSN
1531 || find_conditional_protection (next, load_insn_bb)))
1532 return 1;
1534 return 0;
1535 } /* find_conditional_protection */
1537 /* Returns 1 if the same insn1 that participates in the computation
1538 of load_insn's address is feeding a conditional branch that is
1539 guarding on load_insn. This is true if we find the two DEF-USE
1540 chains:
1541 insn1 -> ... -> conditional-branch
1542 insn1 -> ... -> load_insn,
1543 and if a flow path exists:
1544 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1545 and if insn1 is on the path
1546 region-entry -> ... -> bb_trg -> ... load_insn.
1548 Locate insn1 by climbing on LOG_LINKS from load_insn.
1549 Locate the branch by following INSN_DEPEND from insn1. */
1551 static int
1552 is_conditionally_protected (load_insn, bb_src, bb_trg)
1553 rtx load_insn;
1554 int bb_src, bb_trg;
1556 rtx link;
1558 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1560 rtx insn1 = XEXP (link, 0);
1562 /* Must be a DEF-USE dependence upon non-branch. */
1563 if (GET_MODE (link) != VOIDmode
1564 || GET_CODE (insn1) == JUMP_INSN)
1565 continue;
1567 /* A path must exist: region-entry -> ... -> bb_trg -> ... load_insn. */
1568 if (INSN_BB (insn1) == bb_src
1569 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1570 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1571 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1572 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1573 continue;
1575 /* Now search for the conditional-branch. */
1576 if (find_conditional_protection (insn1, bb_src))
1577 return 1;
1579 /* Recursive step: search another insn1, "above" current insn1. */
1580 return is_conditionally_protected (insn1, bb_src, bb_trg);
1583 /* The chain does not exist. */
1584 return 0;
1585 } /* is_conditionally_protected */
1587 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1588 load_insn can move speculatively from bb_src to bb_trg. All the
1589 following must hold:
1591 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1592 (2) load_insn and load1 have a def-use dependence upon
1593 the same insn 'insn1'.
1594 (3) either load2 is in bb_trg, or:
1595 - there's only one split-block, and
1596 - load1 is on the escape path, and
1598 From all these we can conclude that the two loads access memory
1599 addresses that differ at most by a constant, and hence if moving
1600 load_insn would cause an exception, it would have been caused by
1601 load2 anyhow. */
1603 static int
1604 is_pfree (load_insn, bb_src, bb_trg)
1605 rtx load_insn;
1606 int bb_src, bb_trg;
1608 rtx back_link;
1609 candidate *candp = candidate_table + bb_src;
1611 if (candp->split_bbs.nr_members != 1)
1612 /* Must have exactly one escape block. */
1613 return 0;
1615 for (back_link = LOG_LINKS (load_insn);
1616 back_link; back_link = XEXP (back_link, 1))
1618 rtx insn1 = XEXP (back_link, 0);
1620 if (GET_MODE (back_link) == VOIDmode)
1622 /* Found a DEF-USE dependence (insn1, load_insn). */
1623 rtx fore_link;
1625 for (fore_link = INSN_DEPEND (insn1);
1626 fore_link; fore_link = XEXP (fore_link, 1))
1628 rtx insn2 = XEXP (fore_link, 0);
1629 if (GET_MODE (fore_link) == VOIDmode)
1631 /* Found a DEF-USE dependence (insn1, insn2). */
1632 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1633 /* insn2 not guaranteed to be a 1 base reg load. */
1634 continue;
1636 if (INSN_BB (insn2) == bb_trg)
1637 /* insn2 is the similar load, in the target block. */
1638 return 1;
1640 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1641 /* insn2 is a similar load, in a split-block. */
1642 return 1;
1648 /* Couldn't find a similar load. */
1649 return 0;
1650 } /* is_pfree */
1652 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1653 a load moved speculatively, or if load_insn is protected by
1654 a compare on load_insn's address). */
1656 static int
1657 is_prisky (load_insn, bb_src, bb_trg)
1658 rtx load_insn;
1659 int bb_src, bb_trg;
1661 if (FED_BY_SPEC_LOAD (load_insn))
1662 return 1;
1664 if (LOG_LINKS (load_insn) == NULL)
1665 /* Dependence may 'hide' out of the region. */
1666 return 1;
1668 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1669 return 1;
1671 return 0;
1674 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1675 Return 1 if insn is exception-free (and the motion is valid)
1676 and 0 otherwise. */
1678 static int
1679 is_exception_free (insn, bb_src, bb_trg)
1680 rtx insn;
1681 int bb_src, bb_trg;
1683 int insn_class = haifa_classify_insn (insn);
1685 /* Handle non-load insns. */
1686 switch (insn_class)
1688 case TRAP_FREE:
1689 return 1;
1690 case TRAP_RISKY:
1691 return 0;
1692 default:;
1695 /* Handle loads. */
1696 if (!flag_schedule_speculative_load)
1697 return 0;
1698 IS_LOAD_INSN (insn) = 1;
1699 switch (insn_class)
1701 case IFREE:
1702 return (1);
1703 case IRISKY:
1704 return 0;
1705 case PFREE_CANDIDATE:
1706 if (is_pfree (insn, bb_src, bb_trg))
1707 return 1;
1708 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1709 case PRISKY_CANDIDATE:
1710 if (!flag_schedule_speculative_load_dangerous
1711 || is_prisky (insn, bb_src, bb_trg))
1712 return 0;
1713 break;
1714 default:;
1717 return flag_schedule_speculative_load_dangerous;
1720 /* The number of insns from the current block scheduled so far. */
1721 static int sched_target_n_insns;
1722 /* The number of insns from the current block to be scheduled in total. */
1723 static int target_n_insns;
1724 /* The number of insns from the entire region scheduled so far. */
1725 static int sched_n_insns;
1726 /* Nonzero if the last scheduled insn was a jump. */
1727 static int last_was_jump;
1729 /* Implementations of the sched_info functions for region scheduling. */
1730 static void init_ready_list PARAMS ((struct ready_list *));
1731 static int can_schedule_ready_p PARAMS ((rtx));
1732 static int new_ready PARAMS ((rtx));
1733 static int schedule_more_p PARAMS ((void));
1734 static const char *rgn_print_insn PARAMS ((rtx, int));
1735 static int rgn_rank PARAMS ((rtx, rtx));
1736 static int contributes_to_priority PARAMS ((rtx, rtx));
1737 static void compute_jump_reg_dependencies PARAMS ((rtx, regset));
1739 /* Return nonzero if there are more insns that should be scheduled. */
1741 static int
1742 schedule_more_p ()
1744 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1747 /* Add all insns that are initially ready to the ready list READY. Called
1748 once before scheduling a set of insns. */
1750 static void
1751 init_ready_list (ready)
1752 struct ready_list *ready;
1754 rtx prev_head = current_sched_info->prev_head;
1755 rtx next_tail = current_sched_info->next_tail;
1756 int bb_src;
1757 rtx insn;
1759 target_n_insns = 0;
1760 sched_target_n_insns = 0;
1761 sched_n_insns = 0;
1762 last_was_jump = 0;
1764 /* Print debugging information. */
1765 if (sched_verbose >= 5)
1766 debug_dependencies ();
1768 /* Prepare current target block info. */
1769 if (current_nr_blocks > 1)
1771 candidate_table = (candidate *) xmalloc (current_nr_blocks
1772 * sizeof (candidate));
1774 bblst_last = 0;
1775 /* bblst_table holds split blocks and update blocks for each block after
1776 the current one in the region. split blocks and update blocks are
1777 the TO blocks of region edges, so there can be at most rgn_nr_edges
1778 of them. */
1779 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1780 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
1782 bitlst_table_last = 0;
1783 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
1785 compute_trg_info (target_bb);
1788 /* Initialize ready list with all 'ready' insns in target block.
1789 Count number of insns in the target block being scheduled. */
1790 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1792 if (INSN_DEP_COUNT (insn) == 0)
1793 ready_add (ready, insn);
1794 target_n_insns++;
1797 /* Add to ready list all 'ready' insns in valid source blocks.
1798 For speculative insns, check-live, exception-free, and
1799 issue-delay. */
1800 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1801 if (IS_VALID (bb_src))
1803 rtx src_head;
1804 rtx src_next_tail;
1805 rtx tail, head;
1807 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1808 src_next_tail = NEXT_INSN (tail);
1809 src_head = head;
1811 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1813 if (! INSN_P (insn))
1814 continue;
1816 if (!CANT_MOVE (insn)
1817 && (!IS_SPECULATIVE_INSN (insn)
1818 || ((((!targetm.sched.use_dfa_pipeline_interface
1819 || !(*targetm.sched.use_dfa_pipeline_interface) ())
1820 && insn_issue_delay (insn) <= 3)
1821 || (targetm.sched.use_dfa_pipeline_interface
1822 && (*targetm.sched.use_dfa_pipeline_interface) ()
1823 && (recog_memoized (insn) < 0
1824 || min_insn_conflict_delay (curr_state,
1825 insn, insn) <= 3)))
1826 && check_live (insn, bb_src)
1827 && is_exception_free (insn, bb_src, target_bb))))
1828 if (INSN_DEP_COUNT (insn) == 0)
1829 ready_add (ready, insn);
1834 /* Called after taking INSN from the ready list. Returns nonzero if this
1835 insn can be scheduled, zero if we should silently discard it. */
1837 static int
1838 can_schedule_ready_p (insn)
1839 rtx insn;
1841 if (GET_CODE (insn) == JUMP_INSN)
1842 last_was_jump = 1;
1844 /* An interblock motion? */
1845 if (INSN_BB (insn) != target_bb)
1847 basic_block b1;
1849 if (IS_SPECULATIVE_INSN (insn))
1851 if (!check_live (insn, INSN_BB (insn)))
1852 return 0;
1853 update_live (insn, INSN_BB (insn));
1855 /* For speculative load, mark insns fed by it. */
1856 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1857 set_spec_fed (insn);
1859 nr_spec++;
1861 nr_inter++;
1863 /* Update source block boundaries. */
1864 b1 = BLOCK_FOR_INSN (insn);
1865 if (insn == b1->head && insn == b1->end)
1867 /* We moved all the insns in the basic block.
1868 Emit a note after the last insn and update the
1869 begin/end boundaries to point to the note. */
1870 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1871 b1->head = note;
1872 b1->end = note;
1874 else if (insn == b1->end)
1876 /* We took insns from the end of the basic block,
1877 so update the end of block boundary so that it
1878 points to the first insn we did not move. */
1879 b1->end = PREV_INSN (insn);
1881 else if (insn == b1->head)
1883 /* We took insns from the start of the basic block,
1884 so update the start of block boundary so that
1885 it points to the first insn we did not move. */
1886 b1->head = NEXT_INSN (insn);
1889 else
1891 /* In block motion. */
1892 sched_target_n_insns++;
1894 sched_n_insns++;
1896 return 1;
1899 /* Called after INSN has all its dependencies resolved. Return nonzero
1900 if it should be moved to the ready list or the queue, or zero if we
1901 should silently discard it. */
1902 static int
1903 new_ready (next)
1904 rtx next;
1906 /* For speculative insns, before inserting to ready/queue,
1907 check live, exception-free, and issue-delay. */
1908 if (INSN_BB (next) != target_bb
1909 && (!IS_VALID (INSN_BB (next))
1910 || CANT_MOVE (next)
1911 || (IS_SPECULATIVE_INSN (next)
1912 && (0
1913 || (targetm.sched.use_dfa_pipeline_interface
1914 && (*targetm.sched.use_dfa_pipeline_interface) ()
1915 && recog_memoized (next) >= 0
1916 && min_insn_conflict_delay (curr_state, next,
1917 next) > 3)
1918 || ((!targetm.sched.use_dfa_pipeline_interface
1919 || !(*targetm.sched.use_dfa_pipeline_interface) ())
1920 && insn_issue_delay (next) > 3)
1921 || !check_live (next, INSN_BB (next))
1922 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1923 return 0;
1924 return 1;
1927 /* Return a string that contains the insn uid and optionally anything else
1928 necessary to identify this insn in an output. It's valid to use a
1929 static buffer for this. The ALIGNED parameter should cause the string
1930 to be formatted so that multiple output lines will line up nicely. */
1932 static const char *
1933 rgn_print_insn (insn, aligned)
1934 rtx insn;
1935 int aligned;
1937 static char tmp[80];
1939 if (aligned)
1940 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1941 else
1943 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1944 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1945 else
1946 sprintf (tmp, "%d", INSN_UID (insn));
1948 return tmp;
1951 /* Compare priority of two insns. Return a positive number if the second
1952 insn is to be preferred for scheduling, and a negative one if the first
1953 is to be preferred. Zero if they are equally good. */
1955 static int
1956 rgn_rank (insn1, insn2)
1957 rtx insn1, insn2;
1959 /* Some comparisons make sense only in interblock scheduling. */
1960 if (INSN_BB (insn1) != INSN_BB (insn2))
1962 int spec_val, prob_val;
1964 /* Prefer an inblock motion over an interblock motion. */
1965 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1966 return 1;
1967 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1968 return -1;
1970 /* Prefer a useful motion over a speculative one. */
1971 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1972 if (spec_val)
1973 return spec_val;
1975 /* Prefer a more probable (speculative) insn. */
1976 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1977 if (prob_val)
1978 return prob_val;
1980 return 0;
1983 /* NEXT is an instruction that depends on INSN (a backward dependence);
1984 return nonzero if we should include this dependence in priority
1985 calculations. */
1987 static int
1988 contributes_to_priority (next, insn)
1989 rtx next, insn;
1991 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1994 /* INSN is a JUMP_INSN. Store the set of registers that must be considered
1995 to be set by this jump in SET. */
1997 static void
1998 compute_jump_reg_dependencies (insn, set)
1999 rtx insn ATTRIBUTE_UNUSED;
2000 regset set ATTRIBUTE_UNUSED;
2002 /* Nothing to do here, since we postprocess jumps in
2003 add_branch_dependences. */
2006 /* Used in schedule_insns to initialize current_sched_info for scheduling
2007 regions (or single basic blocks). */
2009 static struct sched_info region_sched_info =
2011 init_ready_list,
2012 can_schedule_ready_p,
2013 schedule_more_p,
2014 new_ready,
2015 rgn_rank,
2016 rgn_print_insn,
2017 contributes_to_priority,
2018 compute_jump_reg_dependencies,
2020 NULL, NULL,
2021 NULL, NULL,
2022 0, 0
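/* Illustrative sketch, guarded out of the build (the real struct sched_info
   has more members than shown here): the hook-table idiom used above.  The
   generic scheduler driver only calls through such a table, so pointing the
   current table at a different set of functions selects a different client
   without touching the driver.  All toy_* names are hypothetical.  */
#if 0
#include <stdio.h>

struct toy_sched_hooks
{
  int (*rank) (int, int);
  const char *(*print_insn) (int);
};

/* Trivial placeholder ranking by uid.  */
static int
toy_rank_by_uid (i1, i2)
     int i1, i2;
{
  return i2 - i1;
}

static const char *
toy_print_uid (uid)
     int uid;
{
  static char buf[32];

  sprintf (buf, "i%d", uid);
  return buf;
}

static const struct toy_sched_hooks toy_region_hooks =
{
  toy_rank_by_uid,
  toy_print_uid
};

/* The driver would dereference a pointer such as this, much as the pass
   sets current_sched_info = &region_sched_info in schedule_insns.  */
static const struct toy_sched_hooks *toy_current_hooks = &toy_region_hooks;
#endif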
2025 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
2027 static bool
2028 sets_likely_spilled (pat)
2029 rtx pat;
2031 bool ret = false;
2032 note_stores (pat, sets_likely_spilled_1, &ret);
2033 return ret;
2036 static void
2037 sets_likely_spilled_1 (x, pat, data)
2038 rtx x, pat;
2039 void *data;
2041 bool *ret = (bool *) data;
2043 if (GET_CODE (pat) == SET
2044 && REG_P (x)
2045 && REGNO (x) < FIRST_PSEUDO_REGISTER
2046 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2047 *ret = true;
2050 /* Add dependences so that branches are scheduled to run last in their
2051 block. */
2053 static void
2054 add_branch_dependences (head, tail)
2055 rtx head, tail;
2057 rtx insn, last;
2059 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2060 that can throw exceptions, force them to remain in order at the end of
2061 the block by adding dependencies and giving the last a high priority.
2062 There may be notes present, and prev_head may also be a note.
2064 Branches must obviously remain at the end. Calls should remain at the
2065 end since moving them results in worse register allocation. Uses remain
2066 at the end to ensure proper register allocation.
2068 cc0 setters remain at the end because they can't be moved away from
2069 their cc0 user.
2071 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2072 are not moved before reload because we can wind up with register
2073 allocation failures. */
2075 insn = tail;
2076 last = 0;
2077 while (GET_CODE (insn) == CALL_INSN
2078 || GET_CODE (insn) == JUMP_INSN
2079 || (GET_CODE (insn) == INSN
2080 && (GET_CODE (PATTERN (insn)) == USE
2081 || GET_CODE (PATTERN (insn)) == CLOBBER
2082 || can_throw_internal (insn)
2083 #ifdef HAVE_cc0
2084 || sets_cc0_p (PATTERN (insn))
2085 #endif
2086 || (!reload_completed
2087 && sets_likely_spilled (PATTERN (insn)))))
2088 || GET_CODE (insn) == NOTE)
2090 if (GET_CODE (insn) != NOTE)
2092 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2094 add_dependence (last, insn, REG_DEP_ANTI);
2095 INSN_REF_COUNT (insn)++;
2098 CANT_MOVE (insn) = 1;
2100 last = insn;
2103 /* Don't overrun the bounds of the basic block. */
2104 if (insn == head)
2105 break;
2107 insn = PREV_INSN (insn);
2110 /* Make sure these insns are scheduled last in their block. */
2111 insn = last;
2112 if (insn != 0)
2113 while (insn != head)
2115 insn = prev_nonnote_insn (insn);
2117 if (INSN_REF_COUNT (insn) != 0)
2118 continue;
2120 add_dependence (last, insn, REG_DEP_ANTI);
2121 INSN_REF_COUNT (insn) = 1;
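/* Illustrative sketch, guarded out of the build: the backward walk above,
   on a toy block represented as an array of insn descriptors.  Each
   "sticky" insn in the trailing group is chained to the sticky insn after
   it with an anti dependence, so no later reordering can move it away from
   the end of the block.  The toy_* names and fields are hypothetical.  */
#if 0
#define TOY_BLOCK_LEN 8

struct toy_insn_slot
{
  int sticky;		/* Branch, call, use, clobber, ...  */
  int has_anti_dep;	/* Nonzero when anti_dep_on is meaningful.  */
  int anti_dep_on;	/* Index of an insn this one must follow.  */
  int cant_move;
};

static struct toy_insn_slot toy_block[TOY_BLOCK_LEN];

static void
toy_add_branch_dependences (head, tail)
     int head, tail;
{
  int insn = tail;
  int last = -1;

  while (insn >= head && toy_block[insn].sticky)
    {
      if (last >= 0)
	{
	  /* LAST must be scheduled after INSN.  */
	  toy_block[last].anti_dep_on = insn;
	  toy_block[last].has_anti_dep = 1;
	}
      toy_block[insn].cant_move = 1;
      last = insn;
      insn--;
    }
}
#endif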
2125 /* Data structures for the computation of data dependences in a region. We
2126 keep one `deps' structure for every basic block. Before analyzing the
2127 data dependences for a bb, its variables are initialized as a function of
2128 the variables of its predecessors. When the analysis for a bb completes,
2129 we save the contents to the corresponding bb_deps[bb] variable. */
2131 static struct deps *bb_deps;
2133 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2135 static rtx
2136 concat_INSN_LIST (copy, old)
2137 rtx copy, old;
2139 rtx new = old;
2140 for (; copy ; copy = XEXP (copy, 1))
2141 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2142 return new;
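/* Illustrative sketch, guarded out of the build: the same "duplicate COPY
   and prepend it to OLD" idiom on a plain, self-contained list node.  OLD
   is never modified; the result simply reuses it as its tail.  As with
   concat_INSN_LIST above, the copied elements end up in front of OLD in
   reverse order, which does not matter when the list is treated as a set.
   The toy_* names are hypothetical.  */
#if 0
#include <stdlib.h>

struct toy_node
{
  int value;
  struct toy_node *next;
};

static struct toy_node *
toy_concat (copy, old)
     struct toy_node *copy, *old;
{
  struct toy_node *new_list = old;

  for (; copy; copy = copy->next)
    {
      struct toy_node *n = (struct toy_node *) malloc (sizeof *n);

      if (n == 0)
	abort ();
      n->value = copy->value;
      n->next = new_list;
      new_list = n;
    }
  return new_list;
}
#endif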
2145 static void
2146 concat_insn_mem_list (copy_insns, copy_mems, old_insns_p, old_mems_p)
2147 rtx copy_insns, copy_mems;
2148 rtx *old_insns_p, *old_mems_p;
2150 rtx new_insns = *old_insns_p;
2151 rtx new_mems = *old_mems_p;
2153 while (copy_insns)
2155 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2156 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2157 copy_insns = XEXP (copy_insns, 1);
2158 copy_mems = XEXP (copy_mems, 1);
2161 *old_insns_p = new_insns;
2162 *old_mems_p = new_mems;
2165 /* After computing the dependencies for block BB, propagate the dependencies
2166 found in PRED_DEPS (the caller's TMP_DEPS) to the successors of the block. */
2167 static void
2168 propagate_deps (bb, pred_deps)
2169 int bb;
2170 struct deps *pred_deps;
2172 int b = BB_TO_BLOCK (bb);
2173 int e, first_edge;
2175 /* bb's structures are inherited by its successors. */
2176 first_edge = e = OUT_EDGES (b);
2177 if (e > 0)
2180 int b_succ = TO_BLOCK (e);
2181 int bb_succ = BLOCK_TO_BB (b_succ);
2182 struct deps *succ_deps = bb_deps + bb_succ;
2183 int reg;
2185 /* Only bbs "below" bb, in the same region, are interesting. */
2186 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2187 || bb_succ <= bb)
2189 e = NEXT_OUT (e);
2190 continue;
2193 /* The reg_last lists are inherited by bb_succ. */
2194 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2196 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2197 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2199 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2200 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2201 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2202 succ_rl->clobbers);
2203 succ_rl->uses_length += pred_rl->uses_length;
2204 succ_rl->clobbers_length += pred_rl->clobbers_length;
2206 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2208 /* Mem read/write lists are inherited by bb_succ. */
2209 concat_insn_mem_list (pred_deps->pending_read_insns,
2210 pred_deps->pending_read_mems,
2211 &succ_deps->pending_read_insns,
2212 &succ_deps->pending_read_mems);
2213 concat_insn_mem_list (pred_deps->pending_write_insns,
2214 pred_deps->pending_write_mems,
2215 &succ_deps->pending_write_insns,
2216 &succ_deps->pending_write_mems);
2218 succ_deps->last_pending_memory_flush
2219 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2220 succ_deps->last_pending_memory_flush);
2222 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2223 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2225 /* last_function_call is inherited by bb_succ. */
2226 succ_deps->last_function_call
2227 = concat_INSN_LIST (pred_deps->last_function_call,
2228 succ_deps->last_function_call);
2230 /* sched_before_next_call is inherited by bb_succ. */
2231 succ_deps->sched_before_next_call
2232 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2233 succ_deps->sched_before_next_call);
2235 e = NEXT_OUT (e);
2237 while (e != first_edge);
2239 /* These lists should point to the right place, for correct
2240 freeing later. */
2241 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2242 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2243 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2244 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2246 /* Can't allow these to be freed twice. */
2247 pred_deps->pending_read_insns = 0;
2248 pred_deps->pending_read_mems = 0;
2249 pred_deps->pending_write_insns = 0;
2250 pred_deps->pending_write_mems = 0;
2253 /* Compute backward dependences inside bb. In a multiple blocks region:
2254 (1) a bb is analyzed after its predecessors, and (2) the lists in
2255 effect at the end of bb (after analyzing for bb) are inherited by
2256 bb's successors.
2258 Specifically for reg-reg data dependences, the block insns are
2259 scanned by sched_analyze () top-to-bottom. Two lists are
2260 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2261 and reg_last[].uses for register USEs.
2263 When analysis is completed for bb, we update for its successors:
2264 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2265 ; - USES[succ] = Union (USES [succ], USES [bb])
2267 The mechanism for computing mem-mem data dependence is very
2268 similar, and the result is interblock dependences in the region. */
2270 static void
2271 compute_block_backward_dependences (bb)
2272 int bb;
2274 rtx head, tail;
2275 struct deps tmp_deps;
2277 tmp_deps = bb_deps[bb];
2279 /* Do the analysis for this block. */
2280 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2281 sched_analyze (&tmp_deps, head, tail);
2282 add_branch_dependences (head, tail);
2284 if (current_nr_blocks > 1)
2285 propagate_deps (bb, &tmp_deps);
2287 /* Free up the INSN_LISTs. */
2288 free_deps (&tmp_deps);
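/* Illustrative sketch, guarded out of the build: the DEFS/USES union
   propagation described before compute_block_backward_dependences, with the
   register lists reduced to plain bitmasks and the region laid out so that
   every successor of block I has an index greater than I -- the same
   "a bb is analyzed after its predecessors" ordering the real pass relies
   on.  TOY_NR_BLOCKS and the toy_* arrays are hypothetical.  */
#if 0
#define TOY_NR_BLOCKS 4

static unsigned int toy_defs[TOY_NR_BLOCKS];
static unsigned int toy_uses[TOY_NR_BLOCKS];

/* toy_succ[i][j] is nonzero when block J is a successor of block I
   inside the region.  */
static unsigned char toy_succ[TOY_NR_BLOCKS][TOY_NR_BLOCKS];

static void
toy_propagate_deps ()
{
  int i, j;

  for (i = 0; i < TOY_NR_BLOCKS; i++)
    for (j = i + 1; j < TOY_NR_BLOCKS; j++)
      if (toy_succ[i][j])
	{
	  /* DEFS[succ] = Union (DEFS [succ], DEFS [bb])
	     USES[succ] = Union (USES [succ], USES [bb])  */
	  toy_defs[j] |= toy_defs[i];
	  toy_uses[j] |= toy_uses[i];
	}
}
#endif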
2291 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2292 them to the unused_*_list variables, so that they can be reused. */
2294 static void
2295 free_pending_lists ()
2297 int bb;
2299 for (bb = 0; bb < current_nr_blocks; bb++)
2301 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2302 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2303 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2304 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2308 /* Print dependences for debugging, callable from debugger. */
2310 void
2311 debug_dependencies ()
2313 int bb;
2315 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2316 for (bb = 0; bb < current_nr_blocks; bb++)
2318 if (1)
2320 rtx head, tail;
2321 rtx next_tail;
2322 rtx insn;
2324 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2325 next_tail = NEXT_INSN (tail);
2326 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2327 BB_TO_BLOCK (bb), bb);
2329 if (targetm.sched.use_dfa_pipeline_interface
2330 && (*targetm.sched.use_dfa_pipeline_interface) ())
2332 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2333 "insn", "code", "bb", "dep", "prio", "cost",
2334 "reservation");
2335 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2336 "----", "----", "--", "---", "----", "----",
2337 "-----------");
2339 else
2341 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2342 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
2343 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
2344 "----", "----", "--", "---", "----", "----", "--------", "-----");
2347 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2349 rtx link;
2351 if (! INSN_P (insn))
2353 int n;
2354 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2355 if (GET_CODE (insn) == NOTE)
2357 n = NOTE_LINE_NUMBER (insn);
2358 if (n < 0)
2359 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2360 else
2361 fprintf (sched_dump, "line %d, file %s\n", n,
2362 NOTE_SOURCE_FILE (insn));
2364 else
2365 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2366 continue;
2369 if (targetm.sched.use_dfa_pipeline_interface
2370 && (*targetm.sched.use_dfa_pipeline_interface) ())
2372 fprintf (sched_dump,
2373 ";; %s%5d%6d%6d%6d%6d%6d ",
2374 (SCHED_GROUP_P (insn) ? "+" : " "),
2375 INSN_UID (insn),
2376 INSN_CODE (insn),
2377 INSN_BB (insn),
2378 INSN_DEP_COUNT (insn),
2379 INSN_PRIORITY (insn),
2380 insn_cost (insn, 0, 0));
2382 if (recog_memoized (insn) < 0)
2383 fprintf (sched_dump, "nothing");
2384 else
2385 print_reservation (sched_dump, insn);
2387 else
2389 int unit = insn_unit (insn);
2390 int range
2391 = (unit < 0
2392 || function_units[unit].blockage_range_function == 0
2393 ? 0
2394 : function_units[unit].blockage_range_function (insn));
2395 fprintf (sched_dump,
2396 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
2397 (SCHED_GROUP_P (insn) ? "+" : " "),
2398 INSN_UID (insn),
2399 INSN_CODE (insn),
2400 INSN_BB (insn),
2401 INSN_DEP_COUNT (insn),
2402 INSN_PRIORITY (insn),
2403 insn_cost (insn, 0, 0),
2404 (int) MIN_BLOCKAGE_COST (range),
2405 (int) MAX_BLOCKAGE_COST (range));
2406 insn_print_units (insn);
2409 fprintf (sched_dump, "\t: ");
2410 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2411 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2412 fprintf (sched_dump, "\n");
2416 fprintf (sched_dump, "\n");
2419 /* Schedule a region. A region is either an inner loop, a loop-free
2420 subroutine, or a single basic block. Each bb in the region is
2421 scheduled after its flow predecessors. */
2423 static void
2424 schedule_region (rgn)
2425 int rgn;
2427 int bb;
2428 int rgn_n_insns = 0;
2429 int sched_rgn_n_insns = 0;
2431 /* Set variables for the current region. */
2432 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2433 current_blocks = RGN_BLOCKS (rgn);
2435 init_deps_global ();
2437 /* Initializations for region data dependence analysis. */
2438 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
2439 for (bb = 0; bb < current_nr_blocks; bb++)
2440 init_deps (bb_deps + bb);
2442 /* Compute LOG_LINKS. */
2443 for (bb = 0; bb < current_nr_blocks; bb++)
2444 compute_block_backward_dependences (bb);
2446 /* Compute INSN_DEPEND. */
2447 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2449 rtx head, tail;
2450 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2452 compute_forward_dependences (head, tail);
2454 if (targetm.sched.dependencies_evaluation_hook)
2455 targetm.sched.dependencies_evaluation_hook (head, tail);
2459 /* Set priorities. */
2460 for (bb = 0; bb < current_nr_blocks; bb++)
2462 rtx head, tail;
2463 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2465 rgn_n_insns += set_priorities (head, tail);
2468 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2469 if (current_nr_blocks > 1)
2471 int i;
2473 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
2475 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2476 sbitmap_vector_zero (dom, current_nr_blocks);
2477 /* Edge to bit. */
2478 rgn_nr_edges = 0;
2479 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
2480 for (i = 1; i < nr_edges; i++)
2481 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2482 EDGE_TO_BIT (i) = rgn_nr_edges++;
2483 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
2485 rgn_nr_edges = 0;
2486 for (i = 1; i < nr_edges; i++)
2487 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2488 rgn_edges[rgn_nr_edges++] = i;
2490 /* Split edges. */
2491 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2492 sbitmap_vector_zero (pot_split, current_nr_blocks);
2493 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2494 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2496 /* Compute probabilities, dominators, split_edges. */
2497 for (bb = 0; bb < current_nr_blocks; bb++)
2498 compute_dom_prob_ps (bb);
2501 /* Now we can schedule all blocks. */
2502 for (bb = 0; bb < current_nr_blocks; bb++)
2504 rtx head, tail;
2505 int b = BB_TO_BLOCK (bb);
2507 get_block_head_tail (b, &head, &tail);
2509 if (no_real_insns_p (head, tail))
2510 continue;
2512 current_sched_info->prev_head = PREV_INSN (head);
2513 current_sched_info->next_tail = NEXT_INSN (tail);
2515 if (write_symbols != NO_DEBUG)
2517 save_line_notes (b, head, tail);
2518 rm_line_notes (head, tail);
2521 /* rm_other_notes only removes notes which are _inside_ the
2522 block---that is, it won't remove notes before the first real insn
2523 or after the last real insn of the block. So if the first insn
2524 has a REG_SAVE_NOTE which would otherwise be emitted before the
2525 insn, it is redundant with the note before the start of the
2526 block, and so we have to take it out. */
2527 if (INSN_P (head))
2529 rtx note;
2531 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2532 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2534 remove_note (head, note);
2535 note = XEXP (note, 1);
2536 remove_note (head, note);
2540 /* Remove remaining note insns from the block, save them in
2541 note_list. These notes are restored at the end of
2542 schedule_block (). */
2543 rm_other_notes (head, tail);
2545 target_bb = bb;
2547 current_sched_info->queue_must_finish_empty
2548 = current_nr_blocks > 1 && !flag_schedule_interblock;
2550 schedule_block (b, rgn_n_insns);
2551 sched_rgn_n_insns += sched_n_insns;
2553 /* Update target block boundaries. */
2554 if (head == BLOCK_HEAD (b))
2555 BLOCK_HEAD (b) = current_sched_info->head;
2556 if (tail == BLOCK_END (b))
2557 BLOCK_END (b) = current_sched_info->tail;
2559 /* Clean up. */
2560 if (current_nr_blocks > 1)
2562 free (candidate_table);
2563 free (bblst_table);
2564 free (bitlst_table);
2568 /* Sanity check: verify that all region insns were scheduled. */
2569 if (sched_rgn_n_insns != rgn_n_insns)
2570 abort ();
2572 /* Restore line notes. */
2573 if (write_symbols != NO_DEBUG)
2575 for (bb = 0; bb < current_nr_blocks; bb++)
2577 rtx head, tail;
2578 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2579 restore_line_notes (head, tail);
2583 /* Done with this region. */
2584 free_pending_lists ();
2586 finish_deps_global ();
2588 free (bb_deps);
2590 if (current_nr_blocks > 1)
2592 free (prob);
2593 sbitmap_vector_free (dom);
2594 sbitmap_vector_free (pot_split);
2595 sbitmap_vector_free (ancestor_edges);
2596 free (edge_to_bit);
2597 free (rgn_edges);
2601 /* Indexed by region, holds the number of death notes found in that region.
2602 Used for consistency checks. */
2603 static int *deaths_in_region;
2605 /* Initialize data structures for region scheduling. */
2607 static void
2608 init_regions ()
2610 sbitmap blocks;
2611 int rgn;
2613 nr_regions = 0;
2614 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
2615 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
2616 block_to_bb = (int *) xmalloc ((last_basic_block) * sizeof (int));
2617 containing_rgn = (int *) xmalloc ((last_basic_block) * sizeof (int));
2619 /* Compute regions for scheduling. */
2620 if (reload_completed
2621 || n_basic_blocks == 1
2622 || !flag_schedule_interblock)
2624 find_single_block_region ();
2626 else
2628 /* Verify that a 'good' control flow graph can be built. */
2629 if (is_cfg_nonregular ())
2631 find_single_block_region ();
2633 else
2635 dominance_info dom;
2636 struct edge_list *edge_list;
2638 /* The scheduler runs after estimate_probabilities; therefore, we
2639 can't blindly call back into find_basic_blocks since doing so
2640 could invalidate the branch probability info. We could,
2641 however, call cleanup_cfg. */
2642 edge_list = create_edge_list ();
2644 /* Compute the dominators and post dominators. */
2645 dom = calculate_dominance_info (CDI_DOMINATORS);
2647 /* build_control_flow will return nonzero if it detects unreachable
2648 blocks or any other irregularity with the cfg which prevents
2649 cross block scheduling. */
2650 if (build_control_flow (edge_list) != 0)
2651 find_single_block_region ();
2652 else
2653 find_rgns (edge_list, dom);
2655 if (sched_verbose >= 3)
2656 debug_regions ();
2658 /* We are done with flow's edge list. */
2659 free_edge_list (edge_list);
2661 /* For now we free the dominance info here. This will move as more and
2662 more of haifa is converted to using the cfg code in flow.c. */
2663 free_dominance_info (dom);
2668 if (CHECK_DEAD_NOTES)
2670 blocks = sbitmap_alloc (last_basic_block);
2671 deaths_in_region = (int *) xmalloc (sizeof (int) * nr_regions);
2672 /* Remove all death notes from the subroutine. */
2673 for (rgn = 0; rgn < nr_regions; rgn++)
2675 int b;
2677 sbitmap_zero (blocks);
2678 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2679 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2681 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2683 sbitmap_free (blocks);
2685 else
2686 count_or_remove_death_notes (NULL, 1);
2689 /* The one entry point in this file. DUMP_FILE is the dump file for
2690 this pass. */
2692 void
2693 schedule_insns (dump_file)
2694 FILE *dump_file;
2696 sbitmap large_region_blocks, blocks;
2697 int rgn;
2698 int any_large_regions;
2699 basic_block bb;
2701 /* Taking care of this degenerate case makes the rest of
2702 this code simpler. */
2703 if (n_basic_blocks == 0)
2704 return;
2706 nr_inter = 0;
2707 nr_spec = 0;
2709 sched_init (dump_file);
2711 init_regions ();
2713 current_sched_info = &region_sched_info;
2715 /* Schedule every region in the subroutine. */
2716 for (rgn = 0; rgn < nr_regions; rgn++)
2717 schedule_region (rgn);
2719 /* Update life analysis for the subroutine. Do single block regions
2720 first so that we can verify that live_at_start didn't change. Then
2721 do all other blocks. */
2722 /* ??? There is an outside possibility that update_life_info, or more
2723 to the point propagate_block, could get called with nonzero flags
2724 more than once for one basic block. This would be kinda bad if it
2725 were to happen, since REG_INFO would be accumulated twice for the
2726 block, and we'd have twice the REG_DEAD notes.
2728 I'm fairly certain that this _shouldn't_ happen, since I don't think
2729 that live_at_start should change at region heads. Not sure what the
2730 best way to test for this kind of thing... */
2732 allocate_reg_life_data ();
2733 compute_bb_for_insn ();
2735 any_large_regions = 0;
2736 large_region_blocks = sbitmap_alloc (last_basic_block);
2737 sbitmap_zero (large_region_blocks);
2738 FOR_EACH_BB (bb)
2739 SET_BIT (large_region_blocks, bb->index);
2741 blocks = sbitmap_alloc (last_basic_block);
2742 sbitmap_zero (blocks);
2744 /* Update life information. For regions consisting of multiple blocks
2745 we've possibly done interblock scheduling that affects global liveness.
2746 For regions consisting of single blocks we need to do only local
2747 liveness. */
2748 for (rgn = 0; rgn < nr_regions; rgn++)
2749 if (RGN_NR_BLOCKS (rgn) > 1)
2750 any_large_regions = 1;
2751 else
2753 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2754 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2757 /* Don't update reg info after reload, since that affects
2758 regs_ever_live, which should not change after reload. */
2759 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2760 (reload_completed ? PROP_DEATH_NOTES
2761 : PROP_DEATH_NOTES | PROP_REG_INFO));
2762 if (any_large_regions)
2764 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2765 PROP_DEATH_NOTES | PROP_REG_INFO);
2768 if (CHECK_DEAD_NOTES)
2770 /* Verify the counts of death notes in the single basic block
2771 regions. */
2772 for (rgn = 0; rgn < nr_regions; rgn++)
2773 if (RGN_NR_BLOCKS (rgn) == 1)
2775 sbitmap_zero (blocks);
2776 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2778 if (deaths_in_region[rgn]
2779 != count_or_remove_death_notes (blocks, 0))
2780 abort ();
2782 free (deaths_in_region);
2785 /* Reposition the prologue and epilogue notes in case we moved the
2786 prologue/epilogue insns. */
2787 if (reload_completed)
2788 reposition_prologue_and_epilogue_notes (get_insns ());
2790 /* Delete redundant line notes. */
2791 if (write_symbols != NO_DEBUG)
2792 rm_redundant_line_notes ();
2794 if (sched_verbose)
2796 if (reload_completed == 0 && flag_schedule_interblock)
2798 fprintf (sched_dump,
2799 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2800 nr_inter, nr_spec);
2802 else
2804 if (nr_inter > 0)
2805 abort ();
2807 fprintf (sched_dump, "\n\n");
2810 /* Clean up. */
2811 free (rgn_table);
2812 free (rgn_bb_table);
2813 free (block_to_bb);
2814 free (containing_rgn);
2816 sched_finish ();
2818 if (edge_table)
2820 free (edge_table);
2821 edge_table = NULL;
2824 if (in_edges)
2826 free (in_edges);
2827 in_edges = NULL;
2829 if (out_edges)
2831 free (out_edges);
2832 out_edges = NULL;
2835 sbitmap_free (blocks);
2836 sbitmap_free (large_region_blocks);
2838 #endif