gcc/sched-rgn.c
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 02111-1307, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions require
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "basic-block.h"
57 #include "regs.h"
58 #include "function.h"
59 #include "flags.h"
60 #include "insn-config.h"
61 #include "insn-attr.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "recog.h"
65 #include "cfglayout.h"
66 #include "params.h"
67 #include "sched-int.h"
68 #include "target.h"
70 /* Define when we want to count REG_DEAD notes before and after scheduling
71 for sanity checking. We can't do that when conditional execution is used,
72 as REG_DEAD notes exist only for unconditional deaths. */
74 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
75 #define CHECK_DEAD_NOTES 1
76 #else
77 #define CHECK_DEAD_NOTES 0
78 #endif
81 #ifdef INSN_SCHEDULING
82 /* Some accessor macros for h_i_d members only used within this file. */
83 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
84 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
85 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
87 /* nr_inter/spec counts interblock/speculative motion for the function. */
88 static int nr_inter, nr_spec;
90 /* Control flow graph edges are kept in circular lists. */
91 typedef struct
93 int from_block;
94 int to_block;
95 int next_in;
96 int next_out;
98 haifa_edge;
99 static haifa_edge *edge_table;
101 #define NEXT_IN(edge) (edge_table[edge].next_in)
102 #define NEXT_OUT(edge) (edge_table[edge].next_out)
103 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
104 #define TO_BLOCK(edge) (edge_table[edge].to_block)
106 /* Number of edges in the control flow graph. (In fact, larger than
107 that by 1, since edge 0 is unused.) */
108 static int nr_edges;
110 /* Circular list of incoming/outgoing edges of a block. */
111 static int *in_edges;
112 static int *out_edges;
114 #define IN_EDGES(block) (in_edges[block])
115 #define OUT_EDGES(block) (out_edges[block])
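/* Illustrative sketch, not part of the original file: how the circular
   out-edge list of a block is walked with the macros above.  Edge number 0
   means "no edge"; the list is circular, so the walk stops once it returns
   to the first edge.  The duplicate check in new_edge below uses exactly
   this pattern.  */
#if 0
static void
walk_out_edges_sketch (int block)
{
  int fst_edge = OUT_EDGES (block);
  int curr_edge = fst_edge;

  while (curr_edge)
    {
      int succ = TO_BLOCK (curr_edge);  /* A successor block of BLOCK.  */
      (void) succ;

      curr_edge = NEXT_OUT (curr_edge);
      if (curr_edge == fst_edge)
	break;
    }
}
#endif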
117 static int is_cfg_nonregular (void);
118 static int build_control_flow (struct edge_list *);
119 static void new_edge (int, int);
121 /* A region is the main entity for interblock scheduling: insns
122 are allowed to move between blocks in the same region, along
123 control flow graph edges, in the 'up' direction. */
124 typedef struct
126 int rgn_nr_blocks; /* Number of blocks in region. */
127 int rgn_blocks; /* Blocks in the region (actually an index into rgn_bb_table). */
129 region;
131 /* Number of regions in the procedure. */
132 static int nr_regions;
134 /* Table of region descriptions. */
135 static region *rgn_table;
137 /* Array of lists of regions' blocks. */
138 static int *rgn_bb_table;
140 /* Topological order of blocks in the region (if b2 is reachable from
141 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
142 always referred to by either block or b, while its topological
143 order name (in the region) is referred to by bb. */
144 static int *block_to_bb;
146 /* The number of the region containing a block. */
147 static int *containing_rgn;
149 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
150 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
151 #define BLOCK_TO_BB(block) (block_to_bb[block])
152 #define CONTAINING_RGN(block) (containing_rgn[block])
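/* Illustrative sketch, not part of the original file: visiting the blocks
   of region RGN in topological (bb) order.  rgn_bb_table stores the blocks
   of all regions back to back; RGN_BLOCKS gives the start index of RGN and
   RGN_NR_BLOCKS its length.  debug_regions below checks the same invariants.  */
#if 0
static void
walk_region_blocks_sketch (int rgn)
{
  int bb;

  for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
    {
      int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];

      /* Every block recorded in a region satisfies these invariants.  */
      if (BLOCK_TO_BB (block) != bb || CONTAINING_RGN (block) != rgn)
	abort ();
    }
}
#endif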
154 void debug_regions (void);
155 static void find_single_block_region (void);
156 static void find_rgns (struct edge_list *);
157 static bool too_large (int, int *, int *);
159 extern void debug_live (int, int);
161 /* Blocks of the current region being scheduled. */
162 static int current_nr_blocks;
163 static int current_blocks;
165 /* The mapping from bb to block. */
166 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
168 typedef struct
170 int *first_member; /* Pointer to the list start in bitlst_table. */
171 int nr_members; /* The number of members of the bit list. */
173 bitlst;
175 static int bitlst_table_last;
176 static int *bitlst_table;
178 static void extract_bitlst (sbitmap, bitlst *);
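/* Illustrative sketch, not part of the original file: consuming a bitlst
   filled in by extract_bitlst.  first_member points into bitlst_table and
   nr_members is the length of that slice; each member is the index of a bit
   that was set in the original sbitmap.  compute_trg_info below reads its
   edgelst/bblst values in just this way.  */
#if 0
static void
walk_bitlst_sketch (bitlst *bl)
{
  int j;

  for (j = 0; j < bl->nr_members; j++)
    {
      int member = bl->first_member[j];  /* Index of a set bit.  */
      (void) member;
    }
}
#endif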
180 /* Target info declarations.
182 The block currently being scheduled is referred to as the "target" block,
183 while other blocks in the region from which insns can be moved to the
184 target are called "source" blocks. The candidate structure holds info
185 about such sources: are they valid? Speculative? Etc. */
186 typedef bitlst bblst;
187 typedef struct
189 char is_valid;
190 char is_speculative;
191 int src_prob;
192 bblst split_bbs;
193 bblst update_bbs;
195 candidate;
197 static candidate *candidate_table;
199 /* A speculative motion requires checking live information on the path
200 from 'source' to 'target'. The split blocks are those to be checked.
201 After a speculative motion, live information should be modified in
202 the 'update' blocks.
204 Lists of split and update blocks for each candidate of the current
205 target are in array bblst_table. */
206 static int *bblst_table, bblst_size, bblst_last;
208 #define IS_VALID(src) ( candidate_table[src].is_valid )
209 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
210 #define SRC_PROB(src) ( candidate_table[src].src_prob )
212 /* The bb being currently scheduled. */
213 static int target_bb;
215 /* List of edges. */
216 typedef bitlst edgelst;
218 /* Target info functions. */
219 static void split_edges (int, int, edgelst *);
220 static void compute_trg_info (int);
221 void debug_candidate (int);
222 void debug_candidates (int);
224 /* Dominators array: dom[i] contains the sbitmap of dominators of
225 bb i in the region. */
226 static sbitmap *dom;
228 /* bb 0 is the only region entry. */
229 #define IS_RGN_ENTRY(bb) (!bb)
231 /* Is bb_src dominated by bb_trg. */
232 #define IS_DOMINATED(bb_src, bb_trg) \
233 ( TEST_BIT (dom[bb_src], bb_trg) )
235 /* Probability: Prob[i] is a float in [0, 1] which is the probability
236 of bb i relative to the region entry. */
237 static float *prob;
239 /* The probability of bb_src, relative to bb_trg. Note that while
240 'prob[bb]' is a float in [0, 1], this macro returns an integer
241 in [0, 100]. */
242 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
243 prob[bb_trg])))
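/* Worked example, not in the original file: with prob[bb_src] == 0.30 and
   prob[bb_trg] == 0.60, GET_SRC_PROB (bb_src, bb_trg) evaluates to
   (int) (100.0 * 0.30 / 0.60) == 50, i.e. the probability of bb_src
   relative to bb_trg is about 50%.  */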
245 /* Bit-set of edges, where bit i stands for edge i. */
246 typedef sbitmap edgeset;
248 /* Number of edges in the region. */
249 static int rgn_nr_edges;
251 /* Array of size rgn_nr_edges. */
252 static int *rgn_edges;
255 /* Mapping from each edge in the graph to its number in the rgn. */
256 static int *edge_to_bit;
257 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
259 /* The split edges of a source bb are different for each target
260 bb. In order to compute this efficiently, the 'potential-split edges'
261 are computed for each bb prior to scheduling a region. This is actually
262 the split edges of each bb relative to the region entry.
264 pot_split[bb] is the set of potential split edges of bb. */
265 static edgeset *pot_split;
267 /* For every bb, a set of its ancestor edges. */
268 static edgeset *ancestor_edges;
270 static void compute_dom_prob_ps (int);
272 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
273 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
274 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
276 /* Parameters affecting the decision of rank_for_schedule().
277 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
278 #define MIN_PROBABILITY 40
280 /* Speculative scheduling functions. */
281 static int check_live_1 (int, rtx);
282 static void update_live_1 (int, rtx);
283 static int check_live (rtx, int);
284 static void update_live (rtx, int);
285 static void set_spec_fed (rtx);
286 static int is_pfree (rtx, int, int);
287 static int find_conditional_protection (rtx, int);
288 static int is_conditionally_protected (rtx, int, int);
289 static int is_prisky (rtx, int, int);
290 static int is_exception_free (rtx, int, int);
292 static bool sets_likely_spilled (rtx);
293 static void sets_likely_spilled_1 (rtx, rtx, void *);
294 static void add_branch_dependences (rtx, rtx);
295 static void compute_block_backward_dependences (int);
296 void debug_dependencies (void);
298 static void init_regions (void);
299 static void schedule_region (int);
300 static rtx concat_INSN_LIST (rtx, rtx);
301 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
302 static void propagate_deps (int, struct deps *);
303 static void free_pending_lists (void);
305 /* Functions for construction of the control flow graph. */
307 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
309 We decide not to build the control flow graph if there is possibly more
310 than one entry to the function, if computed branches exist, or if we
311 have nonlocal gotos. */
313 static int
314 is_cfg_nonregular (void)
316 basic_block b;
317 rtx insn;
318 RTX_CODE code;
320 /* If we have a label that could be the target of a nonlocal goto, then
321 the cfg is not well structured. */
322 if (nonlocal_goto_handler_labels)
323 return 1;
325 /* If we have any forced labels, then the cfg is not well structured. */
326 if (forced_labels)
327 return 1;
329 /* If this function has a computed jump, then we consider the cfg
330 not well structured. */
331 if (current_function_has_computed_jump)
332 return 1;
334 /* If we have exception handlers, then we consider the cfg not well
335 structured. ?!? We should be able to handle this now that flow.c
336 computes an accurate cfg for EH. */
337 if (current_function_has_exception_handlers ())
338 return 1;
340 /* If we have non-jumping insns which refer to labels, then we consider
341 the cfg not well structured. */
342 /* Check for labels referred to by insns other than jumps. */
343 FOR_EACH_BB (b)
344 for (insn = BB_HEAD (b); ; insn = NEXT_INSN (insn))
346 code = GET_CODE (insn);
347 if (INSN_P (insn) && code != JUMP_INSN)
349 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
351 if (note
352 && ! (JUMP_P (NEXT_INSN (insn))
353 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
354 XEXP (note, 0))))
355 return 1;
358 if (insn == BB_END (b))
359 break;
362 /* All the tests passed. Consider the cfg well structured. */
363 return 0;
366 /* Build the control flow graph and set nr_edges.
368 Instead of trying to build a cfg ourselves, we rely on flow to
369 do it for us. Stamp out useless code (and bug) duplication.
371 Return nonzero if an irregularity in the cfg is found which would
372 prevent cross block scheduling. */
374 static int
375 build_control_flow (struct edge_list *edge_list)
377 int i, unreachable, num_edges;
378 basic_block b;
380 /* This already accounts for entry/exit edges. */
381 num_edges = NUM_EDGES (edge_list);
383 /* Unreachable loops with more than one basic block are detected
384 during the DFS traversal in find_rgns.
386 Unreachable loops with a single block are detected here. This
387 test is redundant with the one in find_rgns, but it's much
388 cheaper to go ahead and catch the trivial case here. */
389 unreachable = 0;
390 FOR_EACH_BB (b)
392 if (EDGE_COUNT (b->preds) == 0
393 || (EDGE_PRED (b, 0)->src == b
394 && EDGE_COUNT (b->preds) == 1))
395 unreachable = 1;
398 /* ??? We can kill these soon. */
399 in_edges = xcalloc (last_basic_block, sizeof (int));
400 out_edges = xcalloc (last_basic_block, sizeof (int));
401 edge_table = xcalloc (num_edges, sizeof (haifa_edge));
403 nr_edges = 0;
404 for (i = 0; i < num_edges; i++)
406 edge e = INDEX_EDGE (edge_list, i);
408 if (e->dest != EXIT_BLOCK_PTR
409 && e->src != ENTRY_BLOCK_PTR)
410 new_edge (e->src->index, e->dest->index);
413 /* Increment by 1, since edge 0 is unused. */
414 nr_edges++;
416 return unreachable;
419 /* Record an edge in the control flow graph from SOURCE to TARGET.
421 In theory, this is redundant with the s_succs computed above, but
422 we have not converted all of haifa to use information from the
423 integer lists. */
425 static void
426 new_edge (int source, int target)
428 int e, next_edge;
429 int curr_edge, fst_edge;
431 /* Check for duplicates. */
432 fst_edge = curr_edge = OUT_EDGES (source);
433 while (curr_edge)
435 if (FROM_BLOCK (curr_edge) == source
436 && TO_BLOCK (curr_edge) == target)
438 return;
441 curr_edge = NEXT_OUT (curr_edge);
443 if (fst_edge == curr_edge)
444 break;
447 e = ++nr_edges;
449 FROM_BLOCK (e) = source;
450 TO_BLOCK (e) = target;
452 if (OUT_EDGES (source))
454 next_edge = NEXT_OUT (OUT_EDGES (source));
455 NEXT_OUT (OUT_EDGES (source)) = e;
456 NEXT_OUT (e) = next_edge;
458 else
460 OUT_EDGES (source) = e;
461 NEXT_OUT (e) = e;
464 if (IN_EDGES (target))
466 next_edge = NEXT_IN (IN_EDGES (target));
467 NEXT_IN (IN_EDGES (target)) = e;
468 NEXT_IN (e) = next_edge;
470 else
472 IN_EDGES (target) = e;
473 NEXT_IN (e) = e;
477 /* Translate a bit-set SET to a list BL of the bit-set members. */
479 static void
480 extract_bitlst (sbitmap set, bitlst *bl)
482 int i;
484 /* bblst table space is reused in each call to extract_bitlst. */
485 bitlst_table_last = 0;
487 bl->first_member = &bitlst_table[bitlst_table_last];
488 bl->nr_members = 0;
490 /* Iterate over each word in the bitset. */
491 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i,
493 bitlst_table[bitlst_table_last++] = i;
494 (bl->nr_members)++;
499 /* Functions for the construction of regions. */
501 /* Print the regions, for debugging purposes. Callable from debugger. */
503 void
504 debug_regions (void)
506 int rgn, bb;
508 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
509 for (rgn = 0; rgn < nr_regions; rgn++)
511 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
512 rgn_table[rgn].rgn_nr_blocks);
513 fprintf (sched_dump, ";;\tbb/block: ");
515 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
517 current_blocks = RGN_BLOCKS (rgn);
519 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
520 abort ();
522 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
525 fprintf (sched_dump, "\n\n");
529 /* Build a single block region for each basic block in the function.
530 This allows for using the same code for interblock and basic block
531 scheduling. */
533 static void
534 find_single_block_region (void)
536 basic_block bb;
538 nr_regions = 0;
540 FOR_EACH_BB (bb)
542 rgn_bb_table[nr_regions] = bb->index;
543 RGN_NR_BLOCKS (nr_regions) = 1;
544 RGN_BLOCKS (nr_regions) = nr_regions;
545 CONTAINING_RGN (bb->index) = nr_regions;
546 BLOCK_TO_BB (bb->index) = 0;
547 nr_regions++;
551 /* Update number of blocks and the estimate for number of insns
552 in the region. Return true if the region is "too large" for interblock
553 scheduling (compile time considerations). */
555 static bool
556 too_large (int block, int *num_bbs, int *num_insns)
558 (*num_bbs)++;
559 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
560 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
562 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
563 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
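/* Illustrative sketch, not part of the original file: how find_rgns below
   uses too_large while growing a candidate region one block at a time
   (simplified; in find_rgns the counters start from the loop header's own
   block and insn counts).  CANDIDATES is a hypothetical array of block
   numbers; growth stops once either --param limit would be exceeded.  */
#if 0
static void
grow_region_sketch (int *candidates, int n)
{
  int i, num_bbs = 1, num_insns = 0;

  for (i = 0; i < n; i++)
    if (too_large (candidates[i], &num_bbs, &num_insns))
      break;
}
#endif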
566 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
567 is still an inner loop. Put in max_hdr[blk] the header of the innermost
568 loop containing blk. */
569 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
571 if (max_hdr[blk] == -1) \
572 max_hdr[blk] = hdr; \
573 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
574 RESET_BIT (inner, hdr); \
575 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
577 RESET_BIT (inner,max_hdr[blk]); \
578 max_hdr[blk] = hdr; \
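/* Worked example, not in the original file: suppose BLK was first recorded
   as belonging to the loop headed by H1 (max_hdr[blk] == H1) and is now seen
   inside the loop headed by H2.  If dfs_nr[H1] > dfs_nr[H2], H1 was visited
   later in the DFS and is therefore the deeper header, so BLK keeps H1 and
   H2 loses its "inner" mark; otherwise BLK switches to H2 and H1 loses the
   mark.  */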
582 /* Find regions for interblock scheduling.
584 A region for scheduling can be:
586 * A loop-free procedure, or
588 * A reducible inner loop, or
590 * A basic block not contained in any other region.
592 ?!? In theory we could build other regions based on extended basic
593 blocks or reverse extended basic blocks. Is it worth the trouble?
595 Loop blocks that form a region are put into the region's block list
596 in topological order.
598 This procedure stores its results into the following global (ick) variables
600 * nr_regions
601 * rgn_table
602 * rgn_bb_table
603 * block_to_bb
604 * containing_rgn
606 We use dominator relationships to avoid making regions out of non-reducible
607 loops.
609 This procedure needs to be converted to work on pred/succ lists instead
610 of edge tables. That would simplify it somewhat. */
612 static void
613 find_rgns (struct edge_list *edge_list)
615 int *max_hdr, *dfs_nr, *stack, *degree;
616 char no_loops = 1;
617 int node, child, loop_head, i, head, tail;
618 int count = 0, sp, idx = 0;
619 int current_edge = out_edges[EDGE_SUCC (ENTRY_BLOCK_PTR, 0)->dest->index];
620 int num_bbs, num_insns, unreachable;
621 int too_large_failure;
622 basic_block bb;
624 /* Note if an edge has been passed. */
625 sbitmap passed;
627 /* Note if a block is a natural loop header. */
628 sbitmap header;
630 /* Note if a block is a natural inner loop header. */
631 sbitmap inner;
633 /* Note if a block is in the block queue. */
634 sbitmap in_queue;
636 /* Note if a block is on the DFS stack. */
637 sbitmap in_stack;
639 int num_edges = NUM_EDGES (edge_list);
641 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
642 and a mapping from block to its loop header (if the block is contained
643 in a loop, else -1).
645 Store results in HEADER, INNER, and MAX_HDR respectively, these will
646 be used as inputs to the second traversal.
648 STACK, SP and DFS_NR are only used during the first traversal. */
650 /* Allocate and initialize variables for the first traversal. */
651 max_hdr = xmalloc (last_basic_block * sizeof (int));
652 dfs_nr = xcalloc (last_basic_block, sizeof (int));
653 stack = xmalloc (nr_edges * sizeof (int));
655 inner = sbitmap_alloc (last_basic_block);
656 sbitmap_ones (inner);
658 header = sbitmap_alloc (last_basic_block);
659 sbitmap_zero (header);
661 passed = sbitmap_alloc (nr_edges);
662 sbitmap_zero (passed);
664 in_queue = sbitmap_alloc (last_basic_block);
665 sbitmap_zero (in_queue);
667 in_stack = sbitmap_alloc (last_basic_block);
668 sbitmap_zero (in_stack);
670 for (i = 0; i < last_basic_block; i++)
671 max_hdr[i] = -1;
673 /* DFS traversal to find inner loops in the cfg. */
675 sp = -1;
676 while (1)
678 if (current_edge == 0 || TEST_BIT (passed, current_edge))
680 /* We have reached a leaf node or a node that was already
681 processed. Pop edges off the stack until we find
682 an edge that has not yet been processed. */
683 while (sp >= 0
684 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
686 /* Pop entry off the stack. */
687 current_edge = stack[sp--];
688 node = FROM_BLOCK (current_edge);
689 child = TO_BLOCK (current_edge);
690 RESET_BIT (in_stack, child);
691 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
692 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
693 current_edge = NEXT_OUT (current_edge);
696 /* See if we have finished the DFS tree traversal. */
697 if (sp < 0 && TEST_BIT (passed, current_edge))
698 break;
700 /* Nope, continue the traversal with the popped node. */
701 continue;
704 /* Process a node. */
705 node = FROM_BLOCK (current_edge);
706 child = TO_BLOCK (current_edge);
707 SET_BIT (in_stack, node);
708 dfs_nr[node] = ++count;
710 /* If the successor is in the stack, then we've found a loop.
711 Mark the loop, if it is not a natural loop, then it will
712 be rejected during the second traversal. */
713 if (TEST_BIT (in_stack, child))
715 no_loops = 0;
716 SET_BIT (header, child);
717 UPDATE_LOOP_RELATIONS (node, child);
718 SET_BIT (passed, current_edge);
719 current_edge = NEXT_OUT (current_edge);
720 continue;
723 /* If the child was already visited, then there is no need to visit
724 it again. Just update the loop relationships and restart
725 with a new edge. */
726 if (dfs_nr[child])
728 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
729 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
730 SET_BIT (passed, current_edge);
731 current_edge = NEXT_OUT (current_edge);
732 continue;
735 /* Push an entry on the stack and continue DFS traversal. */
736 stack[++sp] = current_edge;
737 SET_BIT (passed, current_edge);
738 current_edge = OUT_EDGES (child);
740 /* This is temporary until haifa is converted to use rth's new
741 cfg routines which have true entry/exit blocks and the
742 appropriate edges from/to those blocks.
744 Generally we update dfs_nr for a node when we process its
745 out edge. However, if the node has no out edge then we will
746 not set dfs_nr for that node. This can confuse the scheduler
747 into thinking that we have unreachable blocks, which in turn
748 disables cross block scheduling.
750 So, if we have a node with no out edges, go ahead and mark it
751 as reachable now. */
752 if (current_edge == 0)
753 dfs_nr[child] = ++count;
756 /* Another check for unreachable blocks. The earlier test in
757 is_cfg_nonregular only finds unreachable blocks that do not
758 form a loop.
760 The DFS traversal will mark every block that is reachable from
761 the entry node by placing a nonzero value in dfs_nr. Thus if
762 dfs_nr is zero for any block, then it must be unreachable. */
763 unreachable = 0;
764 FOR_EACH_BB (bb)
765 if (dfs_nr[bb->index] == 0)
767 unreachable = 1;
768 break;
771 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
772 to hold degree counts. */
773 degree = dfs_nr;
775 FOR_EACH_BB (bb)
776 degree[bb->index] = 0;
777 for (i = 0; i < num_edges; i++)
779 edge e = INDEX_EDGE (edge_list, i);
781 if (e->dest != EXIT_BLOCK_PTR)
782 degree[e->dest->index]++;
785 /* Do not perform region scheduling if there are any unreachable
786 blocks. */
787 if (!unreachable)
789 int *queue;
791 if (no_loops)
792 SET_BIT (header, 0);
794 /* Second traversal: find reducible inner loops and topologically sort
795 the blocks of each region. */
797 queue = xmalloc (n_basic_blocks * sizeof (int));
799 /* Find blocks which are inner loop headers. We still have non-reducible
800 loops to consider at this point. */
801 FOR_EACH_BB (bb)
803 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
805 edge e;
806 basic_block jbb;
808 /* Now check that the loop is reducible. We do this separately
809 from finding inner loops so that we do not find a reducible
810 loop which contains an inner non-reducible loop.
812 A simple way to find reducible/natural loops is to verify
813 that each block in the loop is dominated by the loop
814 header.
816 If there exists a block that is not dominated by the loop
817 header, then the block is reachable from outside the loop
818 and thus the loop is not a natural loop. */
819 FOR_EACH_BB (jbb)
821 /* First identify blocks in the loop, except for the loop
822 entry block. */
823 if (bb->index == max_hdr[jbb->index] && bb != jbb)
825 /* Now verify that the block is dominated by the loop
826 header. */
827 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
828 break;
832 /* If we exited the loop early, then BB is the header of
833 a non-reducible loop and we should quit processing it
834 now. */
835 if (jbb != EXIT_BLOCK_PTR)
836 continue;
838 /* BB is a header of an inner loop, or block 0 in a subroutine
839 with no loops at all. */
840 head = tail = -1;
841 too_large_failure = 0;
842 loop_head = max_hdr[bb->index];
844 /* Decrease the degree of all BB's successors for topological
845 ordering. */
847 FOR_EACH_EDGE (e, bb->succs)
849 if (e->dest != EXIT_BLOCK_PTR)
850 --degree[e->dest->index];
852 END_FOR_EACH_EDGE;
854 /* Estimate # insns, and count # blocks in the region. */
855 num_bbs = 1;
856 num_insns = (INSN_LUID (BB_END (bb))
857 - INSN_LUID (BB_HEAD (bb)));
859 /* Find all loop latches (blocks with back edges to the loop
860 header), or all the leaf blocks if the cfg has no loops.
862 Place those blocks into the queue. */
863 if (no_loops)
865 FOR_EACH_BB (jbb)
866 /* Leaf nodes have only a single successor which must
867 be EXIT_BLOCK. */
868 if (EDGE_COUNT (jbb->succs) == 1
869 && EDGE_SUCC (jbb, 0)->dest == EXIT_BLOCK_PTR)
871 queue[++tail] = jbb->index;
872 SET_BIT (in_queue, jbb->index);
874 if (too_large (jbb->index, &num_bbs, &num_insns))
876 too_large_failure = 1;
877 break;
881 else
883 edge e;
885 FOR_EACH_EDGE (e, bb->preds)
887 if (e->src == ENTRY_BLOCK_PTR)
888 continue;
890 node = e->src->index;
892 if (max_hdr[node] == loop_head && node != bb->index)
894 /* This is a loop latch. */
895 queue[++tail] = node;
896 SET_BIT (in_queue, node);
898 if (too_large (node, &num_bbs, &num_insns))
900 too_large_failure = 1;
901 break;
905 END_FOR_EACH_EDGE;
908 /* Now add all the blocks in the loop to the queue.
910 We know the loop is a natural loop; however the algorithm
911 above will not always mark certain blocks as being in the
912 loop. Consider:
913 node children
914 a b,c
915 b c
916 c a,d
917 d b
919 The algorithm in the DFS traversal may not mark B & D as part
920 of the loop (i.e. they will not have max_hdr set to A).
922 We know they cannot be loop latches (else they would have
923 had max_hdr set since they'd have a backedge to a dominator
924 block). So we don't need them on the initial queue.
926 We know they are part of the loop because they are dominated
927 by the loop header and can be reached by a backwards walk of
928 the edges starting with nodes on the initial queue.
930 It is safe and desirable to include those nodes in the
931 loop/scheduling region. To do so we would need to decrease
932 the degree of a node if it is the target of a backedge
933 within the loop itself as the node is placed in the queue.
935 We do not do this because I'm not sure that the actual
936 scheduling code will properly handle this case. ?!? */
938 while (head < tail && !too_large_failure)
940 edge e;
941 child = queue[++head];
943 FOR_EACH_EDGE (e, BASIC_BLOCK (child)->preds)
945 node = e->src->index;
947 /* See discussion above about nodes not marked as in
948 this loop during the initial DFS traversal. */
949 if (e->src == ENTRY_BLOCK_PTR
950 || max_hdr[node] != loop_head)
952 tail = -1;
953 break;
955 else if (!TEST_BIT (in_queue, node) && node != bb->index)
957 queue[++tail] = node;
958 SET_BIT (in_queue, node);
960 if (too_large (node, &num_bbs, &num_insns))
962 too_large_failure = 1;
963 break;
967 END_FOR_EACH_EDGE;
970 if (tail >= 0 && !too_large_failure)
972 /* Place the loop header into the list of region blocks. */
973 degree[bb->index] = -1;
974 rgn_bb_table[idx] = bb->index;
975 RGN_NR_BLOCKS (nr_regions) = num_bbs;
976 RGN_BLOCKS (nr_regions) = idx++;
977 CONTAINING_RGN (bb->index) = nr_regions;
978 BLOCK_TO_BB (bb->index) = count = 0;
980 /* Remove blocks from queue[] when their in degree
981 becomes zero. Repeat until no blocks are left on the
982 list. This produces a topological list of blocks in
983 the region. */
984 while (tail >= 0)
986 if (head < 0)
987 head = tail;
988 child = queue[head];
989 if (degree[child] == 0)
991 edge e;
993 degree[child] = -1;
994 rgn_bb_table[idx++] = child;
995 BLOCK_TO_BB (child) = ++count;
996 CONTAINING_RGN (child) = nr_regions;
997 queue[head] = queue[tail--];
999 FOR_EACH_EDGE (e, BASIC_BLOCK (child)->succs)
1001 if (e->dest != EXIT_BLOCK_PTR)
1002 --degree[e->dest->index];
1004 END_FOR_EACH_EDGE;
1006 else
1007 --head;
1009 ++nr_regions;
1013 free (queue);
1016 /* Any block that did not end up in a region is placed into a region
1017 by itself. */
1018 FOR_EACH_BB (bb)
1019 if (degree[bb->index] >= 0)
1021 rgn_bb_table[idx] = bb->index;
1022 RGN_NR_BLOCKS (nr_regions) = 1;
1023 RGN_BLOCKS (nr_regions) = idx++;
1024 CONTAINING_RGN (bb->index) = nr_regions++;
1025 BLOCK_TO_BB (bb->index) = 0;
1028 free (max_hdr);
1029 free (dfs_nr);
1030 free (stack);
1031 sbitmap_free (passed);
1032 sbitmap_free (header);
1033 sbitmap_free (inner);
1034 sbitmap_free (in_queue);
1035 sbitmap_free (in_stack);
1038 /* Functions for region scheduling information. */
1040 /* Compute dominators, probability, and potential-split-edges of bb.
1041 Assume that these values were already computed for bb's predecessors. */
1043 static void
1044 compute_dom_prob_ps (int bb)
1046 int nxt_in_edge, fst_in_edge, pred;
1047 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1049 prob[bb] = 0.0;
1050 if (IS_RGN_ENTRY (bb))
1052 SET_BIT (dom[bb], 0);
1053 prob[bb] = 1.0;
1054 return;
1057 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1059 /* Initialize dom[bb] to '111..1'. */
1060 sbitmap_ones (dom[bb]);
1064 pred = FROM_BLOCK (nxt_in_edge);
1065 sbitmap_a_and_b (dom[bb], dom[bb], dom[BLOCK_TO_BB (pred)]);
1066 sbitmap_a_or_b (ancestor_edges[bb], ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)]);
1068 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge));
1070 nr_out_edges = 1;
1071 nr_rgn_out_edges = 0;
1072 fst_out_edge = OUT_EDGES (pred);
1073 nxt_out_edge = NEXT_OUT (fst_out_edge);
1075 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[BLOCK_TO_BB (pred)]);
1077 SET_BIT (pot_split[bb], EDGE_TO_BIT (fst_out_edge));
1079 /* The successor doesn't belong in the region? */
1080 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1081 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1082 ++nr_rgn_out_edges;
1084 while (fst_out_edge != nxt_out_edge)
1086 ++nr_out_edges;
1087 /* The successor doesn't belong in the region? */
1088 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1089 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1090 ++nr_rgn_out_edges;
1091 SET_BIT (pot_split[bb], EDGE_TO_BIT (nxt_out_edge));
1092 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1096 /* Now nr_rgn_out_edges is the number of region-exit edges from
1097 pred, and nr_out_edges will be the number of pred out edges
1098 not leaving the region. */
1099 nr_out_edges -= nr_rgn_out_edges;
1100 if (nr_rgn_out_edges > 0)
1101 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1102 else
1103 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1104 nxt_in_edge = NEXT_IN (nxt_in_edge);
1106 while (fst_in_edge != nxt_in_edge);
1108 SET_BIT (dom[bb], bb);
1109 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1111 if (sched_verbose >= 2)
1112 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1113 (int) (100.0 * prob[bb]));
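/* Worked example, not in the original file, of the prob computation above:
   if a predecessor P of bb has prob[P] == 0.8, three out edges that stay
   inside the region and one that leaves it, then nr_out_edges ends up as 3
   and each in-region successor receives 0.9 * 0.8 / 3 == 0.24 from P; with
   no region-exit edge the contribution would be 0.8 / 3 instead.  */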
1116 /* Functions for target info. */
1118 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1119 Note that bb_trg dominates bb_src. */
1121 static void
1122 split_edges (int bb_src, int bb_trg, edgelst *bl)
1124 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1125 sbitmap_copy (src, pot_split[bb_src]);
1127 sbitmap_difference (src, src, pot_split[bb_trg]);
1128 extract_bitlst (src, bl);
1129 sbitmap_free (src);
1132 /* Find the valid candidate-source-blocks for the target block TRG, compute
1133 their probability, and check if they are speculative or not.
1134 For speculative sources, compute their update-blocks and split-blocks. */
1136 static void
1137 compute_trg_info (int trg)
1139 candidate *sp;
1140 edgelst el;
1141 int check_block, update_idx;
1142 int i, j, k, fst_edge, nxt_edge;
1144 /* Define some of the fields for the target bb as well. */
1145 sp = candidate_table + trg;
1146 sp->is_valid = 1;
1147 sp->is_speculative = 0;
1148 sp->src_prob = 100;
1150 for (i = trg + 1; i < current_nr_blocks; i++)
1152 sp = candidate_table + i;
1154 sp->is_valid = IS_DOMINATED (i, trg);
1155 if (sp->is_valid)
1157 sp->src_prob = GET_SRC_PROB (i, trg);
1158 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1161 if (sp->is_valid)
1163 split_edges (i, trg, &el);
1164 sp->is_speculative = (el.nr_members) ? 1 : 0;
1165 if (sp->is_speculative && !flag_schedule_speculative)
1166 sp->is_valid = 0;
1169 if (sp->is_valid)
1171 char *update_blocks;
1173 /* Compute split blocks and store them in bblst_table.
1174 The TO block of every split edge is a split block. */
1175 sp->split_bbs.first_member = &bblst_table[bblst_last];
1176 sp->split_bbs.nr_members = el.nr_members;
1177 for (j = 0; j < el.nr_members; bblst_last++, j++)
1178 bblst_table[bblst_last] =
1179 TO_BLOCK (rgn_edges[el.first_member[j]]);
1180 sp->update_bbs.first_member = &bblst_table[bblst_last];
1182 /* Compute update blocks and store them in bblst_table.
1183 For every split edge, look at the FROM block, and check
1184 all out edges. For each out edge that is not a split edge,
1185 add the TO block to the update block list. This list can end
1186 up with a lot of duplicates. We need to weed them out to avoid
1187 overrunning the end of the bblst_table. */
1188 update_blocks = alloca (last_basic_block);
1189 memset (update_blocks, 0, last_basic_block);
1191 update_idx = 0;
1192 for (j = 0; j < el.nr_members; j++)
1194 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1195 fst_edge = nxt_edge = OUT_EDGES (check_block);
1198 if (! update_blocks[TO_BLOCK (nxt_edge)])
1200 for (k = 0; k < el.nr_members; k++)
1201 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1202 break;
1204 if (k >= el.nr_members)
1206 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1207 update_blocks[TO_BLOCK (nxt_edge)] = 1;
1208 update_idx++;
1212 nxt_edge = NEXT_OUT (nxt_edge);
1214 while (fst_edge != nxt_edge);
1216 sp->update_bbs.nr_members = update_idx;
1218 /* Make sure we didn't overrun the end of bblst_table. */
1219 if (bblst_last > bblst_size)
1220 abort ();
1222 else
1224 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1226 sp->is_speculative = 0;
1227 sp->src_prob = 0;
1232 /* Print candidates info, for debugging purposes. Callable from debugger. */
1234 void
1235 debug_candidate (int i)
1237 if (!candidate_table[i].is_valid)
1238 return;
1240 if (candidate_table[i].is_speculative)
1242 int j;
1243 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1245 fprintf (sched_dump, "split path: ");
1246 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1248 int b = candidate_table[i].split_bbs.first_member[j];
1250 fprintf (sched_dump, " %d ", b);
1252 fprintf (sched_dump, "\n");
1254 fprintf (sched_dump, "update path: ");
1255 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1257 int b = candidate_table[i].update_bbs.first_member[j];
1259 fprintf (sched_dump, " %d ", b);
1261 fprintf (sched_dump, "\n");
1263 else
1265 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1269 /* Print candidates info, for debugging purposes. Callable from debugger. */
1271 void
1272 debug_candidates (int trg)
1274 int i;
1276 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1277 BB_TO_BLOCK (trg), trg);
1278 for (i = trg + 1; i < current_nr_blocks; i++)
1279 debug_candidate (i);
1282 /* Functions for speculative scheduling. */
1284 /* Return 0 if X is a set of a register that is live at the beginning of one
1285 of the split blocks of SRC, otherwise return 1. */
1287 static int
1288 check_live_1 (int src, rtx x)
1290 int i;
1291 int regno;
1292 rtx reg = SET_DEST (x);
1294 if (reg == 0)
1295 return 1;
1297 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1298 || GET_CODE (reg) == SIGN_EXTRACT
1299 || GET_CODE (reg) == STRICT_LOW_PART)
1300 reg = XEXP (reg, 0);
1302 if (GET_CODE (reg) == PARALLEL)
1304 int i;
1306 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1307 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1308 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1309 return 1;
1311 return 0;
1314 if (!REG_P (reg))
1315 return 1;
1317 regno = REGNO (reg);
1319 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1321 /* Global registers are assumed live. */
1322 return 0;
1324 else
1326 if (regno < FIRST_PSEUDO_REGISTER)
1328 /* Check for hard registers. */
1329 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1330 while (--j >= 0)
1332 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1334 int b = candidate_table[src].split_bbs.first_member[i];
1336 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
1337 regno + j))
1339 return 0;
1344 else
1346 /* Check for pseudo registers. */
1347 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1349 int b = candidate_table[src].split_bbs.first_member[i];
1351 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
1353 return 0;
1359 return 1;
1362 /* If X is a set of a register R, mark R as live at the beginning
1363 of every update block of SRC. */
1365 static void
1366 update_live_1 (int src, rtx x)
1368 int i;
1369 int regno;
1370 rtx reg = SET_DEST (x);
1372 if (reg == 0)
1373 return;
1375 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
1376 || GET_CODE (reg) == SIGN_EXTRACT
1377 || GET_CODE (reg) == STRICT_LOW_PART)
1378 reg = XEXP (reg, 0);
1380 if (GET_CODE (reg) == PARALLEL)
1382 int i;
1384 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1385 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1386 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1388 return;
1391 if (!REG_P (reg))
1392 return;
1394 /* Global registers are always live, so the code below does not apply
1395 to them. */
1397 regno = REGNO (reg);
1399 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1401 if (regno < FIRST_PSEUDO_REGISTER)
1403 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1404 while (--j >= 0)
1406 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1408 int b = candidate_table[src].update_bbs.first_member[i];
1410 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
1411 regno + j);
1415 else
1417 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1419 int b = candidate_table[src].update_bbs.first_member[i];
1421 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
1427 /* Return 1 if INSN can be moved speculatively from block SRC to TRG,
1428 otherwise return 0. Called before the first insertion of INSN into
1429 the ready list or before scheduling. */
1431 static int
1432 check_live (rtx insn, int src)
1434 /* Find the registers set by instruction. */
1435 if (GET_CODE (PATTERN (insn)) == SET
1436 || GET_CODE (PATTERN (insn)) == CLOBBER)
1437 return check_live_1 (src, PATTERN (insn));
1438 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1440 int j;
1441 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1442 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1443 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1444 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1445 return 0;
1447 return 1;
1450 return 1;
1453 /* Update the live registers info after insn was moved speculatively from
1454 block src to trg. */
1456 static void
1457 update_live (rtx insn, int src)
1459 /* Find the registers set by instruction. */
1460 if (GET_CODE (PATTERN (insn)) == SET
1461 || GET_CODE (PATTERN (insn)) == CLOBBER)
1462 update_live_1 (src, PATTERN (insn));
1463 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1465 int j;
1466 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1467 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1468 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1469 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1473 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1474 #define IS_REACHABLE(bb_from, bb_to) \
1475 (bb_from == bb_to \
1476 || IS_RGN_ENTRY (bb_from) \
1477 || (TEST_BIT (ancestor_edges[bb_to], \
1478 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))))))
1480 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1482 static void
1483 set_spec_fed (rtx load_insn)
1485 rtx link;
1487 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1488 if (GET_MODE (link) == VOIDmode)
1489 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1490 } /* set_spec_fed */
1492 /* On the path from INSN to load_insn_bb, find a conditional
1493 branch that depends on INSN and guards the speculative load. */
1495 static int
1496 find_conditional_protection (rtx insn, int load_insn_bb)
1498 rtx link;
1500 /* Iterate through DEF-USE forward dependences. */
1501 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1503 rtx next = XEXP (link, 0);
1504 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1505 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1506 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1507 && load_insn_bb != INSN_BB (next)
1508 && GET_MODE (link) == VOIDmode
1509 && (JUMP_P (next)
1510 || find_conditional_protection (next, load_insn_bb)))
1511 return 1;
1513 return 0;
1514 } /* find_conditional_protection */
1516 /* Returns 1 if the same insn1 that participates in the computation
1517 of load_insn's address is feeding a conditional branch that is
1518 guarding load_insn. This is true if we find the two DEF-USE
1519 chains:
1520 insn1 -> ... -> conditional-branch
1521 insn1 -> ... -> load_insn,
1522 and if a flow path exists:
1523 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1524 and if insn1 is on the path
1525 region-entry -> ... -> bb_trg -> ... load_insn.
1527 Locate insn1 by climbing on LOG_LINKS from load_insn.
1528 Locate the branch by following INSN_DEPEND from insn1. */
1530 static int
1531 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1533 rtx link;
1535 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1537 rtx insn1 = XEXP (link, 0);
1539 /* Must be a DEF-USE dependence upon non-branch. */
1540 if (GET_MODE (link) != VOIDmode
1541 || JUMP_P (insn1))
1542 continue;
1544 /* There must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1545 if (INSN_BB (insn1) == bb_src
1546 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1547 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1548 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1549 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1550 continue;
1552 /* Now search for the conditional-branch. */
1553 if (find_conditional_protection (insn1, bb_src))
1554 return 1;
1556 /* Recursive step: search another insn1, "above" current insn1. */
1557 return is_conditionally_protected (insn1, bb_src, bb_trg);
1560 /* The chain does not exist. */
1561 return 0;
1562 } /* is_conditionally_protected */
1564 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1565 load_insn can move speculatively from bb_src to bb_trg. All the
1566 following must hold:
1568 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1569 (2) load_insn and load1 have a def-use dependence upon
1570 the same insn 'insn1'.
1571 (3) either load2 is in bb_trg, or:
1572 - there's only one split-block, and
1573 - load1 is on the escape path, and
1575 From all these we can conclude that the two loads access memory
1576 addresses that differ at most by a constant, and hence if moving
1577 load_insn would cause an exception, it would have been caused by
1578 load2 anyhow. */
1580 static int
1581 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1583 rtx back_link;
1584 candidate *candp = candidate_table + bb_src;
1586 if (candp->split_bbs.nr_members != 1)
1587 /* Must have exactly one escape block. */
1588 return 0;
1590 for (back_link = LOG_LINKS (load_insn);
1591 back_link; back_link = XEXP (back_link, 1))
1593 rtx insn1 = XEXP (back_link, 0);
1595 if (GET_MODE (back_link) == VOIDmode)
1597 /* Found a DEF-USE dependence (insn1, load_insn). */
1598 rtx fore_link;
1600 for (fore_link = INSN_DEPEND (insn1);
1601 fore_link; fore_link = XEXP (fore_link, 1))
1603 rtx insn2 = XEXP (fore_link, 0);
1604 if (GET_MODE (fore_link) == VOIDmode)
1606 /* Found a DEF-USE dependence (insn1, insn2). */
1607 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1608 /* insn2 not guaranteed to be a 1 base reg load. */
1609 continue;
1611 if (INSN_BB (insn2) == bb_trg)
1612 /* insn2 is the similar load, in the target block. */
1613 return 1;
1615 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
1616 /* insn2 is a similar load, in a split-block. */
1617 return 1;
1623 /* Couldn't find a similar load. */
1624 return 0;
1625 } /* is_pfree */
1627 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1628 a load moved speculatively, or if load_insn is protected by
1629 a compare on load_insn's address). */
1631 static int
1632 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1634 if (FED_BY_SPEC_LOAD (load_insn))
1635 return 1;
1637 if (LOG_LINKS (load_insn) == NULL)
1638 /* Dependence may 'hide' out of the region. */
1639 return 1;
1641 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1642 return 1;
1644 return 0;
1647 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1648 Return 1 if insn is exception-free (and the motion is valid)
1649 and 0 otherwise. */
1651 static int
1652 is_exception_free (rtx insn, int bb_src, int bb_trg)
1654 int insn_class = haifa_classify_insn (insn);
1656 /* Handle non-load insns. */
1657 switch (insn_class)
1659 case TRAP_FREE:
1660 return 1;
1661 case TRAP_RISKY:
1662 return 0;
1663 default:;
1666 /* Handle loads. */
1667 if (!flag_schedule_speculative_load)
1668 return 0;
1669 IS_LOAD_INSN (insn) = 1;
1670 switch (insn_class)
1672 case IFREE:
1673 return (1);
1674 case IRISKY:
1675 return 0;
1676 case PFREE_CANDIDATE:
1677 if (is_pfree (insn, bb_src, bb_trg))
1678 return 1;
1679 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1680 case PRISKY_CANDIDATE:
1681 if (!flag_schedule_speculative_load_dangerous
1682 || is_prisky (insn, bb_src, bb_trg))
1683 return 0;
1684 break;
1685 default:;
1688 return flag_schedule_speculative_load_dangerous;
1691 /* The number of insns from the current block scheduled so far. */
1692 static int sched_target_n_insns;
1693 /* The number of insns from the current block to be scheduled in total. */
1694 static int target_n_insns;
1695 /* The number of insns from the entire region scheduled so far. */
1696 static int sched_n_insns;
1697 /* Nonzero if the last scheduled insn was a jump. */
1698 static int last_was_jump;
1700 /* Implementations of the sched_info functions for region scheduling. */
1701 static void init_ready_list (struct ready_list *);
1702 static int can_schedule_ready_p (rtx);
1703 static int new_ready (rtx);
1704 static int schedule_more_p (void);
1705 static const char *rgn_print_insn (rtx, int);
1706 static int rgn_rank (rtx, rtx);
1707 static int contributes_to_priority (rtx, rtx);
1708 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1710 /* Return nonzero if there are more insns that should be scheduled. */
1712 static int
1713 schedule_more_p (void)
1715 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1718 /* Add all insns that are initially ready to the ready list READY. Called
1719 once before scheduling a set of insns. */
1721 static void
1722 init_ready_list (struct ready_list *ready)
1724 rtx prev_head = current_sched_info->prev_head;
1725 rtx next_tail = current_sched_info->next_tail;
1726 int bb_src;
1727 rtx insn;
1729 target_n_insns = 0;
1730 sched_target_n_insns = 0;
1731 sched_n_insns = 0;
1732 last_was_jump = 0;
1734 /* Print debugging information. */
1735 if (sched_verbose >= 5)
1736 debug_dependencies ();
1738 /* Prepare current target block info. */
1739 if (current_nr_blocks > 1)
1741 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1743 bblst_last = 0;
1744 /* bblst_table holds split blocks and update blocks for each block after
1745 the current one in the region. split blocks and update blocks are
1746 the TO blocks of region edges, so there can be at most rgn_nr_edges
1747 of them. */
1748 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1749 bblst_table = xmalloc (bblst_size * sizeof (int));
1751 bitlst_table_last = 0;
1752 bitlst_table = xmalloc (rgn_nr_edges * sizeof (int));
1754 compute_trg_info (target_bb);
1757 /* Initialize ready list with all 'ready' insns in target block.
1758 Count number of insns in the target block being scheduled. */
1759 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1761 if (INSN_DEP_COUNT (insn) == 0)
1763 ready_add (ready, insn);
1765 if (targetm.sched.adjust_priority)
1766 INSN_PRIORITY (insn) =
1767 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1769 target_n_insns++;
1772 /* Add to ready list all 'ready' insns in valid source blocks.
1773 For speculative insns, check-live, exception-free, and
1774 issue-delay. */
1775 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1776 if (IS_VALID (bb_src))
1778 rtx src_head;
1779 rtx src_next_tail;
1780 rtx tail, head;
1782 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1783 src_next_tail = NEXT_INSN (tail);
1784 src_head = head;
1786 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1788 if (! INSN_P (insn))
1789 continue;
1791 if (!CANT_MOVE (insn)
1792 && (!IS_SPECULATIVE_INSN (insn)
1793 || ((recog_memoized (insn) < 0
1794 || min_insn_conflict_delay (curr_state,
1795 insn, insn) <= 3)
1796 && check_live (insn, bb_src)
1797 && is_exception_free (insn, bb_src, target_bb))))
1798 if (INSN_DEP_COUNT (insn) == 0)
1800 ready_add (ready, insn);
1802 if (targetm.sched.adjust_priority)
1803 INSN_PRIORITY (insn) =
1804 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1810 /* Called after taking INSN from the ready list. Returns nonzero if this
1811 insn can be scheduled, zero if we should silently discard it. */
1813 static int
1814 can_schedule_ready_p (rtx insn)
1816 if (JUMP_P (insn))
1817 last_was_jump = 1;
1819 /* An interblock motion? */
1820 if (INSN_BB (insn) != target_bb)
1822 basic_block b1;
1824 if (IS_SPECULATIVE_INSN (insn))
1826 if (!check_live (insn, INSN_BB (insn)))
1827 return 0;
1828 update_live (insn, INSN_BB (insn));
1830 /* For speculative load, mark insns fed by it. */
1831 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1832 set_spec_fed (insn);
1834 nr_spec++;
1836 nr_inter++;
1838 /* Update source block boundaries. */
1839 b1 = BLOCK_FOR_INSN (insn);
1840 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1842 /* We moved all the insns in the basic block.
1843 Emit a note after the last insn and update the
1844 begin/end boundaries to point to the note. */
1845 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1846 BB_HEAD (b1) = note;
1847 BB_END (b1) = note;
1849 else if (insn == BB_END (b1))
1851 /* We took insns from the end of the basic block,
1852 so update the end of block boundary so that it
1853 points to the first insn we did not move. */
1854 BB_END (b1) = PREV_INSN (insn);
1856 else if (insn == BB_HEAD (b1))
1858 /* We took insns from the start of the basic block,
1859 so update the start of block boundary so that
1860 it points to the first insn we did not move. */
1861 BB_HEAD (b1) = NEXT_INSN (insn);
1864 else
1866 /* In block motion. */
1867 sched_target_n_insns++;
1869 sched_n_insns++;
1871 return 1;
1874 /* Called after INSN has all its dependencies resolved. Return nonzero
1875 if it should be moved to the ready list or the queue, or zero if we
1876 should silently discard it. */
1877 static int
1878 new_ready (rtx next)
1880 /* For speculative insns, before inserting to ready/queue,
1881 check live, exception-free, and issue-delay. */
1882 if (INSN_BB (next) != target_bb
1883 && (!IS_VALID (INSN_BB (next))
1884 || CANT_MOVE (next)
1885 || (IS_SPECULATIVE_INSN (next)
1886 && ((recog_memoized (next) >= 0
1887 && min_insn_conflict_delay (curr_state, next, next) > 3)
1888 || !check_live (next, INSN_BB (next))
1889 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1890 return 0;
1891 return 1;
1894 /* Return a string that contains the insn uid and optionally anything else
1895 necessary to identify this insn in an output. It's valid to use a
1896 static buffer for this. The ALIGNED parameter should cause the string
1897 to be formatted so that multiple output lines will line up nicely. */
1899 static const char *
1900 rgn_print_insn (rtx insn, int aligned)
1902 static char tmp[80];
1904 if (aligned)
1905 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1906 else
1908 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1909 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1910 else
1911 sprintf (tmp, "%d", INSN_UID (insn));
1913 return tmp;
1916 /* Compare priority of two insns. Return a positive number if the second
1917 insn is to be preferred for scheduling, and a negative one if the first
1918 is to be preferred. Zero if they are equally good. */
1920 static int
1921 rgn_rank (rtx insn1, rtx insn2)
1923 /* Some comparisons make sense only in interblock scheduling. */
1924 if (INSN_BB (insn1) != INSN_BB (insn2))
1926 int spec_val, prob_val;
1928 /* Prefer an inblock motion over an interblock motion. */
1929 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1930 return 1;
1931 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1932 return -1;
1934 /* Prefer a useful motion over a speculative one. */
1935 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1936 if (spec_val)
1937 return spec_val;
1939 /* Prefer a more probable (speculative) insn. */
1940 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1941 if (prob_val)
1942 return prob_val;
1944 return 0;
1947 /* NEXT is an instruction that depends on INSN (a backward dependence);
1948 return nonzero if we should include this dependence in priority
1949 calculations. */
1951 static int
1952 contributes_to_priority (rtx next, rtx insn)
1954 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1957 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1958 conditionally set before INSN. Store the set of registers that
1959 must be considered as used by this jump in USED and that of
1960 registers that must be considered as set in SET. */
1962 static void
1963 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1964 regset cond_exec ATTRIBUTE_UNUSED,
1965 regset used ATTRIBUTE_UNUSED,
1966 regset set ATTRIBUTE_UNUSED)
1968 /* Nothing to do here, since we postprocess jumps in
1969 add_branch_dependences. */
1972 /* Used in schedule_insns to initialize current_sched_info for scheduling
1973 regions (or single basic blocks). */
1975 static struct sched_info region_sched_info =
1977 init_ready_list,
1978 can_schedule_ready_p,
1979 schedule_more_p,
1980 new_ready,
1981 rgn_rank,
1982 rgn_print_insn,
1983 contributes_to_priority,
1984 compute_jump_reg_dependencies,
1986 NULL, NULL,
1987 NULL, NULL,
1988 0, 0, 0
1991 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1993 static bool
1994 sets_likely_spilled (rtx pat)
1996 bool ret = false;
1997 note_stores (pat, sets_likely_spilled_1, &ret);
1998 return ret;
2001 static void
2002 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
2004 bool *ret = (bool *) data;
2006 if (GET_CODE (pat) == SET
2007 && REG_P (x)
2008 && REGNO (x) < FIRST_PSEUDO_REGISTER
2009 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2010 *ret = true;
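/* note_stores invokes this callback once for each store destination in
   PAT, so sets_likely_spilled returns true iff some SET in PAT has as
   its destination a hard register whose class satisfies
   CLASS_LIKELY_SPILLED_P.  */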
2013 /* Add dependences so that branches are scheduled to run last in their
2014 block. */
2016 static void
2017 add_branch_dependences (rtx head, rtx tail)
2019 rtx insn, last;
2021 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2022 that can throw exceptions, force them to remain in order at the end of
2023 the block by adding dependencies and giving the last a high priority.
2024 There may be notes present, and prev_head may also be a note.
2026 Branches must obviously remain at the end. Calls should remain at the
2027 end since moving them results in worse register allocation. Uses remain
2028 at the end to ensure proper register allocation.
2030 cc0 setters remain at the end because they can't be moved away from
2031 their cc0 user.
2033 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2034 are not moved before reload because we can wind up with register
2035 allocation failures. */
2037 insn = tail;
2038 last = 0;
2039 while (CALL_P (insn)
2040 || JUMP_P (insn)
2041 || (NONJUMP_INSN_P (insn)
2042 && (GET_CODE (PATTERN (insn)) == USE
2043 || GET_CODE (PATTERN (insn)) == CLOBBER
2044 || can_throw_internal (insn)
2045 #ifdef HAVE_cc0
2046 || sets_cc0_p (PATTERN (insn))
2047 #endif
2048 || (!reload_completed
2049 && sets_likely_spilled (PATTERN (insn)))))
2050 || NOTE_P (insn))
2052 if (!NOTE_P (insn))
2054 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
2056 add_dependence (last, insn, REG_DEP_ANTI);
2057 INSN_REF_COUNT (insn)++;
2060 CANT_MOVE (insn) = 1;
2062 last = insn;
2065 /* Don't overrun the bounds of the basic block. */
2066 if (insn == head)
2067 break;
2069 insn = PREV_INSN (insn);
2072 /* Make sure these insns are scheduled last in their block. */
2073 insn = last;
2074 if (insn != 0)
2075 while (insn != head)
2077 insn = prev_nonnote_insn (insn);
2079 if (INSN_REF_COUNT (insn) != 0)
2080 continue;
2082 add_dependence (last, insn, REG_DEP_ANTI);
2083 INSN_REF_COUNT (insn) = 1;
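/* The first loop above chains the trailing group together with anti
   dependences (each member depends on the member before it) and marks
   each one CANT_MOVE; the second gives the earliest member of the group
   an anti dependence on every earlier insn in the block whose reference
   count is still zero, so the whole group ends up scheduled last, in
   its original order.  */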
2087 /* Data structures for the computation of data dependences in a region. We
2088 keep one `deps' structure for every basic block. Before analyzing the
2089 data dependences for a bb, its variables are initialized as a function of
2090 the variables of its predecessors. When the analysis for a bb completes,
2091 we save the contents to the corresponding bb_deps[bb] variable. */
2093 static struct deps *bb_deps;
2095 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2097 static rtx
2098 concat_INSN_LIST (rtx copy, rtx old)
2100 rtx new = old;
2101 for (; copy ; copy = XEXP (copy, 1))
2102 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2103 return new;
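/* Only the list cells are copied; the insns themselves are shared, and
   the elements of COPY end up in front of OLD in reverse order.  For
   example, concatenating COPY = (i1, i2) with OLD = (i3) yields
   (i2, i1, i3).  */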
2106 static void
2107 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2108 rtx *old_mems_p)
2110 rtx new_insns = *old_insns_p;
2111 rtx new_mems = *old_mems_p;
2113 while (copy_insns)
2115 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2116 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2117 copy_insns = XEXP (copy_insns, 1);
2118 copy_mems = XEXP (copy_mems, 1);
2121 *old_insns_p = new_insns;
2122 *old_mems_p = new_mems;
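/* COPY_INSNS and COPY_MEMS are walked in parallel so that each pending
   insn stays paired with the MEM it accesses; as with concat_INSN_LIST,
   the copied elements end up in front of the old lists in reverse
   order.  */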
2125 /* After computing the dependencies for block BB, propagate the dependencies
2126 found in TMP_DEPS to the successors of the block. */
2127 static void
2128 propagate_deps (int bb, struct deps *pred_deps)
2130 int b = BB_TO_BLOCK (bb);
2131 int e, first_edge;
2133 /* bb's structures are inherited by its successors. */
2134 first_edge = e = OUT_EDGES (b);
2135 if (e > 0)
2138 int b_succ = TO_BLOCK (e);
2139 int bb_succ = BLOCK_TO_BB (b_succ);
2140 struct deps *succ_deps = bb_deps + bb_succ;
2141 int reg;
2143 /* Only bbs "below" bb, in the same region, are interesting. */
2144 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
2145 || bb_succ <= bb)
2147 e = NEXT_OUT (e);
2148 continue;
2151 /* The reg_last lists are inherited by bb_succ. */
2152 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg,
2154 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2155 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2157 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2158 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2159 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2160 succ_rl->clobbers);
2161 succ_rl->uses_length += pred_rl->uses_length;
2162 succ_rl->clobbers_length += pred_rl->clobbers_length;
2164 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2166 /* Mem read/write lists are inherited by bb_succ. */
2167 concat_insn_mem_list (pred_deps->pending_read_insns,
2168 pred_deps->pending_read_mems,
2169 &succ_deps->pending_read_insns,
2170 &succ_deps->pending_read_mems);
2171 concat_insn_mem_list (pred_deps->pending_write_insns,
2172 pred_deps->pending_write_mems,
2173 &succ_deps->pending_write_insns,
2174 &succ_deps->pending_write_mems);
2176 succ_deps->last_pending_memory_flush
2177 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2178 succ_deps->last_pending_memory_flush);
2180 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2181 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2183 /* last_function_call is inherited by bb_succ. */
2184 succ_deps->last_function_call
2185 = concat_INSN_LIST (pred_deps->last_function_call,
2186 succ_deps->last_function_call);
2188 /* sched_before_next_call is inherited by bb_succ. */
2189 succ_deps->sched_before_next_call
2190 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2191 succ_deps->sched_before_next_call);
2193 e = NEXT_OUT (e);
2195 while (e != first_edge);
2197 /* These lists should point to the right place, for correct
2198 freeing later. */
2199 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2200 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2201 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2202 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2204 /* Can't allow these to be freed twice. */
2205 pred_deps->pending_read_insns = 0;
2206 pred_deps->pending_read_mems = 0;
2207 pred_deps->pending_write_insns = 0;
2208 pred_deps->pending_write_mems = 0;
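/* Only the four pending memory lists change hands here; ownership of
   their cells has just been transferred to bb_deps[bb], and everything
   else left in PRED_DEPS is released by the caller through
   free_deps.  */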
2211 /* Compute backward dependences inside bb. In a multi-block region:
2212 (1) a bb is analyzed after its predecessors, and (2) the lists in
2213 effect at the end of bb (after analyzing for bb) are inherited by
2214 bb's successors.
2216 Specifically for reg-reg data dependences, the block insns are
2217 scanned by sched_analyze () top-to-bottom. Two lists are
2218 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2219 and reg_last[].uses for register USEs.
2221 When analysis is completed for bb, we update for its successors:
2222 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2223 ; - USES[succ] = Union (USES [succ], USES [bb])
2225 The mechanism for computing mem-mem data dependence is very
2226 similar, and the result is interblock dependences in the region. */
2228 static void
2229 compute_block_backward_dependences (int bb)
2231 rtx head, tail;
2232 struct deps tmp_deps;
2234 tmp_deps = bb_deps[bb];
2236 /* Do the analysis for this block. */
2237 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2238 sched_analyze (&tmp_deps, head, tail);
2239 add_branch_dependences (head, tail);
2241 if (current_nr_blocks > 1)
2242 propagate_deps (bb, &tmp_deps);
2244 /* Free up the INSN_LISTs. */
2245 free_deps (&tmp_deps);
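/* The per-block flow is thus: start from the deps state accumulated in
   bb_deps[bb] by the block's predecessors, run sched_analyze over the
   block's insns, pin the trailing branch group with
   add_branch_dependences, hand the resulting state on to the block's
   successors, and finally release the temporary copy.  */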
2248 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2249 them to the unused_*_list variables, so that they can be reused. */
2251 static void
2252 free_pending_lists (void)
2254 int bb;
2256 for (bb = 0; bb < current_nr_blocks; bb++)
2258 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2259 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2260 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2261 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2265 /* Print dependences for debugging, callable from debugger. */
2267 void
2268 debug_dependencies (void)
2270 int bb;
2272 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2273 for (bb = 0; bb < current_nr_blocks; bb++)
2275 rtx head, tail;
2276 rtx next_tail;
2277 rtx insn;
2279 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2280 next_tail = NEXT_INSN (tail);
2281 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2282 BB_TO_BLOCK (bb), bb);
2284 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2285 "insn", "code", "bb", "dep", "prio", "cost",
2286 "reservation");
2287 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2288 "----", "----", "--", "---", "----", "----",
2289 "-----------");
2291 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2293 rtx link;
2295 if (! INSN_P (insn))
2297 int n;
2298 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2299 if (NOTE_P (insn))
2301 n = NOTE_LINE_NUMBER (insn);
2302 if (n < 0)
2303 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2304 else
2306 expanded_location xloc;
2307 NOTE_EXPANDED_LOCATION (xloc, insn);
2308 fprintf (sched_dump, "line %d, file %s\n",
2309 xloc.line, xloc.file);
2312 else
2313 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2314 continue;
2317 fprintf (sched_dump,
2318 ";; %s%5d%6d%6d%6d%6d%6d ",
2319 (SCHED_GROUP_P (insn) ? "+" : " "),
2320 INSN_UID (insn),
2321 INSN_CODE (insn),
2322 INSN_BB (insn),
2323 INSN_DEP_COUNT (insn),
2324 INSN_PRIORITY (insn),
2325 insn_cost (insn, 0, 0));
2327 if (recog_memoized (insn) < 0)
2328 fprintf (sched_dump, "nothing");
2329 else
2330 print_reservation (sched_dump, insn);
2332 fprintf (sched_dump, "\t: ");
2333 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2334 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2335 fprintf (sched_dump, "\n");
2338 fprintf (sched_dump, "\n");
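/* Each insn line of this dump therefore shows the insn's uid, insn
   code, block, dependence count, priority and cost, then its
   reservation ("nothing" if the insn is not recognized) and the uids of
   the insns that depend on it.  */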
2341 /* Schedule a region. A region is either an inner loop, a loop-free
2342 subroutine, or a single basic block. Each bb in the region is
2343 scheduled after its flow predecessors. */
2345 static void
2346 schedule_region (int rgn)
2348 int bb;
2349 int rgn_n_insns = 0;
2350 int sched_rgn_n_insns = 0;
2352 /* Set variables for the current region. */
2353 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2354 current_blocks = RGN_BLOCKS (rgn);
2356 init_deps_global ();
2358 /* Initializations for region data dependence analysis. */
2359 bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
2360 for (bb = 0; bb < current_nr_blocks; bb++)
2361 init_deps (bb_deps + bb);
2363 /* Compute LOG_LINKS. */
2364 for (bb = 0; bb < current_nr_blocks; bb++)
2365 compute_block_backward_dependences (bb);
2367 /* Compute INSN_DEPEND. */
2368 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2370 rtx head, tail;
2371 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2373 compute_forward_dependences (head, tail);
2375 if (targetm.sched.dependencies_evaluation_hook)
2376 targetm.sched.dependencies_evaluation_hook (head, tail);
2380 /* Set priorities. */
2381 for (bb = 0; bb < current_nr_blocks; bb++)
2383 rtx head, tail;
2384 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2386 rgn_n_insns += set_priorities (head, tail);
2389 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2390 if (current_nr_blocks > 1)
2392 int i;
2394 prob = xmalloc ((current_nr_blocks) * sizeof (float));
2396 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2397 sbitmap_vector_zero (dom, current_nr_blocks);
2398 /* Edge to bit. */
2399 rgn_nr_edges = 0;
2400 edge_to_bit = xmalloc (nr_edges * sizeof (int));
2401 for (i = 1; i < nr_edges; i++)
2402 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
2403 EDGE_TO_BIT (i) = rgn_nr_edges++;
2404 rgn_edges = xmalloc (rgn_nr_edges * sizeof (int));
2406 rgn_nr_edges = 0;
2407 for (i = 1; i < nr_edges; i++)
2408 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
2409 rgn_edges[rgn_nr_edges++] = i;
2411 /* Split edges. */
2412 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2413 sbitmap_vector_zero (pot_split, current_nr_blocks);
2414 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2415 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2417 /* Compute probabilities, dominators, split_edges. */
2418 for (bb = 0; bb < current_nr_blocks; bb++)
2419 compute_dom_prob_ps (bb);
2422 /* Now we can schedule all blocks. */
2423 for (bb = 0; bb < current_nr_blocks; bb++)
2425 rtx head, tail;
2426 int b = BB_TO_BLOCK (bb);
2428 get_block_head_tail (b, &head, &tail);
2430 if (no_real_insns_p (head, tail))
2431 continue;
2433 current_sched_info->prev_head = PREV_INSN (head);
2434 current_sched_info->next_tail = NEXT_INSN (tail);
2436 if (write_symbols != NO_DEBUG)
2438 save_line_notes (b, head, tail);
2439 rm_line_notes (head, tail);
2442 /* rm_other_notes only removes notes which are _inside_ the
2443 block---that is, it won't remove notes before the first real insn
2444 or after the last real insn of the block. So if the first insn
2445 has a REG_SAVE_NOTE which would otherwise be emitted before the
2446 insn, it is redundant with the note before the start of the
2447 block, and so we have to take it out. */
2448 if (INSN_P (head))
2450 rtx note;
2452 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2453 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2455 remove_note (head, note);
2456 note = XEXP (note, 1);
2457 remove_note (head, note);
2461 /* Remove remaining note insns from the block, save them in
2462 note_list. These notes are restored at the end of
2463 schedule_block (). */
2464 rm_other_notes (head, tail);
2466 target_bb = bb;
2468 current_sched_info->queue_must_finish_empty
2469 = current_nr_blocks > 1 && !flag_schedule_interblock;
2471 schedule_block (b, rgn_n_insns);
2472 sched_rgn_n_insns += sched_n_insns;
2474 /* Update target block boundaries. */
2475 if (head == BB_HEAD (BASIC_BLOCK (b)))
2476 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2477 if (tail == BB_END (BASIC_BLOCK (b)))
2478 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2480 /* Clean up. */
2481 if (current_nr_blocks > 1)
2483 free (candidate_table);
2484 free (bblst_table);
2485 free (bitlst_table);
2489 /* Sanity check: verify that all region insns were scheduled. */
2490 if (sched_rgn_n_insns != rgn_n_insns)
2491 abort ();
2493 /* Restore line notes. */
2494 if (write_symbols != NO_DEBUG)
2496 for (bb = 0; bb < current_nr_blocks; bb++)
2498 rtx head, tail;
2499 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2500 restore_line_notes (head, tail);
2504 /* Done with this region. */
2505 free_pending_lists ();
2507 finish_deps_global ();
2509 free (bb_deps);
2511 if (current_nr_blocks > 1)
2513 free (prob);
2514 sbitmap_vector_free (dom);
2515 sbitmap_vector_free (pot_split);
2516 sbitmap_vector_free (ancestor_edges);
2517 free (edge_to_bit);
2518 free (rgn_edges);
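/* To recap, schedule_region computes backward and then forward
   dependences for every block of the region, assigns priorities, builds
   the interblock data (probabilities, dominators, split and ancestor
   edges) when the region has more than one block, schedules each block
   with schedule_block, restores line notes if needed, and frees the
   per-region data.  */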
2522 /* Indexed by region, holds the number of death notes found in that region.
2523 Used for consistency checks. */
2524 static int *deaths_in_region;
2526 /* Initialize data structures for region scheduling. */
2528 static void
2529 init_regions (void)
2531 sbitmap blocks;
2532 int rgn;
2534 nr_regions = 0;
2535 rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
2536 rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
2537 block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
2538 containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
2540 /* Compute regions for scheduling. */
2541 if (reload_completed
2542 || n_basic_blocks == 1
2543 || !flag_schedule_interblock)
2545 find_single_block_region ();
2547 else
2549 /* Verify that a 'good' control flow graph can be built. */
2550 if (is_cfg_nonregular ())
2552 find_single_block_region ();
2554 else
2556 struct edge_list *edge_list;
2558 /* The scheduler runs after estimate_probabilities; therefore, we
2559 can't blindly call back into find_basic_blocks since doing so
2560 could invalidate the branch probability info. We could,
2561 however, call cleanup_cfg. */
2562 edge_list = create_edge_list ();
2564 /* Compute the dominators and post dominators. */
2565 calculate_dominance_info (CDI_DOMINATORS);
2567 /* build_control_flow will return nonzero if it detects unreachable
2568 blocks or any other irregularity with the cfg which prevents
2569 cross block scheduling. */
2570 if (build_control_flow (edge_list) != 0)
2571 find_single_block_region ();
2572 else
2573 find_rgns (edge_list);
2575 if (sched_verbose >= 3)
2576 debug_regions ();
2578 /* We are done with flow's edge list. */
2579 free_edge_list (edge_list);
2581 /* For now. This will move as more and more of haifa is converted
2582 to using the cfg code in flow.c. */
2583 free_dominance_info (CDI_DOMINATORS);
2588 if (CHECK_DEAD_NOTES)
2590 blocks = sbitmap_alloc (last_basic_block);
2591 deaths_in_region = xmalloc (sizeof (int) * nr_regions);
2592 /* Remove all death notes from the subroutine. */
2593 for (rgn = 0; rgn < nr_regions; rgn++)
2595 int b;
2597 sbitmap_zero (blocks);
2598 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2599 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2601 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2603 sbitmap_free (blocks);
2605 else
2606 count_or_remove_death_notes (NULL, 1);
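/* So interblock regions are only built when reload has not yet run,
   the function has more than one basic block, flag_schedule_interblock
   is set, and the CFG passes both the is_cfg_nonregular and
   build_control_flow checks; in every other case each basic block forms
   a region of its own.  */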
2609 /* The one entry point in this file. DUMP_FILE is the dump file for
2610 this pass. */
2612 void
2613 schedule_insns (FILE *dump_file)
2615 sbitmap large_region_blocks, blocks;
2616 int rgn;
2617 int any_large_regions;
2618 basic_block bb;
2620 /* Taking care of this degenerate case makes the rest of
2621 this code simpler. */
2622 if (n_basic_blocks == 0)
2623 return;
2625 nr_inter = 0;
2626 nr_spec = 0;
2628 sched_init (dump_file);
2630 init_regions ();
2632 current_sched_info = &region_sched_info;
2634 /* Schedule every region in the subroutine. */
2635 for (rgn = 0; rgn < nr_regions; rgn++)
2636 schedule_region (rgn);
2638 /* Update life analysis for the subroutine. Do single block regions
2639 first so that we can verify that live_at_start didn't change. Then
2640 do all other blocks. */
2641 /* ??? There is an outside possibility that update_life_info, or more
2642 to the point propagate_block, could get called with nonzero flags
2643 more than once for one basic block. This would be kinda bad if it
2644 were to happen, since REG_INFO would be accumulated twice for the
2645 block, and we'd have twice the REG_DEAD notes.
2647 I'm fairly certain that this _shouldn't_ happen, since I don't think
2648 that live_at_start should change at region heads. Not sure what the
2649 best way to test for this kind of thing... */
2651 allocate_reg_life_data ();
2652 compute_bb_for_insn ();
2654 any_large_regions = 0;
2655 large_region_blocks = sbitmap_alloc (last_basic_block);
2656 sbitmap_zero (large_region_blocks);
2657 FOR_EACH_BB (bb)
2658 SET_BIT (large_region_blocks, bb->index);
2660 blocks = sbitmap_alloc (last_basic_block);
2661 sbitmap_zero (blocks);
2663 /* Update life information. For regions consisting of multiple blocks
2664 we've possibly done interblock scheduling that affects global liveness.
2665 For regions consisting of single blocks we need to do only local
2666 liveness. */
2667 for (rgn = 0; rgn < nr_regions; rgn++)
2668 if (RGN_NR_BLOCKS (rgn) > 1)
2669 any_large_regions = 1;
2670 else
2672 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2673 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2676 /* Don't update reg info after reload, since that affects
2677 regs_ever_live, which should not change after reload. */
2678 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2679 (reload_completed ? PROP_DEATH_NOTES
2680 : PROP_DEATH_NOTES | PROP_REG_INFO));
2681 if (any_large_regions)
2683 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2684 PROP_DEATH_NOTES | PROP_REG_INFO);
2687 if (CHECK_DEAD_NOTES)
2689 /* Verify the counts of death notes in the single basic block
2690 regions. */
2691 for (rgn = 0; rgn < nr_regions; rgn++)
2692 if (RGN_NR_BLOCKS (rgn) == 1)
2694 sbitmap_zero (blocks);
2695 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2697 if (deaths_in_region[rgn]
2698 != count_or_remove_death_notes (blocks, 0))
2699 abort ();
2701 free (deaths_in_region);
2704 /* Reposition the prologue and epilogue notes in case we moved the
2705 prologue/epilogue insns. */
2706 if (reload_completed)
2707 reposition_prologue_and_epilogue_notes (get_insns ());
2709 /* Delete redundant line notes. */
2710 if (write_symbols != NO_DEBUG)
2711 rm_redundant_line_notes ();
2713 if (sched_verbose)
2715 if (reload_completed == 0 && flag_schedule_interblock)
2717 fprintf (sched_dump,
2718 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2719 nr_inter, nr_spec);
2721 else
2723 if (nr_inter > 0)
2724 abort ();
2726 fprintf (sched_dump, "\n\n");
2729 /* Clean up. */
2730 free (rgn_table);
2731 free (rgn_bb_table);
2732 free (block_to_bb);
2733 free (containing_rgn);
2735 sched_finish ();
2737 if (edge_table)
2739 free (edge_table);
2740 edge_table = NULL;
2743 if (in_edges)
2745 free (in_edges);
2746 in_edges = NULL;
2748 if (out_edges)
2750 free (out_edges);
2751 out_edges = NULL;
2754 sbitmap_free (blocks);
2755 sbitmap_free (large_region_blocks);
2757 #endif