PR c++/25369
[official-gcc.git] / gcc / sched-rgn.c
blobf66966d6adf2b2febaaa5940a6619ad4e5e200f1
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 02110-1301, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "regs.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "insn-config.h"
60 #include "insn-attr.h"
61 #include "except.h"
62 #include "toplev.h"
63 #include "recog.h"
64 #include "cfglayout.h"
65 #include "params.h"
66 #include "sched-int.h"
67 #include "target.h"
68 #include "timevar.h"
69 #include "tree-pass.h"
71 /* Define when we want to do count REG_DEAD notes before and after scheduling
72 for sanity checking. We can't do that when conditional execution is used,
73 as REG_DEAD exist only for unconditional deaths. */
75 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
76 #define CHECK_DEAD_NOTES 1
77 #else
78 #define CHECK_DEAD_NOTES 0
79 #endif
82 #ifdef INSN_SCHEDULING
83 /* Some accessor macros for h_i_d members only used within this file. */
84 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
85 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
86 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
88 /* nr_inter/spec counts interblock/speculative motion for the function. */
89 static int nr_inter, nr_spec;
91 static int is_cfg_nonregular (void);
92 static bool sched_is_disabled_for_current_region_p (void);
94 /* A region is the main entity for interblock scheduling: insns
95 are allowed to move between blocks in the same region, along
96 control flow graph edges, in the 'up' direction. */
/* Descriptor for one scheduling region: a count of member blocks and
   the index of the first of them in rgn_bb_table.  */
typedef struct
{
  int rgn_nr_blocks;	/* Number of blocks in region.  */
  int rgn_blocks;	/* Blocks in the region (actually index in rgn_bb_table).  */
}
region;
104 /* Number of regions in the procedure. */
105 static int nr_regions;
107 /* Table of region descriptions. */
108 static region *rgn_table;
110 /* Array of lists of regions' blocks. */
111 static int *rgn_bb_table;
113 /* Topological order of blocks in the region (if b2 is reachable from
114 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
115 always referred to by either block or b, while its topological
116 order name (in the region) is referred to by bb. */
117 static int *block_to_bb;
119 /* The number of the region containing a block. */
120 static int *containing_rgn;
122 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
123 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
124 #define BLOCK_TO_BB(block) (block_to_bb[block])
125 #define CONTAINING_RGN(block) (containing_rgn[block])
127 void debug_regions (void);
128 static void find_single_block_region (void);
129 static void find_rgns (void);
130 static bool too_large (int, int *, int *);
132 extern void debug_live (int, int);
134 /* Blocks of the current region being scheduled. */
135 static int current_nr_blocks;
136 static int current_blocks;
138 /* The mapping from bb to block. */
139 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
141 /* Target info declarations.
143 The block currently being scheduled is referred to as the "target" block,
144 while other blocks in the region from which insns can be moved to the
145 target are called "source" blocks. The candidate structure holds info
146 about such sources: are they valid? Speculative? Etc. */
147 typedef struct
149 basic_block *first_member;
150 int nr_members;
152 bblst;
154 typedef struct
156 char is_valid;
157 char is_speculative;
158 int src_prob;
159 bblst split_bbs;
160 bblst update_bbs;
162 candidate;
164 static candidate *candidate_table;
166 /* A speculative motion requires checking live information on the path
167 from 'source' to 'target'. The split blocks are those to be checked.
168 After a speculative motion, live information should be modified in
169 the 'update' blocks.
171 Lists of split and update blocks for each candidate of the current
172 target are in array bblst_table. */
173 static basic_block *bblst_table;
174 static int bblst_size, bblst_last;
176 #define IS_VALID(src) ( candidate_table[src].is_valid )
177 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
178 #define SRC_PROB(src) ( candidate_table[src].src_prob )
180 /* The bb being currently scheduled. */
181 static int target_bb;
183 /* List of edges. */
184 typedef struct
186 edge *first_member;
187 int nr_members;
189 edgelst;
191 static edge *edgelst_table;
192 static int edgelst_last;
194 static void extract_edgelst (sbitmap, edgelst *);
197 /* Target info functions. */
198 static void split_edges (int, int, edgelst *);
199 static void compute_trg_info (int);
200 void debug_candidate (int);
201 void debug_candidates (int);
203 /* Dominators array: dom[i] contains the sbitmap of dominators of
204 bb i in the region. */
205 static sbitmap *dom;
207 /* bb 0 is the only region entry. */
208 #define IS_RGN_ENTRY(bb) (!bb)
210 /* Is bb_src dominated by bb_trg. */
211 #define IS_DOMINATED(bb_src, bb_trg) \
212 ( TEST_BIT (dom[bb_src], bb_trg) )
214 /* Probability: Prob[i] is a float in [0, 1] which is the probability
215 of bb i relative to the region entry. */
216 static float *prob;
218 /* The probability of bb_src, relative to bb_trg. Note, that while the
219 'prob[bb]' is a float in [0, 1], this macro returns an integer
220 in [0, 100]. */
221 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
222 prob[bb_trg])))
224 /* Bit-set of edges, where bit i stands for edge i. */
225 typedef sbitmap edgeset;
227 /* Number of edges in the region. */
228 static int rgn_nr_edges;
230 /* Array of size rgn_nr_edges. */
231 static edge *rgn_edges;
233 /* Mapping from each edge in the graph to its number in the rgn. */
234 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
235 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
237 /* The split edges of a source bb is different for each target
238 bb. In order to compute this efficiently, the 'potential-split edges'
239 are computed for each bb prior to scheduling a region. This is actually
240 the split edges of each bb relative to the region entry.
242 pot_split[bb] is the set of potential split edges of bb. */
243 static edgeset *pot_split;
245 /* For every bb, a set of its ancestor edges. */
246 static edgeset *ancestor_edges;
248 static void compute_dom_prob_ps (int);
250 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
251 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
252 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
254 /* Speculative scheduling functions. */
255 static int check_live_1 (int, rtx);
256 static void update_live_1 (int, rtx);
257 static int check_live (rtx, int);
258 static void update_live (rtx, int);
259 static void set_spec_fed (rtx);
260 static int is_pfree (rtx, int, int);
261 static int find_conditional_protection (rtx, int);
262 static int is_conditionally_protected (rtx, int, int);
263 static int is_prisky (rtx, int, int);
264 static int is_exception_free (rtx, int, int);
266 static bool sets_likely_spilled (rtx);
267 static void sets_likely_spilled_1 (rtx, rtx, void *);
268 static void add_branch_dependences (rtx, rtx);
269 static void compute_block_backward_dependences (int);
270 void debug_dependencies (void);
272 static void init_regions (void);
273 static void schedule_region (int);
274 static rtx concat_INSN_LIST (rtx, rtx);
275 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
276 static void propagate_deps (int, struct deps *);
277 static void free_pending_lists (void);
279 /* Functions for construction of the control flow graph. */
281 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
283 We decide not to build the control flow graph if there is possibly more
284 than one entry to the function, if computed branches exist, if we
285 have nonlocal gotos, or if we have an unreachable loop. */
287 static int
288 is_cfg_nonregular (void)
290 basic_block b;
291 rtx insn;
293 /* If we have a label that could be the target of a nonlocal goto, then
294 the cfg is not well structured. */
295 if (nonlocal_goto_handler_labels)
296 return 1;
298 /* If we have any forced labels, then the cfg is not well structured. */
299 if (forced_labels)
300 return 1;
302 /* If we have exception handlers, then we consider the cfg not well
303 structured. ?!? We should be able to handle this now that flow.c
304 computes an accurate cfg for EH. */
305 if (current_function_has_exception_handlers ())
306 return 1;
308 /* If we have non-jumping insns which refer to labels, then we consider
309 the cfg not well structured. */
310 FOR_EACH_BB (b)
311 FOR_BB_INSNS (b, insn)
313 /* Check for labels referred by non-jump insns. */
314 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
316 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
317 if (note
318 && ! (JUMP_P (NEXT_INSN (insn))
319 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
320 XEXP (note, 0))))
321 return 1;
323 /* If this function has a computed jump, then we consider the cfg
324 not well structured. */
325 else if (JUMP_P (insn) && computed_jump_p (insn))
326 return 1;
329 /* Unreachable loops with more than one basic block are detected
330 during the DFS traversal in find_rgns.
332 Unreachable loops with a single block are detected here. This
333 test is redundant with the one in find_rgns, but it's much
334 cheaper to go ahead and catch the trivial case here. */
335 FOR_EACH_BB (b)
337 if (EDGE_COUNT (b->preds) == 0
338 || (single_pred_p (b)
339 && single_pred (b) == b))
340 return 1;
343 /* All the tests passed. Consider the cfg well structured. */
344 return 0;
347 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
349 static void
350 extract_edgelst (sbitmap set, edgelst *el)
352 unsigned int i = 0;
353 sbitmap_iterator sbi;
355 /* edgelst table space is reused in each call to extract_edgelst. */
356 edgelst_last = 0;
358 el->first_member = &edgelst_table[edgelst_last];
359 el->nr_members = 0;
361 /* Iterate over each word in the bitset. */
362 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
364 edgelst_table[edgelst_last++] = rgn_edges[i];
365 el->nr_members++;
369 /* Functions for the construction of regions. */
371 /* Print the regions, for debugging purposes. Callable from debugger. */
373 void
374 debug_regions (void)
376 int rgn, bb;
378 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
379 for (rgn = 0; rgn < nr_regions; rgn++)
381 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
382 rgn_table[rgn].rgn_nr_blocks);
383 fprintf (sched_dump, ";;\tbb/block: ");
385 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
387 current_blocks = RGN_BLOCKS (rgn);
389 gcc_assert (bb == BLOCK_TO_BB (BB_TO_BLOCK (bb)));
390 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
393 fprintf (sched_dump, "\n\n");
397 /* Build a single block region for each basic block in the function.
398 This allows for using the same code for interblock and basic block
399 scheduling. */
401 static void
402 find_single_block_region (void)
404 basic_block bb;
406 nr_regions = 0;
408 FOR_EACH_BB (bb)
410 rgn_bb_table[nr_regions] = bb->index;
411 RGN_NR_BLOCKS (nr_regions) = 1;
412 RGN_BLOCKS (nr_regions) = nr_regions;
413 CONTAINING_RGN (bb->index) = nr_regions;
414 BLOCK_TO_BB (bb->index) = 0;
415 nr_regions++;
419 /* Update number of blocks and the estimate for number of insns
420 in the region. Return true if the region is "too large" for interblock
421 scheduling (compile time considerations). */
423 static bool
424 too_large (int block, int *num_bbs, int *num_insns)
426 (*num_bbs)++;
427 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
428 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
430 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
431 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
434 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
435 is still an inner loop. Put in max_hdr[blk] the header of the most inner
436 loop containing blk. */
/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the most inner
   loop containing blk.  A loop whose header is dominated by another loop's
   header (larger dfs number) is no longer "inner".  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)			\
{							\
  if (max_hdr[blk] == -1)				\
    max_hdr[blk] = hdr;					\
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])		\
    RESET_BIT (inner, hdr);				\
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])		\
    {							\
      RESET_BIT (inner, max_hdr[blk]);			\
      max_hdr[blk] = hdr;				\
    }							\
}
450 /* Find regions for interblock scheduling.
452 A region for scheduling can be:
454 * A loop-free procedure, or
456 * A reducible inner loop, or
458 * A basic block not contained in any other region.
460 ?!? In theory we could build other regions based on extended basic
461 blocks or reverse extended basic blocks. Is it worth the trouble?
463 Loop blocks that form a region are put into the region's block list
464 in topological order.
466 This procedure stores its results into the following global (ick) variables
468 * rgn_nr
469 * rgn_table
470 * rgn_bb_table
471 * block_to_bb
472 * containing region
474 We use dominator relationships to avoid making regions out of non-reducible
475 loops.
477 This procedure needs to be converted to work on pred/succ lists instead
478 of edge tables. That would simplify it somewhat. */
480 static void
481 find_rgns (void)
483 int *max_hdr, *dfs_nr, *degree;
484 char no_loops = 1;
485 int node, child, loop_head, i, head, tail;
486 int count = 0, sp, idx = 0;
487 edge_iterator current_edge;
488 edge_iterator *stack;
489 int num_bbs, num_insns, unreachable;
490 int too_large_failure;
491 basic_block bb;
493 /* Note if a block is a natural loop header. */
494 sbitmap header;
496 /* Note if a block is a natural inner loop header. */
497 sbitmap inner;
499 /* Note if a block is in the block queue. */
500 sbitmap in_queue;
502 /* Note if a block is in the block queue. */
503 sbitmap in_stack;
505 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
506 and a mapping from block to its loop header (if the block is contained
507 in a loop, else -1).
509 Store results in HEADER, INNER, and MAX_HDR respectively, these will
510 be used as inputs to the second traversal.
512 STACK, SP and DFS_NR are only used during the first traversal. */
514 /* Allocate and initialize variables for the first traversal. */
515 max_hdr = xmalloc (last_basic_block * sizeof (int));
516 dfs_nr = xcalloc (last_basic_block, sizeof (int));
517 stack = xmalloc (n_edges * sizeof (edge_iterator));
519 inner = sbitmap_alloc (last_basic_block);
520 sbitmap_ones (inner);
522 header = sbitmap_alloc (last_basic_block);
523 sbitmap_zero (header);
525 in_queue = sbitmap_alloc (last_basic_block);
526 sbitmap_zero (in_queue);
528 in_stack = sbitmap_alloc (last_basic_block);
529 sbitmap_zero (in_stack);
531 for (i = 0; i < last_basic_block; i++)
532 max_hdr[i] = -1;
534 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
535 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
537 /* DFS traversal to find inner loops in the cfg. */
539 current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
540 sp = -1;
542 while (1)
544 if (EDGE_PASSED (current_edge))
546 /* We have reached a leaf node or a node that was already
547 processed. Pop edges off the stack until we find
548 an edge that has not yet been processed. */
549 while (sp >= 0 && EDGE_PASSED (current_edge))
551 /* Pop entry off the stack. */
552 current_edge = stack[sp--];
553 node = ei_edge (current_edge)->src->index;
554 gcc_assert (node != ENTRY_BLOCK);
555 child = ei_edge (current_edge)->dest->index;
556 gcc_assert (child != EXIT_BLOCK);
557 RESET_BIT (in_stack, child);
558 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
559 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
560 ei_next (&current_edge);
563 /* See if have finished the DFS tree traversal. */
564 if (sp < 0 && EDGE_PASSED (current_edge))
565 break;
567 /* Nope, continue the traversal with the popped node. */
568 continue;
571 /* Process a node. */
572 node = ei_edge (current_edge)->src->index;
573 gcc_assert (node != ENTRY_BLOCK);
574 SET_BIT (in_stack, node);
575 dfs_nr[node] = ++count;
577 /* We don't traverse to the exit block. */
578 child = ei_edge (current_edge)->dest->index;
579 if (child == EXIT_BLOCK)
581 SET_EDGE_PASSED (current_edge);
582 ei_next (&current_edge);
583 continue;
586 /* If the successor is in the stack, then we've found a loop.
587 Mark the loop, if it is not a natural loop, then it will
588 be rejected during the second traversal. */
589 if (TEST_BIT (in_stack, child))
591 no_loops = 0;
592 SET_BIT (header, child);
593 UPDATE_LOOP_RELATIONS (node, child);
594 SET_EDGE_PASSED (current_edge);
595 ei_next (&current_edge);
596 continue;
599 /* If the child was already visited, then there is no need to visit
600 it again. Just update the loop relationships and restart
601 with a new edge. */
602 if (dfs_nr[child])
604 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
605 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
606 SET_EDGE_PASSED (current_edge);
607 ei_next (&current_edge);
608 continue;
611 /* Push an entry on the stack and continue DFS traversal. */
612 stack[++sp] = current_edge;
613 SET_EDGE_PASSED (current_edge);
614 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
617 /* Reset ->aux field used by EDGE_PASSED. */
618 FOR_ALL_BB (bb)
620 edge_iterator ei;
621 edge e;
622 FOR_EACH_EDGE (e, ei, bb->succs)
623 e->aux = NULL;
627 /* Another check for unreachable blocks. The earlier test in
628 is_cfg_nonregular only finds unreachable blocks that do not
629 form a loop.
631 The DFS traversal will mark every block that is reachable from
632 the entry node by placing a nonzero value in dfs_nr. Thus if
633 dfs_nr is zero for any block, then it must be unreachable. */
634 unreachable = 0;
635 FOR_EACH_BB (bb)
636 if (dfs_nr[bb->index] == 0)
638 unreachable = 1;
639 break;
642 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
643 to hold degree counts. */
644 degree = dfs_nr;
646 FOR_EACH_BB (bb)
647 degree[bb->index] = EDGE_COUNT (bb->preds);
649 /* Do not perform region scheduling if there are any unreachable
650 blocks. */
651 if (!unreachable)
653 int *queue;
655 if (no_loops)
656 SET_BIT (header, 0);
658 /* Second traversal:find reducible inner loops and topologically sort
659 block of each region. */
661 queue = xmalloc (n_basic_blocks * sizeof (int));
663 /* Find blocks which are inner loop headers. We still have non-reducible
664 loops to consider at this point. */
665 FOR_EACH_BB (bb)
667 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
669 edge e;
670 edge_iterator ei;
671 basic_block jbb;
673 /* Now check that the loop is reducible. We do this separate
674 from finding inner loops so that we do not find a reducible
675 loop which contains an inner non-reducible loop.
677 A simple way to find reducible/natural loops is to verify
678 that each block in the loop is dominated by the loop
679 header.
681 If there exists a block that is not dominated by the loop
682 header, then the block is reachable from outside the loop
683 and thus the loop is not a natural loop. */
684 FOR_EACH_BB (jbb)
686 /* First identify blocks in the loop, except for the loop
687 entry block. */
688 if (bb->index == max_hdr[jbb->index] && bb != jbb)
690 /* Now verify that the block is dominated by the loop
691 header. */
692 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
693 break;
697 /* If we exited the loop early, then I is the header of
698 a non-reducible loop and we should quit processing it
699 now. */
700 if (jbb != EXIT_BLOCK_PTR)
701 continue;
703 /* I is a header of an inner loop, or block 0 in a subroutine
704 with no loops at all. */
705 head = tail = -1;
706 too_large_failure = 0;
707 loop_head = max_hdr[bb->index];
709 /* Decrease degree of all I's successors for topological
710 ordering. */
711 FOR_EACH_EDGE (e, ei, bb->succs)
712 if (e->dest != EXIT_BLOCK_PTR)
713 --degree[e->dest->index];
715 /* Estimate # insns, and count # blocks in the region. */
716 num_bbs = 1;
717 num_insns = (INSN_LUID (BB_END (bb))
718 - INSN_LUID (BB_HEAD (bb)));
720 /* Find all loop latches (blocks with back edges to the loop
721 header) or all the leaf blocks in the cfg has no loops.
723 Place those blocks into the queue. */
724 if (no_loops)
726 FOR_EACH_BB (jbb)
727 /* Leaf nodes have only a single successor which must
728 be EXIT_BLOCK. */
729 if (single_succ_p (jbb)
730 && single_succ (jbb) == EXIT_BLOCK_PTR)
732 queue[++tail] = jbb->index;
733 SET_BIT (in_queue, jbb->index);
735 if (too_large (jbb->index, &num_bbs, &num_insns))
737 too_large_failure = 1;
738 break;
742 else
744 edge e;
746 FOR_EACH_EDGE (e, ei, bb->preds)
748 if (e->src == ENTRY_BLOCK_PTR)
749 continue;
751 node = e->src->index;
753 if (max_hdr[node] == loop_head && node != bb->index)
755 /* This is a loop latch. */
756 queue[++tail] = node;
757 SET_BIT (in_queue, node);
759 if (too_large (node, &num_bbs, &num_insns))
761 too_large_failure = 1;
762 break;
768 /* Now add all the blocks in the loop to the queue.
770 We know the loop is a natural loop; however the algorithm
771 above will not always mark certain blocks as being in the
772 loop. Consider:
773 node children
774 a b,c
776 c a,d
779 The algorithm in the DFS traversal may not mark B & D as part
780 of the loop (i.e. they will not have max_hdr set to A).
782 We know they can not be loop latches (else they would have
783 had max_hdr set since they'd have a backedge to a dominator
784 block). So we don't need them on the initial queue.
786 We know they are part of the loop because they are dominated
787 by the loop header and can be reached by a backwards walk of
788 the edges starting with nodes on the initial queue.
790 It is safe and desirable to include those nodes in the
791 loop/scheduling region. To do so we would need to decrease
792 the degree of a node if it is the target of a backedge
793 within the loop itself as the node is placed in the queue.
795 We do not do this because I'm not sure that the actual
796 scheduling code will properly handle this case. ?!? */
798 while (head < tail && !too_large_failure)
800 edge e;
801 child = queue[++head];
803 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
805 node = e->src->index;
807 /* See discussion above about nodes not marked as in
808 this loop during the initial DFS traversal. */
809 if (e->src == ENTRY_BLOCK_PTR
810 || max_hdr[node] != loop_head)
812 tail = -1;
813 break;
815 else if (!TEST_BIT (in_queue, node) && node != bb->index)
817 queue[++tail] = node;
818 SET_BIT (in_queue, node);
820 if (too_large (node, &num_bbs, &num_insns))
822 too_large_failure = 1;
823 break;
829 if (tail >= 0 && !too_large_failure)
831 /* Place the loop header into list of region blocks. */
832 degree[bb->index] = -1;
833 rgn_bb_table[idx] = bb->index;
834 RGN_NR_BLOCKS (nr_regions) = num_bbs;
835 RGN_BLOCKS (nr_regions) = idx++;
836 CONTAINING_RGN (bb->index) = nr_regions;
837 BLOCK_TO_BB (bb->index) = count = 0;
839 /* Remove blocks from queue[] when their in degree
840 becomes zero. Repeat until no blocks are left on the
841 list. This produces a topological list of blocks in
842 the region. */
843 while (tail >= 0)
845 if (head < 0)
846 head = tail;
847 child = queue[head];
848 if (degree[child] == 0)
850 edge e;
852 degree[child] = -1;
853 rgn_bb_table[idx++] = child;
854 BLOCK_TO_BB (child) = ++count;
855 CONTAINING_RGN (child) = nr_regions;
856 queue[head] = queue[tail--];
858 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
859 if (e->dest != EXIT_BLOCK_PTR)
860 --degree[e->dest->index];
862 else
863 --head;
865 ++nr_regions;
869 free (queue);
872 /* Any block that did not end up in a region is placed into a region
873 by itself. */
874 FOR_EACH_BB (bb)
875 if (degree[bb->index] >= 0)
877 rgn_bb_table[idx] = bb->index;
878 RGN_NR_BLOCKS (nr_regions) = 1;
879 RGN_BLOCKS (nr_regions) = idx++;
880 CONTAINING_RGN (bb->index) = nr_regions++;
881 BLOCK_TO_BB (bb->index) = 0;
884 free (max_hdr);
885 free (dfs_nr);
886 free (stack);
887 sbitmap_free (header);
888 sbitmap_free (inner);
889 sbitmap_free (in_queue);
890 sbitmap_free (in_stack);
893 /* Functions for regions scheduling information. */
895 /* Compute dominators, probability, and potential-split-edges of bb.
896 Assume that these values were already computed for bb's predecessors. */
898 static void
899 compute_dom_prob_ps (int bb)
901 int pred_bb;
902 int nr_out_edges, nr_rgn_out_edges;
903 edge_iterator in_ei, out_ei;
904 edge in_edge, out_edge;
906 prob[bb] = 0.0;
907 if (IS_RGN_ENTRY (bb))
909 SET_BIT (dom[bb], 0);
910 prob[bb] = 1.0;
911 return;
914 /* Initialize dom[bb] to '111..1'. */
915 sbitmap_ones (dom[bb]);
917 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
919 if (in_edge->src == ENTRY_BLOCK_PTR)
920 continue;
922 pred_bb = BLOCK_TO_BB (in_edge->src->index);
923 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
924 sbitmap_a_or_b (ancestor_edges[bb],
925 ancestor_edges[bb], ancestor_edges[pred_bb]);
927 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
929 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
931 nr_out_edges = 0;
932 nr_rgn_out_edges = 0;
934 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
936 ++nr_out_edges;
938 /* The successor doesn't belong in the region? */
939 if (out_edge->dest != EXIT_BLOCK_PTR
940 && CONTAINING_RGN (out_edge->dest->index)
941 != CONTAINING_RGN (BB_TO_BLOCK (bb)))
942 ++nr_rgn_out_edges;
944 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
947 /* Now nr_rgn_out_edges is the number of region-exit edges from
948 pred, and nr_out_edges will be the number of pred out edges
949 not leaving the region. */
950 nr_out_edges -= nr_rgn_out_edges;
951 if (nr_rgn_out_edges > 0)
952 prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges;
953 else
954 prob[bb] += prob[pred_bb] / nr_out_edges;
957 SET_BIT (dom[bb], bb);
958 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
960 if (sched_verbose >= 2)
961 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
962 (int) (100.0 * prob[bb]));
965 /* Functions for target info. */
967 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
968 Note that bb_trg dominates bb_src. */
970 static void
971 split_edges (int bb_src, int bb_trg, edgelst *bl)
973 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
974 sbitmap_copy (src, pot_split[bb_src]);
976 sbitmap_difference (src, src, pot_split[bb_trg]);
977 extract_edgelst (src, bl);
978 sbitmap_free (src);
981 /* Find the valid candidate-source-blocks for the target block TRG, compute
982 their probability, and check if they are speculative or not.
983 For speculative sources, compute their update-blocks and split-blocks. */
985 static void
986 compute_trg_info (int trg)
988 candidate *sp;
989 edgelst el;
990 int i, j, k, update_idx;
991 basic_block block;
992 sbitmap visited;
993 edge_iterator ei;
994 edge e;
996 /* Define some of the fields for the target bb as well. */
997 sp = candidate_table + trg;
998 sp->is_valid = 1;
999 sp->is_speculative = 0;
1000 sp->src_prob = 100;
1002 visited = sbitmap_alloc (last_basic_block);
1004 for (i = trg + 1; i < current_nr_blocks; i++)
1006 sp = candidate_table + i;
1008 sp->is_valid = IS_DOMINATED (i, trg);
1009 if (sp->is_valid)
1011 sp->src_prob = GET_SRC_PROB (i, trg);
1012 sp->is_valid = (sp->src_prob >= PARAM_VALUE (PARAM_MIN_SPEC_PROB));
1015 if (sp->is_valid)
1017 split_edges (i, trg, &el);
1018 sp->is_speculative = (el.nr_members) ? 1 : 0;
1019 if (sp->is_speculative && !flag_schedule_speculative)
1020 sp->is_valid = 0;
1023 if (sp->is_valid)
1025 /* Compute split blocks and store them in bblst_table.
1026 The TO block of every split edge is a split block. */
1027 sp->split_bbs.first_member = &bblst_table[bblst_last];
1028 sp->split_bbs.nr_members = el.nr_members;
1029 for (j = 0; j < el.nr_members; bblst_last++, j++)
1030 bblst_table[bblst_last] = el.first_member[j]->dest;
1031 sp->update_bbs.first_member = &bblst_table[bblst_last];
1033 /* Compute update blocks and store them in bblst_table.
1034 For every split edge, look at the FROM block, and check
1035 all out edges. For each out edge that is not a split edge,
1036 add the TO block to the update block list. This list can end
1037 up with a lot of duplicates. We need to weed them out to avoid
1038 overrunning the end of the bblst_table. */
1040 update_idx = 0;
1041 sbitmap_zero (visited);
1042 for (j = 0; j < el.nr_members; j++)
1044 block = el.first_member[j]->src;
1045 FOR_EACH_EDGE (e, ei, block->succs)
1047 if (!TEST_BIT (visited, e->dest->index))
1049 for (k = 0; k < el.nr_members; k++)
1050 if (e == el.first_member[k])
1051 break;
1053 if (k >= el.nr_members)
1055 bblst_table[bblst_last++] = e->dest;
1056 SET_BIT (visited, e->dest->index);
1057 update_idx++;
1062 sp->update_bbs.nr_members = update_idx;
1064 /* Make sure we didn't overrun the end of bblst_table. */
1065 gcc_assert (bblst_last <= bblst_size);
1067 else
1069 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1071 sp->is_speculative = 0;
1072 sp->src_prob = 0;
1076 sbitmap_free (visited);
1079 /* Print candidates info, for debugging purposes.  Callable from debugger. */
1081 void
1082 debug_candidate (int i)
/* I is a region-relative block index into candidate_table; entries that
   are not valid candidates are skipped silently.  */
1084 if (!candidate_table[i].is_valid)
1085 return;
1087 if (candidate_table[i].is_speculative)
1089 int j;
1090 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
/* Dump the split blocks (escape blocks of the speculative motion) ...  */
1092 fprintf (sched_dump, "split path: ");
1093 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1095 int b = candidate_table[i].split_bbs.first_member[j]->index;
1097 fprintf (sched_dump, " %d ", b);
1099 fprintf (sched_dump, "\n");
/* ... and the update blocks (whose live-register info must be updated).  */
1101 fprintf (sched_dump, "update path: ");
1102 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1104 int b = candidate_table[i].update_bbs.first_member[j]->index;
1106 fprintf (sched_dump, " %d ", b);
1108 fprintf (sched_dump, "\n");
1110 else
1112 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1116 /* Print the entire candidate table for target block TRG, for debugging
     purposes.  Callable from debugger.  Only blocks after TRG in the region
     can be candidates, hence the loop starts at trg + 1.  */
1118 void
1119 debug_candidates (int trg)
1121 int i;
1123 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1124 BB_TO_BLOCK (trg), trg);
1125 for (i = trg + 1; i < current_nr_blocks; i++)
1126 debug_candidate (i);
1129 /* Functions for speculative scheduling. */
1131 /* Return 0 if x is a set of a register alive in the beginning of one
1132 of the split-blocks of src, otherwise return 1.
     X is a SET (or CLOBBER) pattern; SRC is a region-relative block index
     into candidate_table.  Returning 1 means the motion is safe with
     respect to this destination.  */
1134 static int
1135 check_live_1 (int src, rtx x)
1137 int i;
1138 int regno;
1139 rtx reg = SET_DEST (x);
1141 if (reg == 0)
1142 return 1;
/* Strip wrappers to reach the underlying register.  */
1144 while (GET_CODE (reg) == SUBREG
1145 || GET_CODE (reg) == ZERO_EXTRACT
1146 || GET_CODE (reg) == STRICT_LOW_PART)
1147 reg = XEXP (reg, 0);
1149 if (GET_CODE (reg) == PARALLEL)
1151 int i;
/* A PARALLEL destination is safe only if every element is safe;
   recurse on each sub-destination.  */
1153 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1154 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1155 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1156 return 1;
1158 return 0;
1161 if (!REG_P (reg))
1162 return 1;
1164 regno = REGNO (reg);
1166 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1168 /* Global registers are assumed live. */
1169 return 0;
1171 else
1173 if (regno < FIRST_PSEUDO_REGISTER)
1175 /* Check for hard registers.  A multi-word hard register is unsafe
       if ANY of its constituent regs is live at the start of a split
       block.  */
1176 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1177 while (--j >= 0)
1179 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1181 basic_block b = candidate_table[src].split_bbs.first_member[i];
1183 if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start,
1184 regno + j))
1186 return 0;
1191 else
1193 /* Check for pseudo registers. */
1194 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1196 basic_block b = candidate_table[src].split_bbs.first_member[i];
1198 if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start, regno))
1200 return 0;
1206 return 1;
1209 /* If x is a set of a register R, mark that R is alive in the beginning
1210 of every update-block of src.
     Mirror of check_live_1: same destination-stripping and PARALLEL
     recursion, but here we SET liveness bits instead of testing them.  */
1212 static void
1213 update_live_1 (int src, rtx x)
1215 int i;
1216 int regno;
1217 rtx reg = SET_DEST (x);
1219 if (reg == 0)
1220 return;
/* Strip wrappers to reach the underlying register.  */
1222 while (GET_CODE (reg) == SUBREG
1223 || GET_CODE (reg) == ZERO_EXTRACT
1224 || GET_CODE (reg) == STRICT_LOW_PART)
1225 reg = XEXP (reg, 0);
1227 if (GET_CODE (reg) == PARALLEL)
1229 int i;
1231 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1232 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1233 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1235 return;
1238 if (!REG_P (reg))
1239 return;
1241 /* Global registers are always live, so the code below does not apply
1242 to them. */
1244 regno = REGNO (reg);
1246 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1248 if (regno < FIRST_PSEUDO_REGISTER)
/* Hard register: mark every constituent word-register live.  */
1250 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1251 while (--j >= 0)
1253 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1255 basic_block b = candidate_table[src].update_bbs.first_member[i];
1257 SET_REGNO_REG_SET (b->il.rtl->global_live_at_start,
1258 regno + j);
1262 else
/* Pseudo register: a single liveness bit suffices.  */
1264 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1266 basic_block b = candidate_table[src].update_bbs.first_member[i];
1268 SET_REGNO_REG_SET (b->il.rtl->global_live_at_start, regno);
1274 /* Return 1 if insn can be speculatively moved from block src to trg,
1275 otherwise return 0.  Called before first insertion of insn to
1276 ready-list or before the scheduling.
     Dispatches check_live_1 over the insn's pattern: directly for a SET
     or CLOBBER, element-wise for a PARALLEL.  Insns with other pattern
     codes set no registers and are trivially safe (return 1).  */
1278 static int
1279 check_live (rtx insn, int src)
1281 /* Find the registers set by instruction. */
1282 if (GET_CODE (PATTERN (insn)) == SET
1283 || GET_CODE (PATTERN (insn)) == CLOBBER)
1284 return check_live_1 (src, PATTERN (insn));
1285 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1287 int j;
/* Every SET/CLOBBER element must be safe; one failure vetoes the move.  */
1288 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1289 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1290 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1291 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1292 return 0;
1294 return 1;
1297 return 1;
1300 /* Update the live registers info after insn was moved speculatively from
1301 block src to trg.
     Companion of check_live: same pattern dispatch, but calls
     update_live_1 to mark the set registers live in src's update
     blocks.  */
1303 static void
1304 update_live (rtx insn, int src)
1306 /* Find the registers set by instruction. */
1307 if (GET_CODE (PATTERN (insn)) == SET
1308 || GET_CODE (PATTERN (insn)) == CLOBBER)
1309 update_live_1 (src, PATTERN (insn));
1310 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1312 int j;
1313 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1314 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1315 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1316 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1320 /* Nonzero if block bb_to is equal to, or reachable from block bb_from.
     Arguments are region-relative block indices.  A region-entry block
     reaches everything; otherwise reachability is read from the
     precomputed ancestor_edges bitmap, keyed by bb_from's single
     incoming edge.  NOTE: multiple-evaluation macro — keep arguments
     side-effect free.  */
1321 #define IS_REACHABLE(bb_from, bb_to) \
1322 (bb_from == bb_to \
1323 || IS_RGN_ENTRY (bb_from) \
1324 || (TEST_BIT (ancestor_edges[bb_to], \
1325 EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))
1327 /* Turns on the fed_by_spec_load flag for insns fed by load_insn.
     Walks the forward (INSN_DEPEND) dependence list; GET_MODE == VOIDmode
     on a link marks a true (data, def-use) dependence, so only data
     consumers of the speculative load are flagged.  */
1329 static void
1330 set_spec_fed (rtx load_insn)
1332 rtx link;
1334 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1335 if (GET_MODE (link) == VOIDmode)
1336 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1337 } /* set_spec_fed */
1339 /* On the path from the insn to load_insn_bb, find a conditional
1340 branch depending on insn, that guards the speculative load.
     Returns 1 if such a protecting branch is found, 0 otherwise.
     Searches recursively through def-use chains rooted at INSN.  */
1342 static int
1343 find_conditional_protection (rtx insn, int load_insn_bb)
1345 rtx link;
1347 /* Iterate through DEF-USE forward dependences. */
1348 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1350 rtx next = XEXP (link, 0);
/* Consumer must be in the same region, on a path to load_insn_bb but
   not inside it, reached by a true dependence, and be (or lead to)
   a jump.  */
1351 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1352 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1353 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1354 && load_insn_bb != INSN_BB (next)
1355 && GET_MODE (link) == VOIDmode
1356 && (JUMP_P (next)
1357 || find_conditional_protection (next, load_insn_bb)))
1358 return 1;
1360 return 0;
1361 } /* find_conditional_protection */
1363 /* Returns 1 if the same insn1 that participates in the computation
1364 of load_insn's address is feeding a conditional branch that is
1365 guarding on load_insn. This is true if we find a the two DEF-USE
1366 chains:
1367 insn1 -> ... -> conditional-branch
1368 insn1 -> ... -> load_insn,
1369 and if a flow path exist:
1370 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1371 and if insn1 is on the path
1372 region-entry -> ... -> bb_trg -> ... load_insn.
1374 Locate insn1 by climbing on LOG_LINKS from load_insn.
1375 Locate the branch by following INSN_DEPEND from insn1. */
1377 static int
1378 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1380 rtx link;
1382 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1384 rtx insn1 = XEXP (link, 0);
1386 /* Must be a DEF-USE dependence upon non-branch. */
1387 if (GET_MODE (link) != VOIDmode
1388 || JUMP_P (insn1))
1389 continue;
1391 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1392 if (INSN_BB (insn1) == bb_src
1393 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1394 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1395 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1396 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1397 continue;
1399 /* Now search for the conditional-branch. */
1400 if (find_conditional_protection (insn1, bb_src))
1401 return 1;
1403 /* Recursive step: search another insn1, "above" current insn1.
      NOTE(review): this returns from inside the loop, so only the first
      qualifying LOG_LINKS entry is pursued — presumably intentional
      (each step climbs one dependence level), but worth confirming.  */
1404 return is_conditionally_protected (insn1, bb_src, bb_trg);
1407 /* The chain does not exist. */
1408 return 0;
1409 } /* is_conditionally_protected */
1411 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1412 load_insn can move speculatively from bb_src to bb_trg. All the
1413 following must hold:
1415 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1416 (2) load_insn and load1 have a def-use dependence upon
1417 the same insn 'insn1'.
1418 (3) either load2 is in bb_trg, or:
1419 - there's only one split-block, and
1420 - load1 is on the escape path, and
1422 From all these we can conclude that the two loads access memory
1423 addresses that differ at most by a constant, and hence if moving
1424 load_insn would cause an exception, it would have been caused by
1425 load2 anyhow. */
1427 static int
1428 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1430 rtx back_link;
1431 candidate *candp = candidate_table + bb_src;
1433 if (candp->split_bbs.nr_members != 1)
1434 /* Must have exactly one escape block. */
1435 return 0;
/* Climb backward dependences to each address-computing insn1 ...  */
1437 for (back_link = LOG_LINKS (load_insn);
1438 back_link; back_link = XEXP (back_link, 1))
1440 rtx insn1 = XEXP (back_link, 0);
1442 if (GET_MODE (back_link) == VOIDmode)
1444 /* Found a DEF-USE dependence (insn1, load_insn). */
1445 rtx fore_link;
/* ... then walk insn1's forward dependences looking for a similar
   load insn2 in bb_trg or in the single split block.  */
1447 for (fore_link = INSN_DEPEND (insn1);
1448 fore_link; fore_link = XEXP (fore_link, 1))
1450 rtx insn2 = XEXP (fore_link, 0);
1451 if (GET_MODE (fore_link) == VOIDmode)
1453 /* Found a DEF-USE dependence (insn1, insn2). */
1454 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1455 /* insn2 not guaranteed to be a 1 base reg load. */
1456 continue;
1458 if (INSN_BB (insn2) == bb_trg)
1459 /* insn2 is the similar load, in the target block. */
1460 return 1;
1462 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1463 /* insn2 is a similar load, in a split-block. */
1464 return 1;
1470 /* Couldn't find a similar load. */
1471 return 0;
1472 } /* is_pfree */
1474 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1475 a load moved speculatively, or if load_insn is protected by
1476 a compare on load_insn's address).
     A prisky load must NOT be moved speculatively (see caller
     is_exception_free).  */
1478 static int
1479 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1481 if (FED_BY_SPEC_LOAD (load_insn))
1482 return 1;
1484 if (LOG_LINKS (load_insn) == NULL)
1485 /* Dependence may 'hide' out of the region. */
1486 return 1;
1488 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1489 return 1;
1491 return 0;
1494 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1495 Return 1 if insn is exception-free (and the motion is valid)
1496 and 0 otherwise.
     Non-loads are decided purely by their trap class; loads additionally
     require -fschedule-insns speculative-load flags and the pfree/prisky
     analyses above.  */
1498 static int
1499 is_exception_free (rtx insn, int bb_src, int bb_trg)
1501 int insn_class = haifa_classify_insn (insn);
1503 /* Handle non-load insns. */
1504 switch (insn_class)
1506 case TRAP_FREE:
1507 return 1;
1508 case TRAP_RISKY:
1509 return 0;
1510 default:;
1513 /* Handle loads. */
1514 if (!flag_schedule_speculative_load)
1515 return 0;
1516 IS_LOAD_INSN (insn) = 1;
1517 switch (insn_class)
1519 case IFREE:
1520 return (1);
1521 case IRISKY:
1522 return 0;
1523 case PFREE_CANDIDATE:
1524 if (is_pfree (insn, bb_src, bb_trg))
1525 return 1;
1526 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1527 case PRISKY_CANDIDATE:
1528 if (!flag_schedule_speculative_load_dangerous
1529 || is_prisky (insn, bb_src, bb_trg))
1530 return 0;
1531 break;
1532 default:;
/* Remaining classes are allowed only under the "dangerous" flag.  */
1535 return flag_schedule_speculative_load_dangerous;
/* Per-block scheduling state and the callbacks that plug region
   scheduling into the generic haifa scheduler (see region_sched_info
   below).  */
1538 /* The number of insns from the current block scheduled so far. */
1539 static int sched_target_n_insns;
1540 /* The number of insns from the current block to be scheduled in total. */
1541 static int target_n_insns;
1542 /* The number of insns from the entire region scheduled so far. */
1543 static int sched_n_insns;
1544 /* Nonzero if the last scheduled insn was a jump. */
1545 static int last_was_jump;
1547 /* Implementations of the sched_info functions for region scheduling. */
1548 static void init_ready_list (struct ready_list *);
1549 static int can_schedule_ready_p (rtx);
1550 static int new_ready (rtx);
1551 static int schedule_more_p (void);
1552 static const char *rgn_print_insn (rtx, int);
1553 static int rgn_rank (rtx, rtx);
1554 static int contributes_to_priority (rtx, rtx);
1555 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1557 /* Return nonzero if there are more insns that should be scheduled.
     Scheduling of a block stops once all its own insns are placed, or
     immediately after its jump has been scheduled.  */
1559 static int
1560 schedule_more_p (void)
1562 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1565 /* Add all insns that are initially ready to the ready list READY. Called
1566 once before scheduling a set of insns.
     Also resets the per-block counters above and, for multi-block
     regions, (re)allocates the candidate/split/update tables consumed by
     compute_trg_info.  */
1568 static void
1569 init_ready_list (struct ready_list *ready)
1571 rtx prev_head = current_sched_info->prev_head;
1572 rtx next_tail = current_sched_info->next_tail;
1573 int bb_src;
1574 rtx insn;
1576 target_n_insns = 0;
1577 sched_target_n_insns = 0;
1578 sched_n_insns = 0;
1579 last_was_jump = 0;
1581 /* Print debugging information. */
1582 if (sched_verbose >= 5)
1583 debug_dependencies ();
1585 /* Prepare current target block info. */
1586 if (current_nr_blocks > 1)
1588 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1590 bblst_last = 0;
1591 /* bblst_table holds split blocks and update blocks for each block after
1592 the current one in the region. split blocks and update blocks are
1593 the TO blocks of region edges, so there can be at most rgn_nr_edges
1594 of them. */
1595 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1596 bblst_table = xmalloc (bblst_size * sizeof (basic_block));
1598 edgelst_last = 0;
1599 edgelst_table = xmalloc (rgn_nr_edges * sizeof (edge));
1601 compute_trg_info (target_bb);
1604 /* Initialize ready list with all 'ready' insns in target block.
1605 Count number of insns in the target block being scheduled. */
1606 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
/* An insn with no unresolved backward dependences is ready now.  */
1608 if (INSN_DEP_COUNT (insn) == 0)
1610 ready_add (ready, insn);
1612 if (targetm.sched.adjust_priority)
1613 INSN_PRIORITY (insn) =
1614 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1616 target_n_insns++;
1619 /* Add to ready list all 'ready' insns in valid source blocks.
1620 For speculative insns, check-live, exception-free, and
1621 issue-delay. */
1622 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1623 if (IS_VALID (bb_src))
1625 rtx src_head;
1626 rtx src_next_tail;
1627 rtx tail, head;
1629 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1630 src_next_tail = NEXT_INSN (tail);
1631 src_head = head;
1633 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1635 if (! INSN_P (insn))
1636 continue;
1638 if (!CANT_MOVE (insn)
1639 && (!IS_SPECULATIVE_INSN (insn)
1640 || ((recog_memoized (insn) < 0
1641 || min_insn_conflict_delay (curr_state,
1642 insn, insn) <= 3)
1643 && check_live (insn, bb_src)
1644 && is_exception_free (insn, bb_src, target_bb))))
1645 if (INSN_DEP_COUNT (insn) == 0)
1647 ready_add (ready, insn);
1649 if (targetm.sched.adjust_priority)
1650 INSN_PRIORITY (insn) =
1651 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1657 /* Called after taking INSN from the ready list. Returns nonzero if this
1658 insn can be scheduled, zero if we should silently discard it.
     (Note: the original comment said "nonzero" for both outcomes, which
     was a typo — the discard case returns 0.)
     Also performs the interblock-motion bookkeeping: liveness update for
     speculative moves, statistics, and source-block boundary repair.  */
1660 static int
1661 can_schedule_ready_p (rtx insn)
1663 if (JUMP_P (insn))
1664 last_was_jump = 1;
1666 /* An interblock motion? */
1667 if (INSN_BB (insn) != target_bb)
1669 basic_block b1;
1671 if (IS_SPECULATIVE_INSN (insn))
/* Re-check liveness at schedule time; it may have changed since the
   insn entered the ready list.  */
1673 if (!check_live (insn, INSN_BB (insn)))
1674 return 0;
1675 update_live (insn, INSN_BB (insn));
1677 /* For speculative load, mark insns fed by it. */
1678 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1679 set_spec_fed (insn);
1681 nr_spec++;
1683 nr_inter++;
1685 /* Update source block boundaries. */
1686 b1 = BLOCK_FOR_INSN (insn);
1687 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1689 /* We moved all the insns in the basic block.
1690 Emit a note after the last insn and update the
1691 begin/end boundaries to point to the note. */
1692 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1693 BB_HEAD (b1) = note;
1694 BB_END (b1) = note;
1696 else if (insn == BB_END (b1))
1698 /* We took insns from the end of the basic block,
1699 so update the end of block boundary so that it
1700 points to the first insn we did not move. */
1701 BB_END (b1) = PREV_INSN (insn);
1703 else if (insn == BB_HEAD (b1))
1705 /* We took insns from the start of the basic block,
1706 so update the start of block boundary so that
1707 it points to the first insn we did not move. */
1708 BB_HEAD (b1) = NEXT_INSN (insn);
1711 else
1713 /* In block motion. */
1714 sched_target_n_insns++;
1716 sched_n_insns++;
1718 return 1;
1721 /* Called after INSN has all its dependencies resolved. Return nonzero
1722 if it should be moved to the ready list or the queue, or zero if we
1723 should silently discard it.
     Mirrors the filtering done in init_ready_list for interblock
     candidates: validity of the source block, CANT_MOVE, and for
     speculative insns the issue-delay / liveness / exception checks.  */
1724 static int
1725 new_ready (rtx next)
1727 /* For speculative insns, before inserting to ready/queue,
1728 check live, exception-free, and issue-delay. */
1729 if (INSN_BB (next) != target_bb
1730 && (!IS_VALID (INSN_BB (next))
1731 || CANT_MOVE (next)
1732 || (IS_SPECULATIVE_INSN (next)
1733 && ((recog_memoized (next) >= 0
1734 && min_insn_conflict_delay (curr_state, next, next) > 3)
1735 || !check_live (next, INSN_BB (next))
1736 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1737 return 0;
1738 return 1;
1741 /* Return a string that contains the insn uid and optionally anything else
1742 necessary to identify this insn in an output. It's valid to use a
1743 static buffer for this. The ALIGNED parameter should cause the string
1744 to be formatted so that multiple output lines will line up nicely.
     NOTE: returns a pointer to a static buffer — not reentrant, and each
     call overwrites the previous result.  */
1746 static const char *
1747 rgn_print_insn (rtx insn, int aligned)
1749 static char tmp[80];
1751 if (aligned)
1752 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1753 else
/* Only annotate the block when interblock scheduling makes it
   ambiguous.  */
1755 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1756 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1757 else
1758 sprintf (tmp, "%d", INSN_UID (insn));
1760 return tmp;
1763 /* Compare priority of two insns. Return a positive number if the second
1764 insn is to be preferred for scheduling, and a negative one if the first
1765 is to be preferred. Zero if they are equally good.
     Tie-break order: in-block over interblock, useful over speculative,
     higher-probability speculative over lower.  */
1767 static int
1768 rgn_rank (rtx insn1, rtx insn2)
1770 /* Some comparison make sense in interblock scheduling only. */
1771 if (INSN_BB (insn1) != INSN_BB (insn2))
1773 int spec_val, prob_val;
1775 /* Prefer an inblock motion on an interblock motion. */
1776 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1777 return 1;
1778 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1779 return -1;
1781 /* Prefer a useful motion on a speculative one. */
1782 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1783 if (spec_val)
1784 return spec_val;
1786 /* Prefer a more probable (speculative) insn. */
1787 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1788 if (prob_val)
1789 return prob_val;
1791 return 0;
1794 /* NEXT is an instruction that depends on INSN (a backward dependence);
1795 return nonzero if we should include this dependence in priority
1796 calculations.
     Only intra-block dependences count toward priority here.  */
1798 static int
1799 contributes_to_priority (rtx next, rtx insn)
1801 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1804 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1805 conditionally set before INSN. Store the set of registers that
1806 must be considered as used by this jump in USED and that of
1807 registers that must be considered as set in SET.
     Intentionally empty for region scheduling: branch ordering is
     enforced by add_branch_dependences instead.  */
1809 static void
1810 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1811 regset cond_exec ATTRIBUTE_UNUSED,
1812 regset used ATTRIBUTE_UNUSED,
1813 regset set ATTRIBUTE_UNUSED)
1815 /* Nothing to do here, since we postprocess jumps in
1816 add_branch_dependences. */
1819 /* Used in schedule_insns to initialize current_sched_info for scheduling
1820 regions (or single basic blocks).
     Callback table; entries correspond positionally to struct sched_info
     (ready-list init, ready test, more-to-do test, new-ready filter,
     ranking, printing, priority contribution, jump dependencies,
     then head/tail hooks left NULL and flag fields zeroed).  */
1822 static struct sched_info region_sched_info =
1824 init_ready_list,
1825 can_schedule_ready_p,
1826 schedule_more_p,
1827 new_ready,
1828 rgn_rank,
1829 rgn_print_insn,
1830 contributes_to_priority,
1831 compute_jump_reg_dependencies,
1833 NULL, NULL,
1834 NULL, NULL,
1835 0, 0, 0
1838 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register.
     Thin wrapper over note_stores with sets_likely_spilled_1 as the
     per-store callback; RET accumulates the answer.  */
1840 static bool
1841 sets_likely_spilled (rtx pat)
1843 bool ret = false;
1844 note_stores (pat, sets_likely_spilled_1, &ret);
1845 return ret;
/* note_stores callback for sets_likely_spilled: set *DATA to true when
   X (the store destination) is a hard register whose class is likely
   spilled.  Only SETs count; CLOBBERs are ignored via the PAT check.  */
1848 static void
1849 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1851 bool *ret = (bool *) data;
1853 if (GET_CODE (pat) == SET
1854 && REG_P (x)
1855 && REGNO (x) < FIRST_PSEUDO_REGISTER
1856 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
1857 *ret = true;
1860 /* Add dependences so that branches are scheduled to run last in their
1861 block.
     HEAD/TAIL delimit the block's insns.  Works in two phases: first an
     anti-dependence chain over the trailing "must stay at the end" run,
     then a pass pinning everything else before that run.  */
1863 static void
1864 add_branch_dependences (rtx head, rtx tail)
1866 rtx insn, last;
1868 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
1869 that can throw exceptions, force them to remain in order at the end of
1870 the block by adding dependencies and giving the last a high priority.
1871 There may be notes present, and prev_head may also be a note.
1873 Branches must obviously remain at the end. Calls should remain at the
1874 end since moving them results in worse register allocation. Uses remain
1875 at the end to ensure proper register allocation.
1877 cc0 setters remain at the end because they can't be moved away from
1878 their cc0 user.
1880 COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
1882 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
1883 are not moved before reload because we can wind up with register
1884 allocation failures. */
1886 insn = tail;
1887 last = 0;
/* Phase 1: walk backward from TAIL over the trailing run of
   end-of-block insns, chaining each to its successor with an
   anti-dependence.  */
1888 while (CALL_P (insn)
1889 || JUMP_P (insn)
1890 || (NONJUMP_INSN_P (insn)
1891 && (GET_CODE (PATTERN (insn)) == USE
1892 || GET_CODE (PATTERN (insn)) == CLOBBER
1893 || can_throw_internal (insn)
1894 #ifdef HAVE_cc0
1895 || sets_cc0_p (PATTERN (insn))
1896 #endif
1897 || (!reload_completed
1898 && sets_likely_spilled (PATTERN (insn)))))
1899 || NOTE_P (insn))
1901 if (!NOTE_P (insn))
1903 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
1905 if (! sched_insns_conditions_mutex_p (last, insn))
1906 add_dependence (last, insn, REG_DEP_ANTI);
1907 INSN_REF_COUNT (insn)++;
1910 CANT_MOVE (insn) = 1;
1912 last = insn;
1915 /* Don't overrun the bounds of the basic block. */
1916 if (insn == head)
1917 break;
1919 insn = PREV_INSN (insn);
1922 /* Make sure these insns are scheduled last in their block. */
1923 insn = last;
1924 if (insn != 0)
/* Phase 2: every remaining insn in the block gets an anti-dependence
   on LAST so it schedules before the trailing run.  */
1925 while (insn != head)
1927 insn = prev_nonnote_insn (insn);
1929 if (INSN_REF_COUNT (insn) != 0)
1930 continue;
1932 if (! sched_insns_conditions_mutex_p (last, insn))
1933 add_dependence (last, insn, REG_DEP_ANTI);
1934 INSN_REF_COUNT (insn) = 1;
1937 #ifdef HAVE_conditional_execution
1938 /* Finally, if the block ends in a jump, and we are doing intra-block
1939 scheduling, make sure that the branch depends on any COND_EXEC insns
1940 inside the block to avoid moving the COND_EXECs past the branch insn.
1942 We only have to do this after reload, because (1) before reload there
1943 are no COND_EXEC insns, and (2) the region scheduler is an intra-block
1944 scheduler after reload.
1946 FIXME: We could in some cases move COND_EXEC insns past the branch if
1947 this scheduler would be a little smarter. Consider this code:
1949 T = [addr]
1950 C ? addr += 4
1951 !C ? X += 12
1952 C ? T += 1
1953 C ? jump foo
1955 On a target with a one cycle stall on a memory access the optimal
1956 sequence would be:
1958 T = [addr]
1959 C ? addr += 4
1960 C ? T += 1
1961 C ? jump foo
1962 !C ? X += 12
1964 We don't want to put the 'X += 12' before the branch because it just
1965 wastes a cycle of execution time when the branch is taken.
1967 Note that in the example "!C" will always be true. That is another
1968 possible improvement for handling COND_EXECs in this scheduler: it
1969 could remove always-true predicates. */
1971 if (!reload_completed || ! JUMP_P (tail))
1972 return;
1974 insn = tail;
1975 while (insn != head)
1977 insn = PREV_INSN (insn);
1979 /* Note that we want to add this dependency even when
1980 sched_insns_conditions_mutex_p returns true. The whole point
1981 is that we _want_ this dependency, even if these insns really
1982 are independent. */
1983 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
1984 add_dependence (tail, insn, REG_DEP_ANTI);
1986 #endif
1989 /* Data structures for the computation of data dependences in a regions. We
1990 keep one `deps' structure for every basic block. Before analyzing the
1991 data dependences for a bb, its variables are initialized as a function of
1992 the variables of its predecessors. When the analysis for a bb completes,
1993 we save the contents to the corresponding bb_deps[bb] variable. */
/* Allocated/freed per region in schedule_region; indexed by
   region-relative block number.  */
1995 static struct deps *bb_deps;
1997 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD.
     COPY is left untouched; the returned list shares no cells with it,
     so the two lists can be freed independently.  */
1999 static rtx
2000 concat_INSN_LIST (rtx copy, rtx old)
2002 rtx new = old;
2003 for (; copy ; copy = XEXP (copy, 1))
2004 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2005 return new;
/* Prepend copies of the paired lists COPY_INSNS (INSN_LIST) and
   COPY_MEMS (EXPR_LIST) onto *OLD_INSNS_P / *OLD_MEMS_P in lock-step.
   The two input lists are assumed to be the same length — the loop
   only tests copy_insns, so a shorter copy_mems would be a caller bug.  */
2008 static void
2009 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2010 rtx *old_mems_p)
2012 rtx new_insns = *old_insns_p;
2013 rtx new_mems = *old_mems_p;
2015 while (copy_insns)
2017 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2018 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2019 copy_insns = XEXP (copy_insns, 1);
2020 copy_mems = XEXP (copy_mems, 1);
2023 *old_insns_p = new_insns;
2024 *old_mems_p = new_mems;
2027 /* After computing the dependencies for block BB, propagate the dependencies
2028 found in TMP_DEPS to the successors of the block.
     PRED_DEPS is consumed: its pending mem lists are transferred to
     bb_deps[bb] and then nulled so the caller's free_deps does not free
     them twice.  */
2029 static void
2030 propagate_deps (int bb, struct deps *pred_deps)
2032 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
2033 edge_iterator ei;
2034 edge e;
2036 /* bb's structures are inherited by its successors. */
2037 FOR_EACH_EDGE (e, ei, block->succs)
2039 struct deps *succ_deps;
2040 unsigned reg;
2041 reg_set_iterator rsi;
2043 /* Only bbs "below" bb, in the same region, are interesting. */
2044 if (e->dest == EXIT_BLOCK_PTR
2045 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2046 || BLOCK_TO_BB (e->dest->index) <= bb)
2047 continue;
2049 succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);
2051 /* The reg_last lists are inherited by successor. */
2052 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2054 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2055 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2057 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2058 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2059 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2060 succ_rl->clobbers);
2061 succ_rl->uses_length += pred_rl->uses_length;
2062 succ_rl->clobbers_length += pred_rl->clobbers_length;
2064 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2066 /* Mem read/write lists are inherited by successor. */
2067 concat_insn_mem_list (pred_deps->pending_read_insns,
2068 pred_deps->pending_read_mems,
2069 &succ_deps->pending_read_insns,
2070 &succ_deps->pending_read_mems);
2071 concat_insn_mem_list (pred_deps->pending_write_insns,
2072 pred_deps->pending_write_mems,
2073 &succ_deps->pending_write_insns,
2074 &succ_deps->pending_write_mems);
2076 succ_deps->last_pending_memory_flush
2077 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2078 succ_deps->last_pending_memory_flush);
2080 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2081 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2083 /* last_function_call is inherited by successor. */
2084 succ_deps->last_function_call
2085 = concat_INSN_LIST (pred_deps->last_function_call,
2086 succ_deps->last_function_call);
2088 /* sched_before_next_call is inherited by successor. */
2089 succ_deps->sched_before_next_call
2090 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2091 succ_deps->sched_before_next_call);
2094 /* These lists should point to the right place, for correct
2095 freeing later. */
2096 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2097 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2098 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2099 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2101 /* Can't allow these to be freed twice. */
2102 pred_deps->pending_read_insns = 0;
2103 pred_deps->pending_read_mems = 0;
2104 pred_deps->pending_write_insns = 0;
2105 pred_deps->pending_write_mems = 0;
2108 /* Compute backward dependences inside bb. In a multiple blocks region:
2109 (1) a bb is analyzed after its predecessors, and (2) the lists in
2110 effect at the end of bb (after analyzing for bb) are inherited by
2111 bb's successors.
2113 Specifically for reg-reg data dependences, the block insns are
2114 scanned by sched_analyze () top-to-bottom. Two lists are
2115 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2116 and reg_last[].uses for register USEs.
2118 When analysis is completed for bb, we update for its successors:
2119 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2120 ; - USES[succ] = Union (USES [succ], DEFS [bb])
2122 The mechanism for computing mem-mem data dependence is very
2123 similar, and the result is interblock dependences in the region. */
2125 static void
2126 compute_block_backward_dependences (int bb)
2128 rtx head, tail;
2129 struct deps tmp_deps;
/* Start from the state inherited from bb's predecessors (built up by
   earlier propagate_deps calls).  */
2131 tmp_deps = bb_deps[bb];
2133 /* Do the analysis for this block. */
2134 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2135 sched_analyze (&tmp_deps, head, tail);
2136 add_branch_dependences (head, tail);
2138 if (current_nr_blocks > 1)
2139 propagate_deps (bb, &tmp_deps);
2141 /* Free up the INSN_LISTs. */
2142 free_deps (&tmp_deps);
2145 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2146 them to the unused_*_list variables, so that they can be reused.
     Called once per region after all blocks have been analyzed.  */
2148 static void
2149 free_pending_lists (void)
2151 int bb;
2153 for (bb = 0; bb < current_nr_blocks; bb++)
2155 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2156 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2157 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2158 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2162 /* Print dependences for debugging, callable from debugger.
     Dumps, for every block of the current region, a table of insns with
     their code/bb/dep-count/priority/cost/reservation, followed by the
     UIDs of their forward (INSN_DEPEND) dependents.  */
2164 void
2165 debug_dependencies (void)
2167 int bb;
2169 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2170 for (bb = 0; bb < current_nr_blocks; bb++)
2172 rtx head, tail;
2173 rtx next_tail;
2174 rtx insn;
2176 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2177 next_tail = NEXT_INSN (tail);
2178 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2179 BB_TO_BLOCK (bb), bb);
2181 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2182 "insn", "code", "bb", "dep", "prio", "cost",
2183 "reservation");
2184 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2185 "----", "----", "--", "---", "----", "----",
2186 "-----------");
2188 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2190 rtx link;
/* Non-insns (notes, barriers, labels) get a one-line description.  */
2192 if (! INSN_P (insn))
2194 int n;
2195 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2196 if (NOTE_P (insn))
2198 n = NOTE_LINE_NUMBER (insn);
2199 if (n < 0)
2200 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2201 else
2203 expanded_location xloc;
2204 NOTE_EXPANDED_LOCATION (xloc, insn);
2205 fprintf (sched_dump, "line %d, file %s\n",
2206 xloc.line, xloc.file);
2209 else
2210 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2211 continue;
2214 fprintf (sched_dump,
2215 ";; %s%5d%6d%6d%6d%6d%6d ",
2216 (SCHED_GROUP_P (insn) ? "+" : " "),
2217 INSN_UID (insn),
2218 INSN_CODE (insn),
2219 INSN_BB (insn),
2220 INSN_DEP_COUNT (insn),
2221 INSN_PRIORITY (insn),
2222 insn_cost (insn, 0, 0));
2224 if (recog_memoized (insn) < 0)
2225 fprintf (sched_dump, "nothing");
2226 else
2227 print_reservation (sched_dump, insn);
2229 fprintf (sched_dump, "\t: ");
2230 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2231 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2232 fprintf (sched_dump, "\n");
2235 fprintf (sched_dump, "\n");
2238 /* Returns true if all the basic blocks of the current region have
2239 NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */
2240 static bool
2241 sched_is_disabled_for_current_region_p (void)
2243 int bb;
2245 for (bb = 0; bb < current_nr_blocks; bb++)
2246 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2247 return false;
2249 return true;
/* Schedule a region.  A region is either an inner loop, a loop-free
   subroutine, or a single basic block.  Each bb in the region is
   scheduled after its flow predecessors.  */

static void
schedule_region (int rgn)
{
  basic_block block;
  edge_iterator ei;
  edge e;
  int bb;
  int rgn_n_insns = 0;
  int sched_rgn_n_insns = 0;

  /* Set variables for the current region.  */
  current_nr_blocks = RGN_NR_BLOCKS (rgn);
  current_blocks = RGN_BLOCKS (rgn);

  /* Don't schedule region that is marked by
     NOTE_DISABLE_SCHED_OF_BLOCK.  */
  if (sched_is_disabled_for_current_region_p ())
    return;

  init_deps_global ();

  /* Initializations for region data dependence analysis.
     One deps context per block in the region.  */
  bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
  for (bb = 0; bb < current_nr_blocks; bb++)
    init_deps (bb_deps + bb);

  /* Compute LOG_LINKS.  */
  for (bb = 0; bb < current_nr_blocks; bb++)
    compute_block_backward_dependences (bb);

  /* Compute INSN_DEPEND.  Processed from the last block backwards.  */
  for (bb = current_nr_blocks - 1; bb >= 0; bb--)
    {
      rtx head, tail;
      get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);

      compute_forward_dependences (head, tail);

      /* Give the target a chance to examine the final dependences.  */
      if (targetm.sched.dependencies_evaluation_hook)
	targetm.sched.dependencies_evaluation_hook (head, tail);
    }

  /* Set priorities.  RGN_N_INSNS accumulates the number of real insns
     in the region; it is checked against the scheduled count below.  */
  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      rtx head, tail;
      get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);

      rgn_n_insns += set_priorities (head, tail);
    }

  /* Compute interblock info: probabilities, split-edges, dominators, etc.
     Only needed when the region spans several blocks.  */
  if (current_nr_blocks > 1)
    {
      prob = xmalloc ((current_nr_blocks) * sizeof (float));

      dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
      sbitmap_vector_zero (dom, current_nr_blocks);

      /* Use ->aux to implement EDGE_TO_BIT mapping.  First pass counts
	 the region's edges and numbers them.  */
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    SET_EDGE_TO_BIT (e, rgn_nr_edges++);
	}

      /* Second pass records the edges themselves, in the same order.  */
      rgn_edges = xmalloc (rgn_nr_edges * sizeof (edge));
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    rgn_edges[rgn_nr_edges++] = e;
	}

      /* Split edges.  */
      pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (pot_split, current_nr_blocks);
      ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (ancestor_edges, current_nr_blocks);

      /* Compute probabilities, dominators, split_edges.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
	compute_dom_prob_ps (bb);
    }

  /* Now we can schedule all blocks.  */
  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      rtx head, tail;
      int b = BB_TO_BLOCK (bb);

      get_block_head_tail (b, &head, &tail);

      /* Nothing to do for a block with no real insns.  */
      if (no_real_insns_p (head, tail))
	continue;

      current_sched_info->prev_head = PREV_INSN (head);
      current_sched_info->next_tail = NEXT_INSN (tail);

      if (write_symbols != NO_DEBUG)
	{
	  save_line_notes (b, head, tail);
	  rm_line_notes (head, tail);
	}

      /* rm_other_notes only removes notes which are _inside_ the
	 block---that is, it won't remove notes before the first real insn
	 or after the last real insn of the block.  So if the first insn
	 has a REG_SAVE_NOTE which would otherwise be emitted before the
	 insn, it is redundant with the note before the start of the
	 block, and so we have to take it out.  */
      if (INSN_P (head))
	{
	  rtx note;

	  for (note = REG_NOTES (head); note; note = XEXP (note, 1))
	    if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
	      remove_note (head, note);
	}

      /* Remove remaining note insns from the block, save them in
	 note_list.  These notes are restored at the end of
	 schedule_block ().  */
      rm_other_notes (head, tail);

      target_bb = bb;

      current_sched_info->queue_must_finish_empty
	= current_nr_blocks > 1 && !flag_schedule_interblock;

      schedule_block (b, rgn_n_insns);
      sched_rgn_n_insns += sched_n_insns;

      /* Update target block boundaries.  */
      if (head == BB_HEAD (BASIC_BLOCK (b)))
	BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
      if (tail == BB_END (BASIC_BLOCK (b)))
	BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
    }

  /* Clean up.  The interblock tables are allocated elsewhere
     (presumably by schedule_block's machinery) only for multi-block
     regions.  */
  if (current_nr_blocks > 1)
    {
      free (candidate_table);
      free (bblst_table);
      free (edgelst_table);
    }

  /* Sanity check: verify that all region insns were scheduled.  */
  gcc_assert (sched_rgn_n_insns == rgn_n_insns);

  /* Restore line notes.  */
  if (write_symbols != NO_DEBUG)
    {
      for (bb = 0; bb < current_nr_blocks; bb++)
	{
	  rtx head, tail;
	  get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
	  restore_line_notes (head, tail);
	}
    }

  /* Done with this region.  */
  free_pending_lists ();

  finish_deps_global ();

  free (bb_deps);

  if (current_nr_blocks > 1)
    {
      /* Cleanup ->aux used for EDGE_TO_BIT mapping.  */
      FOR_EACH_BB (block)
	{
	  if (CONTAINING_RGN (block->index) != rgn)
	    continue;
	  FOR_EACH_EDGE (e, ei, block->succs)
	    e->aux = NULL;
	}

      free (prob);
      sbitmap_vector_free (dom);
      sbitmap_vector_free (pot_split);
      sbitmap_vector_free (ancestor_edges);
      free (rgn_edges);
    }
}
/* Indexed by region, holds the number of death notes found in that region.
   Used for consistency checks.  */
static int *deaths_in_region;

/* Initialize data structures for region scheduling: allocate the
   region tables, partition the CFG into regions, and (optionally)
   record per-region death-note counts for later verification.  */

static void
init_regions (void)
{
  sbitmap blocks;
  int rgn;

  nr_regions = 0;
  rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
  rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
  block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
  containing_rgn = xmalloc ((last_basic_block) * sizeof (int));

  /* Compute regions for scheduling.  Interblock regions are only
     formed before reload, when interblock scheduling is enabled, and
     when the CFG is regular enough for the region machinery.  */
  if (reload_completed
      || n_basic_blocks == NUM_FIXED_BLOCKS + 1
      || !flag_schedule_interblock
      || is_cfg_nonregular ())
    {
      find_single_block_region ();
    }
  else
    {
      /* Compute the dominators and post dominators.  */
      calculate_dominance_info (CDI_DOMINATORS);

      /* Find regions.  */
      find_rgns ();

      if (sched_verbose >= 3)
	debug_regions ();

      /* For now.  This will move as more and more of haifa is converted
	 to using the cfg code in flow.c.  */
      free_dominance_info (CDI_DOMINATORS);
    }

  if (CHECK_DEAD_NOTES)
    {
      blocks = sbitmap_alloc (last_basic_block);
      deaths_in_region = xmalloc (sizeof (int) * nr_regions);
      /* Remove all death notes from the subroutine, counting per
	 region so schedule_insns can verify them afterwards.  */
      for (rgn = 0; rgn < nr_regions; rgn++)
	{
	  int b;

	  sbitmap_zero (blocks);
	  for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
	    SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);

	  deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
	}
      sbitmap_free (blocks);
    }
  else
    count_or_remove_death_notes (NULL, 1);
}
/* The one entry point in this file.  DUMP_FILE is the dump file for
   this pass.  Schedules every region of the current function, then
   updates life analysis, verifies death-note counts, and repositions
   prologue/epilogue notes.  */

void
schedule_insns (FILE *dump_file)
{
  sbitmap large_region_blocks, blocks;
  int rgn;
  int any_large_regions;
  basic_block bb;

  /* Taking care of this degenerate case makes the rest of
     this code simpler.  */
  if (n_basic_blocks == NUM_FIXED_BLOCKS)
    return;

  nr_inter = 0;
  nr_spec = 0;
  sched_init (dump_file);

  init_regions ();

  current_sched_info = &region_sched_info;
  /* Schedule every region in the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    schedule_region (rgn);

  /* Update life analysis for the subroutine.  Do single block regions
     first so that we can verify that live_at_start didn't change.  Then
     do all other blocks.  */
  /* ??? There is an outside possibility that update_life_info, or more
     to the point propagate_block, could get called with nonzero flags
     more than once for one basic block.  This would be kinda bad if it
     were to happen, since REG_INFO would be accumulated twice for the
     block, and we'd have twice the REG_DEAD notes.

     I'm fairly certain that this _shouldn't_ happen, since I don't think
     that live_at_start should change at region heads.  Not sure what the
     best way to test for this kind of thing...  */

  allocate_reg_life_data ();
  compute_bb_for_insn ();

  any_large_regions = 0;
  large_region_blocks = sbitmap_alloc (last_basic_block);
  sbitmap_zero (large_region_blocks);
  FOR_EACH_BB (bb)
    SET_BIT (large_region_blocks, bb->index);

  blocks = sbitmap_alloc (last_basic_block);
  sbitmap_zero (blocks);

  /* Update life information.  For regions consisting of multiple blocks
     we've possibly done interblock scheduling that affects global liveness.
     For regions consisting of single blocks we need to do only local
     liveness.  BLOCKS collects the single-block regions; whatever is
     left in LARGE_REGION_BLOCKS belongs to multi-block regions.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    if (RGN_NR_BLOCKS (rgn) > 1)
      any_large_regions = 1;
    else
      {
	SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
	RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
      }

  /* Don't update reg info after reload, since that affects
     regs_ever_live, which should not change after reload.  */
  update_life_info (blocks, UPDATE_LIFE_LOCAL,
		    (reload_completed ? PROP_DEATH_NOTES
		     : PROP_DEATH_NOTES | PROP_REG_INFO));
  if (any_large_regions)
    {
      update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
			PROP_DEATH_NOTES | PROP_REG_INFO);
    }

  if (CHECK_DEAD_NOTES)
    {
      /* Verify the counts of basic block notes in single the basic block
	 regions.  */
      for (rgn = 0; rgn < nr_regions; rgn++)
	if (RGN_NR_BLOCKS (rgn) == 1)
	  {
	    sbitmap_zero (blocks);
	    SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);

	    gcc_assert (deaths_in_region[rgn]
			== count_or_remove_death_notes (blocks, 0));
	  }
      free (deaths_in_region);
    }

  /* Reposition the prologue and epilogue notes in case we moved the
     prologue/epilogue insns.  */
  if (reload_completed)
    reposition_prologue_and_epilogue_notes (get_insns ());

  /* Delete redundant line notes.  */
  if (write_symbols != NO_DEBUG)
    rm_redundant_line_notes ();

  if (sched_verbose)
    {
      if (reload_completed == 0 && flag_schedule_interblock)
	{
	  fprintf (sched_dump,
		   "\n;; Procedure interblock/speculative motions == %d/%d \n",
		   nr_inter, nr_spec);
	}
      else
	/* After reload (or without interblock scheduling) no
	   interblock motions should have happened.  */
	gcc_assert (nr_inter <= 0);
      fprintf (sched_dump, "\n\n");
    }

  /* Clean up.  */
  free (rgn_table);
  free (rgn_bb_table);
  free (block_to_bb);
  free (containing_rgn);

  sched_finish ();

  sbitmap_free (blocks);
  sbitmap_free (large_region_blocks);
}
2639 #endif
2641 static bool
2642 gate_handle_sched (void)
2644 #ifdef INSN_SCHEDULING
2645 return flag_schedule_insns;
2646 #else
2647 return 0;
2648 #endif
/* Run instruction scheduler.  Performs the control and data dependence
   analysis and schedules the function, writing results to the dump
   file when enabled.  */
static void
rest_of_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  schedule_insns (dump_file);
#endif
}
2663 static bool
2664 gate_handle_sched2 (void)
2666 #ifdef INSN_SCHEDULING
2667 return optimize > 0 && flag_schedule_insns_after_reload;
2668 #else
2669 return 0;
2670 #endif
/* Run second scheduling pass after reload.  Repeats the control and
   data dependence analysis; depending on flags, schedules either
   extended basic blocks (superblocks/traces) or regions.  */
static void
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  split_all_insns (1);

  if (!flag_sched2_use_superblocks && !flag_sched2_use_traces)
    schedule_insns (dump_file);
  else
    {
      schedule_ebbs (dump_file);
      /* No liveness updating code yet, but it should be easy to do.
	 reg-stack recomputes the liveness when needed for now.  */
      count_or_remove_death_notes (NULL, 1);
      cleanup_cfg (CLEANUP_EXPENSIVE);
    }
#endif
}
/* Pass descriptor for the first (pre-reload) instruction scheduling
   pass, registered with the pass manager.  */
struct tree_opt_pass pass_sched =
{
  "sched1",                             /* name */
  gate_handle_sched,                    /* gate */
  rest_of_handle_sched,                 /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED,                             /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func |
  TODO_ggc_collect,                     /* todo_flags_finish */
  'S'                                   /* letter */
2714 struct tree_opt_pass pass_sched2 =
2716 "sched2", /* name */
2717 gate_handle_sched2, /* gate */
2718 rest_of_handle_sched2, /* execute */
2719 NULL, /* sub */
2720 NULL, /* next */
2721 0, /* static_pass_number */
2722 TV_SCHED2, /* tv_id */
2723 0, /* properties_required */
2724 0, /* properties_provided */
2725 0, /* properties_destroyed */
2726 0, /* todo_flags_start */
2727 TODO_dump_func |
2728 TODO_ggc_collect, /* todo_flags_finish */
2729 'R' /* letter */