1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 02110-1301, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
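/* As a rough illustration of that three-level organization (an informal
   sketch, not a call graph lifted from the sources; schedule_insns,
   init_regions and schedule_region are defined here, the block-level
   scheduler schedule_block lives in haifa-sched.c):

       schedule_insns ()                 function level
         init_regions ()                   build CFG, compute regions
         for each region R:
           schedule_region (R)           region level
             compute dominators, probabilities, dependences
             for each bb of R in topological order:
               schedule_block (...)      block level (haifa-sched.c)  */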
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "regs.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "insn-config.h"
60 #include "insn-attr.h"
61 #include "except.h"
62 #include "toplev.h"
63 #include "recog.h"
64 #include "cfglayout.h"
65 #include "params.h"
66 #include "sched-int.h"
67 #include "target.h"
68 #include "timevar.h"
69 #include "tree-pass.h"
71 /* Define when we want to count REG_DEAD notes before and after scheduling
72 for sanity checking. We can't do that when conditional execution is used,
73 as REG_DEAD notes exist only for unconditional deaths. */
75 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
76 #define CHECK_DEAD_NOTES 1
77 #else
78 #define CHECK_DEAD_NOTES 0
79 #endif
82 #ifdef INSN_SCHEDULING
83 /* Some accessor macros for h_i_d members only used within this file. */
84 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
85 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
86 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
88 /* nr_inter/spec counts interblock/speculative motion for the function. */
89 static int nr_inter, nr_spec;
91 static int is_cfg_nonregular (void);
92 static bool sched_is_disabled_for_current_region_p (void);
94 /* A region is the main entity for interblock scheduling: insns
95 are allowed to move between blocks in the same region, along
96 control flow graph edges, in the 'up' direction. */
97 typedef struct
99 int rgn_nr_blocks; /* Number of blocks in region. */
100 int rgn_blocks; /* Index in rgn_bb_table of the region's first block. */
102 region;
104 /* Number of regions in the procedure. */
105 static int nr_regions;
107 /* Table of region descriptions. */
108 static region *rgn_table;
110 /* Array of lists of regions' blocks. */
111 static int *rgn_bb_table;
113 /* Topological order of blocks in the region (if b2 is reachable from
114 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
115 always referred to by either block or b, while its topological
116 order name (in the region) is referred to by bb. */
117 static int *block_to_bb;
119 /* The number of the region containing a block. */
120 static int *containing_rgn;
122 /* The minimum probability of reaching a source block so that it will be
123 considered for speculative scheduling. */
124 static int min_spec_prob;
126 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
127 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
128 #define BLOCK_TO_BB(block) (block_to_bb[block])
129 #define CONTAINING_RGN(block) (containing_rgn[block])
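/* Illustrative sketch only (RGN and the locals below are just example
   names): once init_regions has filled the tables above, the blocks of
   a region RGN can be visited in topological order like this;
   debug_regions below walks the tables in essentially the same way.

       int bb;
       for (bb = 0; bb < RGN_NR_BLOCKS (rgn); bb++)
         {
           int block = rgn_bb_table[RGN_BLOCKS (rgn) + bb];
           gcc_assert (CONTAINING_RGN (block) == rgn);
           gcc_assert (BLOCK_TO_BB (block) == bb);
         }
*/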
131 void debug_regions (void);
132 static void find_single_block_region (void);
133 static void find_rgns (void);
134 static bool too_large (int, int *, int *);
136 extern void debug_live (int, int);
138 /* Blocks of the current region being scheduled. */
139 static int current_nr_blocks;
140 static int current_blocks;
142 /* The mapping from bb to block. */
143 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
145 /* Target info declarations.
147 The block currently being scheduled is referred to as the "target" block,
148 while other blocks in the region from which insns can be moved to the
149 target are called "source" blocks. The candidate structure holds info
150 about such sources: are they valid? Speculative? Etc. */
151 typedef struct
153 basic_block *first_member;
154 int nr_members;
156 bblst;
158 typedef struct
160 char is_valid;
161 char is_speculative;
162 int src_prob;
163 bblst split_bbs;
164 bblst update_bbs;
166 candidate;
168 static candidate *candidate_table;
170 /* A speculative motion requires checking live information on the path
171 from 'source' to 'target'. The split blocks are those to be checked.
172 After a speculative motion, live information should be modified in
173 the 'update' blocks.
175 Lists of split and update blocks for each candidate of the current
176 target are in array bblst_table. */
177 static basic_block *bblst_table;
178 static int bblst_size, bblst_last;
180 #define IS_VALID(src) ( candidate_table[src].is_valid )
181 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
182 #define SRC_PROB(src) ( candidate_table[src].src_prob )
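/* Illustrative sketch (not the literal test used by the pass): once
   compute_trg_info has filled candidate_table for the current target,
   an insn INSN from source block SRC_BB is roughly eligible for
   interblock motion when

       IS_VALID (src_bb)
       && (!IS_SPECULATIVE (src_bb)
           || (check_live (insn, src_bb)
               && is_exception_free (insn, src_bb, target_bb)))

   The exact conditions, including CANT_MOVE and issue-delay checks,
   are in init_ready_list and new_ready below.  */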
184 /* The bb being currently scheduled. */
185 static int target_bb;
187 /* List of edges. */
188 typedef struct
190 edge *first_member;
191 int nr_members;
193 edgelst;
195 static edge *edgelst_table;
196 static int edgelst_last;
198 static void extract_edgelst (sbitmap, edgelst *);
201 /* Target info functions. */
202 static void split_edges (int, int, edgelst *);
203 static void compute_trg_info (int);
204 void debug_candidate (int);
205 void debug_candidates (int);
207 /* Dominators array: dom[i] contains the sbitmap of dominators of
208 bb i in the region. */
209 static sbitmap *dom;
211 /* bb 0 is the only region entry. */
212 #define IS_RGN_ENTRY(bb) (!bb)
214 /* Is bb_src dominated by bb_trg? */
215 #define IS_DOMINATED(bb_src, bb_trg) \
216 ( TEST_BIT (dom[bb_src], bb_trg) )
218 /* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
219 the probability of bb i relative to the region entry. */
220 static int *prob;
222 /* Bit-set of edges, where bit i stands for edge i. */
223 typedef sbitmap edgeset;
225 /* Number of edges in the region. */
226 static int rgn_nr_edges;
228 /* Array of size rgn_nr_edges. */
229 static edge *rgn_edges;
231 /* Mapping from each edge in the graph to its number in the rgn. */
232 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
233 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
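/* Illustrative invariant (assuming rgn_edges and the ->aux bits have
   been set up for the current region, which is done when the region is
   scheduled; see schedule_region later in this file): the macros above
   make the bit numbering the inverse of the rgn_edges array, i.e.

       for (i = 0; i < rgn_nr_edges; i++)
         gcc_assert (EDGE_TO_BIT (rgn_edges[i]) == i);

   so bit i set in an edgeset such as pot_split[bb] stands for the edge
   rgn_edges[i]; extract_edgelst above relies on exactly this.  */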
235 /* The split edges of a source bb are different for each target
236 bb. In order to compute this efficiently, the 'potential-split edges'
237 are computed for each bb prior to scheduling a region. This is actually
238 the split edges of each bb relative to the region entry.
240 pot_split[bb] is the set of potential split edges of bb. */
241 static edgeset *pot_split;
243 /* For every bb, a set of its ancestor edges. */
244 static edgeset *ancestor_edges;
246 static void compute_dom_prob_ps (int);
248 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
249 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
250 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
252 /* Speculative scheduling functions. */
253 static int check_live_1 (int, rtx);
254 static void update_live_1 (int, rtx);
255 static int check_live (rtx, int);
256 static void update_live (rtx, int);
257 static void set_spec_fed (rtx);
258 static int is_pfree (rtx, int, int);
259 static int find_conditional_protection (rtx, int);
260 static int is_conditionally_protected (rtx, int, int);
261 static int is_prisky (rtx, int, int);
262 static int is_exception_free (rtx, int, int);
264 static bool sets_likely_spilled (rtx);
265 static void sets_likely_spilled_1 (rtx, rtx, void *);
266 static void add_branch_dependences (rtx, rtx);
267 static void compute_block_backward_dependences (int);
268 void debug_dependencies (void);
270 static void init_regions (void);
271 static void schedule_region (int);
272 static rtx concat_INSN_LIST (rtx, rtx);
273 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
274 static void propagate_deps (int, struct deps *);
275 static void free_pending_lists (void);
277 /* Functions for construction of the control flow graph. */
279 /* Return 1 if the control flow graph should not be constructed, 0 otherwise.
281 We decide not to build the control flow graph if there is possibly more
282 than one entry to the function, if computed branches exist, if we
283 have nonlocal gotos, or if we have an unreachable loop. */
285 static int
286 is_cfg_nonregular (void)
288 basic_block b;
289 rtx insn;
291 /* If we have a label that could be the target of a nonlocal goto, then
292 the cfg is not well structured. */
293 if (nonlocal_goto_handler_labels)
294 return 1;
296 /* If we have any forced labels, then the cfg is not well structured. */
297 if (forced_labels)
298 return 1;
300 /* If we have exception handlers, then we consider the cfg not well
301 structured. ?!? We should be able to handle this now that flow.c
302 computes an accurate cfg for EH. */
303 if (current_function_has_exception_handlers ())
304 return 1;
306 /* If we have non-jumping insns which refer to labels, then we consider
307 the cfg not well structured. */
308 FOR_EACH_BB (b)
309 FOR_BB_INSNS (b, insn)
311 /* Check for labels referred to by non-jump insns. */
312 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
314 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
315 if (note
316 && ! (JUMP_P (NEXT_INSN (insn))
317 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
318 XEXP (note, 0))))
319 return 1;
321 /* If this function has a computed jump, then we consider the cfg
322 not well structured. */
323 else if (JUMP_P (insn) && computed_jump_p (insn))
324 return 1;
327 /* Unreachable loops with more than one basic block are detected
328 during the DFS traversal in find_rgns.
330 Unreachable loops with a single block are detected here. This
331 test is redundant with the one in find_rgns, but it's much
332 cheaper to go ahead and catch the trivial case here. */
333 FOR_EACH_BB (b)
335 if (EDGE_COUNT (b->preds) == 0
336 || (single_pred_p (b)
337 && single_pred (b) == b))
338 return 1;
341 /* All the tests passed. Consider the cfg well structured. */
342 return 0;
345 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
347 static void
348 extract_edgelst (sbitmap set, edgelst *el)
350 unsigned int i = 0;
351 sbitmap_iterator sbi;
353 /* edgelst table space is reused in each call to extract_edgelst. */
354 edgelst_last = 0;
356 el->first_member = &edgelst_table[edgelst_last];
357 el->nr_members = 0;
359 /* Iterate over each set bit in the bitset. */
360 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
362 edgelst_table[edgelst_last++] = rgn_edges[i];
363 el->nr_members++;
367 /* Functions for the construction of regions. */
369 /* Print the regions, for debugging purposes. Callable from debugger. */
371 void
372 debug_regions (void)
374 int rgn, bb;
376 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
377 for (rgn = 0; rgn < nr_regions; rgn++)
379 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
380 rgn_table[rgn].rgn_nr_blocks);
381 fprintf (sched_dump, ";;\tbb/block: ");
383 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
385 current_blocks = RGN_BLOCKS (rgn);
387 gcc_assert (bb == BLOCK_TO_BB (BB_TO_BLOCK (bb)));
388 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
391 fprintf (sched_dump, "\n\n");
395 /* Build a single block region for each basic block in the function.
396 This allows for using the same code for interblock and basic block
397 scheduling. */
399 static void
400 find_single_block_region (void)
402 basic_block bb;
404 nr_regions = 0;
406 FOR_EACH_BB (bb)
408 rgn_bb_table[nr_regions] = bb->index;
409 RGN_NR_BLOCKS (nr_regions) = 1;
410 RGN_BLOCKS (nr_regions) = nr_regions;
411 CONTAINING_RGN (bb->index) = nr_regions;
412 BLOCK_TO_BB (bb->index) = 0;
413 nr_regions++;
417 /* Update number of blocks and the estimate for number of insns
418 in the region. Return true if the region is "too large" for interblock
419 scheduling (compile time considerations). */
421 static bool
422 too_large (int block, int *num_bbs, int *num_insns)
424 (*num_bbs)++;
425 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
426 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
428 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
429 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
432 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
433 is still an inner loop. Put in max_hdr[blk] the header of the innermost
434 loop containing blk. */
435 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
437 if (max_hdr[blk] == -1) \
438 max_hdr[blk] = hdr; \
439 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
440 RESET_BIT (inner, hdr); \
441 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
443 RESET_BIT (inner,max_hdr[blk]); \
444 max_hdr[blk] = hdr; \
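/* Worked example (purely illustrative): suppose BLK sits in two loops,
   an outer one headed by H1 and an inner one headed by H2, so the DFS
   numbers them with dfs_nr[H1] < dfs_nr[H2].  If max_hdr[blk] is
   already H2 and UPDATE_LOOP_RELATIONS (blk, H1) runs, the second arm
   fires: H1 is marked non-inner (RESET_BIT (inner, H1)) and
   max_hdr[blk] keeps pointing at the innermost header H2.  In the
   opposite order, the third arm would instead demote H1 and advance
   max_hdr[blk] to H2.  */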
448 /* Find regions for interblock scheduling.
450 A region for scheduling can be:
452 * A loop-free procedure, or
454 * A reducible inner loop, or
456 * A basic block not contained in any other region.
458 ?!? In theory we could build other regions based on extended basic
459 blocks or reverse extended basic blocks. Is it worth the trouble?
461 Loop blocks that form a region are put into the region's block list
462 in topological order.
464 This procedure stores its results into the following global (ick) variables:
466 * nr_regions
467 * rgn_table
468 * rgn_bb_table
469 * block_to_bb
470 * containing_rgn
472 We use dominator relationships to avoid making regions out of non-reducible
473 loops.
475 This procedure needs to be converted to work on pred/succ lists instead
476 of edge tables. That would simplify it somewhat. */
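/* A minimal example (illustrative only) of what this builds: for a CFG

       ENTRY -> A -> B -> C -> EXIT
                     ^    |
                     +----+            (C -> B is the only back edge)

   the natural loop { B, C } becomes one region, listed in rgn_bb_table
   with the header B first and C after it (topological order within the
   region), while A ends up as a single-block region of its own.  */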
478 static void
479 find_rgns (void)
481 int *max_hdr, *dfs_nr, *degree;
482 char no_loops = 1;
483 int node, child, loop_head, i, head, tail;
484 int count = 0, sp, idx = 0;
485 edge_iterator current_edge;
486 edge_iterator *stack;
487 int num_bbs, num_insns, unreachable;
488 int too_large_failure;
489 basic_block bb;
491 /* Note if a block is a natural loop header. */
492 sbitmap header;
494 /* Note if a block is a natural inner loop header. */
495 sbitmap inner;
497 /* Note if a block is in the block queue. */
498 sbitmap in_queue;
500 /* Note if a block is on the DFS stack. */
501 sbitmap in_stack;
503 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
504 and a mapping from block to its loop header (if the block is contained
505 in a loop, else -1).
507 Store results in HEADER, INNER, and MAX_HDR respectively; these will
508 be used as inputs to the second traversal.
510 STACK, SP and DFS_NR are only used during the first traversal. */
512 /* Allocate and initialize variables for the first traversal. */
513 max_hdr = XNEWVEC (int, last_basic_block);
514 dfs_nr = XCNEWVEC (int, last_basic_block);
515 stack = XNEWVEC (edge_iterator, n_edges);
517 inner = sbitmap_alloc (last_basic_block);
518 sbitmap_ones (inner);
520 header = sbitmap_alloc (last_basic_block);
521 sbitmap_zero (header);
523 in_queue = sbitmap_alloc (last_basic_block);
524 sbitmap_zero (in_queue);
526 in_stack = sbitmap_alloc (last_basic_block);
527 sbitmap_zero (in_stack);
529 for (i = 0; i < last_basic_block; i++)
530 max_hdr[i] = -1;
532 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
533 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
535 /* DFS traversal to find inner loops in the cfg. */
537 current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
538 sp = -1;
540 while (1)
542 if (EDGE_PASSED (current_edge))
544 /* We have reached a leaf node or a node that was already
545 processed. Pop edges off the stack until we find
546 an edge that has not yet been processed. */
547 while (sp >= 0 && EDGE_PASSED (current_edge))
549 /* Pop entry off the stack. */
550 current_edge = stack[sp--];
551 node = ei_edge (current_edge)->src->index;
552 gcc_assert (node != ENTRY_BLOCK);
553 child = ei_edge (current_edge)->dest->index;
554 gcc_assert (child != EXIT_BLOCK);
555 RESET_BIT (in_stack, child);
556 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
557 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
558 ei_next (&current_edge);
561 /* See if we have finished the DFS tree traversal. */
562 if (sp < 0 && EDGE_PASSED (current_edge))
563 break;
565 /* Nope, continue the traversal with the popped node. */
566 continue;
569 /* Process a node. */
570 node = ei_edge (current_edge)->src->index;
571 gcc_assert (node != ENTRY_BLOCK);
572 SET_BIT (in_stack, node);
573 dfs_nr[node] = ++count;
575 /* We don't traverse to the exit block. */
576 child = ei_edge (current_edge)->dest->index;
577 if (child == EXIT_BLOCK)
579 SET_EDGE_PASSED (current_edge);
580 ei_next (&current_edge);
581 continue;
584 /* If the successor is in the stack, then we've found a loop.
585 Mark the loop; if it is not a natural loop, it will
586 be rejected during the second traversal. */
587 if (TEST_BIT (in_stack, child))
589 no_loops = 0;
590 SET_BIT (header, child);
591 UPDATE_LOOP_RELATIONS (node, child);
592 SET_EDGE_PASSED (current_edge);
593 ei_next (&current_edge);
594 continue;
597 /* If the child was already visited, then there is no need to visit
598 it again. Just update the loop relationships and restart
599 with a new edge. */
600 if (dfs_nr[child])
602 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
603 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
604 SET_EDGE_PASSED (current_edge);
605 ei_next (&current_edge);
606 continue;
609 /* Push an entry on the stack and continue DFS traversal. */
610 stack[++sp] = current_edge;
611 SET_EDGE_PASSED (current_edge);
612 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
615 /* Reset ->aux field used by EDGE_PASSED. */
616 FOR_ALL_BB (bb)
618 edge_iterator ei;
619 edge e;
620 FOR_EACH_EDGE (e, ei, bb->succs)
621 e->aux = NULL;
625 /* Another check for unreachable blocks. The earlier test in
626 is_cfg_nonregular only finds unreachable blocks that do not
627 form a loop.
629 The DFS traversal will mark every block that is reachable from
630 the entry node by placing a nonzero value in dfs_nr. Thus if
631 dfs_nr is zero for any block, then it must be unreachable. */
632 unreachable = 0;
633 FOR_EACH_BB (bb)
634 if (dfs_nr[bb->index] == 0)
636 unreachable = 1;
637 break;
640 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
641 to hold degree counts. */
642 degree = dfs_nr;
644 FOR_EACH_BB (bb)
645 degree[bb->index] = EDGE_COUNT (bb->preds);
647 /* Do not perform region scheduling if there are any unreachable
648 blocks. */
649 if (!unreachable)
651 int *queue;
653 if (no_loops)
654 SET_BIT (header, 0);
656 /* Second traversal: find reducible inner loops and topologically sort
657 blocks of each region. */
659 queue = XNEWVEC (int, n_basic_blocks);
661 /* Find blocks which are inner loop headers. We still have non-reducible
662 loops to consider at this point. */
663 FOR_EACH_BB (bb)
665 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
667 edge e;
668 edge_iterator ei;
669 basic_block jbb;
671 /* Now check that the loop is reducible. We do this separately
672 from finding inner loops so that we do not find a reducible
673 loop which contains an inner non-reducible loop.
675 A simple way to find reducible/natural loops is to verify
676 that each block in the loop is dominated by the loop
677 header.
679 If there exists a block that is not dominated by the loop
680 header, then the block is reachable from outside the loop
681 and thus the loop is not a natural loop. */
682 FOR_EACH_BB (jbb)
684 /* First identify blocks in the loop, except for the loop
685 entry block. */
686 if (bb->index == max_hdr[jbb->index] && bb != jbb)
688 /* Now verify that the block is dominated by the loop
689 header. */
690 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
691 break;
695 /* If we exited the loop early, then BB is the header of
696 a non-reducible loop and we should quit processing it
697 now. */
698 if (jbb != EXIT_BLOCK_PTR)
699 continue;
701 /* BB is a header of an inner loop, or block 0 in a subroutine
702 with no loops at all. */
703 head = tail = -1;
704 too_large_failure = 0;
705 loop_head = max_hdr[bb->index];
707 /* Decrease the degree of all BB's successors for topological
708 ordering. */
709 FOR_EACH_EDGE (e, ei, bb->succs)
710 if (e->dest != EXIT_BLOCK_PTR)
711 --degree[e->dest->index];
713 /* Estimate # insns, and count # blocks in the region. */
714 num_bbs = 1;
715 num_insns = (INSN_LUID (BB_END (bb))
716 - INSN_LUID (BB_HEAD (bb)));
718 /* Find all loop latches (blocks with back edges to the loop
719 header), or all the leaf blocks if the cfg has no loops.
721 Place those blocks into the queue. */
722 if (no_loops)
724 FOR_EACH_BB (jbb)
725 /* Leaf nodes have only a single successor which must
726 be EXIT_BLOCK. */
727 if (single_succ_p (jbb)
728 && single_succ (jbb) == EXIT_BLOCK_PTR)
730 queue[++tail] = jbb->index;
731 SET_BIT (in_queue, jbb->index);
733 if (too_large (jbb->index, &num_bbs, &num_insns))
735 too_large_failure = 1;
736 break;
740 else
742 edge e;
744 FOR_EACH_EDGE (e, ei, bb->preds)
746 if (e->src == ENTRY_BLOCK_PTR)
747 continue;
749 node = e->src->index;
751 if (max_hdr[node] == loop_head && node != bb->index)
753 /* This is a loop latch. */
754 queue[++tail] = node;
755 SET_BIT (in_queue, node);
757 if (too_large (node, &num_bbs, &num_insns))
759 too_large_failure = 1;
760 break;
766 /* Now add all the blocks in the loop to the queue.
768 We know the loop is a natural loop; however the algorithm
769 above will not always mark certain blocks as being in the
770 loop. Consider:
771 node      children
772 a         b,c
773 b         c
774 c         a,d
775 d         b
777 The algorithm in the DFS traversal may not mark B & D as part
778 of the loop (i.e. they will not have max_hdr set to A).
780 We know they can not be loop latches (else they would have
781 had max_hdr set since they'd have a backedge to a dominator
782 block). So we don't need them on the initial queue.
784 We know they are part of the loop because they are dominated
785 by the loop header and can be reached by a backwards walk of
786 the edges starting with nodes on the initial queue.
788 It is safe and desirable to include those nodes in the
789 loop/scheduling region. To do so we would need to decrease
790 the degree of a node if it is the target of a backedge
791 within the loop itself as the node is placed in the queue.
793 We do not do this because I'm not sure that the actual
794 scheduling code will properly handle this case. ?!? */
796 while (head < tail && !too_large_failure)
798 edge e;
799 child = queue[++head];
801 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
803 node = e->src->index;
805 /* See discussion above about nodes not marked as in
806 this loop during the initial DFS traversal. */
807 if (e->src == ENTRY_BLOCK_PTR
808 || max_hdr[node] != loop_head)
810 tail = -1;
811 break;
813 else if (!TEST_BIT (in_queue, node) && node != bb->index)
815 queue[++tail] = node;
816 SET_BIT (in_queue, node);
818 if (too_large (node, &num_bbs, &num_insns))
820 too_large_failure = 1;
821 break;
827 if (tail >= 0 && !too_large_failure)
829 /* Place the loop header into list of region blocks. */
830 degree[bb->index] = -1;
831 rgn_bb_table[idx] = bb->index;
832 RGN_NR_BLOCKS (nr_regions) = num_bbs;
833 RGN_BLOCKS (nr_regions) = idx++;
834 CONTAINING_RGN (bb->index) = nr_regions;
835 BLOCK_TO_BB (bb->index) = count = 0;
837 /* Remove blocks from queue[] when their in degree
838 becomes zero. Repeat until no blocks are left on the
839 list. This produces a topological list of blocks in
840 the region. */
841 while (tail >= 0)
843 if (head < 0)
844 head = tail;
845 child = queue[head];
846 if (degree[child] == 0)
848 edge e;
850 degree[child] = -1;
851 rgn_bb_table[idx++] = child;
852 BLOCK_TO_BB (child) = ++count;
853 CONTAINING_RGN (child) = nr_regions;
854 queue[head] = queue[tail--];
856 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
857 if (e->dest != EXIT_BLOCK_PTR)
858 --degree[e->dest->index];
860 else
861 --head;
863 ++nr_regions;
867 free (queue);
870 /* Any block that did not end up in a region is placed into a region
871 by itself. */
872 FOR_EACH_BB (bb)
873 if (degree[bb->index] >= 0)
875 rgn_bb_table[idx] = bb->index;
876 RGN_NR_BLOCKS (nr_regions) = 1;
877 RGN_BLOCKS (nr_regions) = idx++;
878 CONTAINING_RGN (bb->index) = nr_regions++;
879 BLOCK_TO_BB (bb->index) = 0;
882 free (max_hdr);
883 free (dfs_nr);
884 free (stack);
885 sbitmap_free (header);
886 sbitmap_free (inner);
887 sbitmap_free (in_queue);
888 sbitmap_free (in_stack);
891 /* Functions for region scheduling information. */
893 /* Compute dominators, probability, and potential-split-edges of bb.
894 Assume that these values were already computed for bb's predecessors. */
896 static void
897 compute_dom_prob_ps (int bb)
899 edge_iterator in_ei;
900 edge in_edge;
902 if (IS_RGN_ENTRY (bb))
904 SET_BIT (dom[bb], 0);
905 prob[bb] = REG_BR_PROB_BASE;
906 return;
909 prob[bb] = 0;
911 /* Initialize dom[bb] to '111..1'. */
912 sbitmap_ones (dom[bb]);
914 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
916 int pred_bb;
917 edge out_edge;
918 edge_iterator out_ei;
920 if (in_edge->src == ENTRY_BLOCK_PTR)
921 continue;
923 pred_bb = BLOCK_TO_BB (in_edge->src->index);
924 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
925 sbitmap_a_or_b (ancestor_edges[bb],
926 ancestor_edges[bb], ancestor_edges[pred_bb]);
928 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
930 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
932 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
933 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
935 prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
938 SET_BIT (dom[bb], bb);
939 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
941 if (sched_verbose >= 2)
942 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
943 (100 * prob[bb]) / REG_BR_PROB_BASE);
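/* In equation form (an informal restatement of the loop above, where p
   ranges over bb's predecessors inside the region and e over the
   corresponding incoming edges):

       dom[bb]  = { bb }  U  intersection over p of dom[p]
       prob[bb] = sum over e of prob[src(e)] * e->probability
                                             / REG_BR_PROB_BASE
       ancestor_edges[bb] = union over e of ({ e } U ancestor_edges[src(e)])
       pot_split[bb] = (union over p of pot_split[p] and of all outgoing
                        edges of p)  minus  ancestor_edges[bb]
*/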
946 /* Functions for target info. */
948 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
949 Note that bb_trg dominates bb_src. */
951 static void
952 split_edges (int bb_src, int bb_trg, edgelst *bl)
954 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
955 sbitmap_copy (src, pot_split[bb_src]);
957 sbitmap_difference (src, src, pot_split[bb_trg]);
958 extract_edgelst (src, bl);
959 sbitmap_free (src);
962 /* Find the valid candidate-source-blocks for the target block TRG, compute
963 their probability, and check if they are speculative or not.
964 For speculative sources, compute their update-blocks and split-blocks. */
966 static void
967 compute_trg_info (int trg)
969 candidate *sp;
970 edgelst el;
971 int i, j, k, update_idx;
972 basic_block block;
973 sbitmap visited;
974 edge_iterator ei;
975 edge e;
977 /* Define some of the fields for the target bb as well. */
978 sp = candidate_table + trg;
979 sp->is_valid = 1;
980 sp->is_speculative = 0;
981 sp->src_prob = REG_BR_PROB_BASE;
983 visited = sbitmap_alloc (last_basic_block);
985 for (i = trg + 1; i < current_nr_blocks; i++)
987 sp = candidate_table + i;
989 sp->is_valid = IS_DOMINATED (i, trg);
990 if (sp->is_valid)
992 int tf = prob[trg], cf = prob[i];
994 /* In CFGs with low probability edges TF can possibly be zero. */
995 sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
996 sp->is_valid = (sp->src_prob >= min_spec_prob);
999 if (sp->is_valid)
1001 split_edges (i, trg, &el);
1002 sp->is_speculative = (el.nr_members) ? 1 : 0;
1003 if (sp->is_speculative && !flag_schedule_speculative)
1004 sp->is_valid = 0;
1007 if (sp->is_valid)
1009 /* Compute split blocks and store them in bblst_table.
1010 The TO block of every split edge is a split block. */
1011 sp->split_bbs.first_member = &bblst_table[bblst_last];
1012 sp->split_bbs.nr_members = el.nr_members;
1013 for (j = 0; j < el.nr_members; bblst_last++, j++)
1014 bblst_table[bblst_last] = el.first_member[j]->dest;
1015 sp->update_bbs.first_member = &bblst_table[bblst_last];
1017 /* Compute update blocks and store them in bblst_table.
1018 For every split edge, look at the FROM block, and check
1019 all out edges. For each out edge that is not a split edge,
1020 add the TO block to the update block list. This list can end
1021 up with a lot of duplicates. We need to weed them out to avoid
1022 overrunning the end of the bblst_table. */
1024 update_idx = 0;
1025 sbitmap_zero (visited);
1026 for (j = 0; j < el.nr_members; j++)
1028 block = el.first_member[j]->src;
1029 FOR_EACH_EDGE (e, ei, block->succs)
1031 if (!TEST_BIT (visited, e->dest->index))
1033 for (k = 0; k < el.nr_members; k++)
1034 if (e == el.first_member[k])
1035 break;
1037 if (k >= el.nr_members)
1039 bblst_table[bblst_last++] = e->dest;
1040 SET_BIT (visited, e->dest->index);
1041 update_idx++;
1046 sp->update_bbs.nr_members = update_idx;
1048 /* Make sure we didn't overrun the end of bblst_table. */
1049 gcc_assert (bblst_last <= bblst_size);
1051 else
1053 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1055 sp->is_speculative = 0;
1056 sp->src_prob = 0;
1060 sbitmap_free (visited);
1063 /* Print candidates info, for debugging purposes. Callable from debugger. */
1065 void
1066 debug_candidate (int i)
1068 if (!candidate_table[i].is_valid)
1069 return;
1071 if (candidate_table[i].is_speculative)
1073 int j;
1074 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1076 fprintf (sched_dump, "split path: ");
1077 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1079 int b = candidate_table[i].split_bbs.first_member[j]->index;
1081 fprintf (sched_dump, " %d ", b);
1083 fprintf (sched_dump, "\n");
1085 fprintf (sched_dump, "update path: ");
1086 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1088 int b = candidate_table[i].update_bbs.first_member[j]->index;
1090 fprintf (sched_dump, " %d ", b);
1092 fprintf (sched_dump, "\n");
1094 else
1096 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1100 /* Print candidates info, for debugging purposes. Callable from debugger. */
1102 void
1103 debug_candidates (int trg)
1105 int i;
1107 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1108 BB_TO_BLOCK (trg), trg);
1109 for (i = trg + 1; i < current_nr_blocks; i++)
1110 debug_candidate (i);
1113 /* Functions for speculative scheduling. */
1115 /* Return 0 if x is a set of a register alive at the beginning of one
1116 of the split-blocks of src, otherwise return 1. */
1118 static int
1119 check_live_1 (int src, rtx x)
1121 int i;
1122 int regno;
1123 rtx reg = SET_DEST (x);
1125 if (reg == 0)
1126 return 1;
1128 while (GET_CODE (reg) == SUBREG
1129 || GET_CODE (reg) == ZERO_EXTRACT
1130 || GET_CODE (reg) == STRICT_LOW_PART)
1131 reg = XEXP (reg, 0);
1133 if (GET_CODE (reg) == PARALLEL)
1135 int i;
1137 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1138 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1139 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1140 return 1;
1142 return 0;
1145 if (!REG_P (reg))
1146 return 1;
1148 regno = REGNO (reg);
1150 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1152 /* Global registers are assumed live. */
1153 return 0;
1155 else
1157 if (regno < FIRST_PSEUDO_REGISTER)
1159 /* Check for hard registers. */
1160 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1161 while (--j >= 0)
1163 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1165 basic_block b = candidate_table[src].split_bbs.first_member[i];
1167 if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start,
1168 regno + j))
1170 return 0;
1175 else
1177 /* Check for pseudo registers. */
1178 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1180 basic_block b = candidate_table[src].split_bbs.first_member[i];
1182 if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start, regno))
1184 return 0;
1190 return 1;
1193 /* If x is a set of a register R, mark that R is alive at the beginning
1194 of every update-block of src. */
1196 static void
1197 update_live_1 (int src, rtx x)
1199 int i;
1200 int regno;
1201 rtx reg = SET_DEST (x);
1203 if (reg == 0)
1204 return;
1206 while (GET_CODE (reg) == SUBREG
1207 || GET_CODE (reg) == ZERO_EXTRACT
1208 || GET_CODE (reg) == STRICT_LOW_PART)
1209 reg = XEXP (reg, 0);
1211 if (GET_CODE (reg) == PARALLEL)
1213 int i;
1215 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1216 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1217 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1219 return;
1222 if (!REG_P (reg))
1223 return;
1225 /* Global registers are always live, so the code below does not apply
1226 to them. */
1228 regno = REGNO (reg);
1230 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1232 if (regno < FIRST_PSEUDO_REGISTER)
1234 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1235 while (--j >= 0)
1237 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1239 basic_block b = candidate_table[src].update_bbs.first_member[i];
1241 SET_REGNO_REG_SET (b->il.rtl->global_live_at_start,
1242 regno + j);
1246 else
1248 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1250 basic_block b = candidate_table[src].update_bbs.first_member[i];
1252 SET_REGNO_REG_SET (b->il.rtl->global_live_at_start, regno);
1258 /* Return 1 if insn can be speculatively moved from block src to trg,
1259 otherwise return 0. Called before the first insertion of insn into the
1260 ready list, or before scheduling. */
1262 static int
1263 check_live (rtx insn, int src)
1265 /* Find the registers set by instruction. */
1266 if (GET_CODE (PATTERN (insn)) == SET
1267 || GET_CODE (PATTERN (insn)) == CLOBBER)
1268 return check_live_1 (src, PATTERN (insn));
1269 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1271 int j;
1272 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1273 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1274 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1275 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1276 return 0;
1278 return 1;
1281 return 1;
1284 /* Update the live registers info after insn was moved speculatively from
1285 block src to trg. */
1287 static void
1288 update_live (rtx insn, int src)
1290 /* Find the registers set by instruction. */
1291 if (GET_CODE (PATTERN (insn)) == SET
1292 || GET_CODE (PATTERN (insn)) == CLOBBER)
1293 update_live_1 (src, PATTERN (insn));
1294 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1296 int j;
1297 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1298 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1299 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1300 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1304 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1305 #define IS_REACHABLE(bb_from, bb_to) \
1306 (bb_from == bb_to \
1307 || IS_RGN_ENTRY (bb_from) \
1308 || (TEST_BIT (ancestor_edges[bb_to], \
1309 EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))
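/* An informal reading of the macro above: bb_to is considered reachable
   from bb_from when the two are the same block, when bb_from is the
   region entry (from which every block in the region is reachable), or
   when the edge entering bb_from is one of bb_to's ancestor edges,
   i.e. lies on some path from the region entry to bb_to.  */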
1311 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1313 static void
1314 set_spec_fed (rtx load_insn)
1316 rtx link;
1318 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1319 if (GET_MODE (link) == VOIDmode)
1320 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1321 } /* set_spec_fed */
1323 /* On the path from the insn to load_insn_bb, find a conditional
1324 branch depending on insn, that guards the speculative load. */
1326 static int
1327 find_conditional_protection (rtx insn, int load_insn_bb)
1329 rtx link;
1331 /* Iterate through DEF-USE forward dependences. */
1332 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1334 rtx next = XEXP (link, 0);
1335 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1336 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1337 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1338 && load_insn_bb != INSN_BB (next)
1339 && GET_MODE (link) == VOIDmode
1340 && (JUMP_P (next)
1341 || find_conditional_protection (next, load_insn_bb)))
1342 return 1;
1344 return 0;
1345 } /* find_conditional_protection */
1347 /* Returns 1 if the same insn1 that participates in the computation
1348 of load_insn's address is feeding a conditional branch that is
1349 guarding load_insn. This is true if we find the two DEF-USE
1350 chains:
1351 insn1 -> ... -> conditional-branch
1352 insn1 -> ... -> load_insn,
1353 and if a flow path exists:
1354 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1355 and if insn1 is on the path
1356 region-entry -> ... -> bb_trg -> ... load_insn.
1358 Locate insn1 by climbing on LOG_LINKS from load_insn.
1359 Locate the branch by following INSN_DEPEND from insn1. */
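/* A concrete shape of that pattern (purely illustrative; the registers
   r1, r2, r3 and label L are made up for the example):

       insn1:        r1 = r2 + 8           address computation
          ...
       cond-branch:  if (r1 == 0) jump L   DEF-USE successor of insn1
          ...
       load_insn:    r3 = [r1]             also a DEF-USE successor of insn1

   Here the branch "guards" the load: on paths where the load could
   trap, the test on the same address diverts control flow first.  */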
1361 static int
1362 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1364 rtx link;
1366 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1368 rtx insn1 = XEXP (link, 0);
1370 /* Must be a DEF-USE dependence upon non-branch. */
1371 if (GET_MODE (link) != VOIDmode
1372 || JUMP_P (insn1))
1373 continue;
1375 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1376 if (INSN_BB (insn1) == bb_src
1377 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1378 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1379 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1380 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1381 continue;
1383 /* Now search for the conditional-branch. */
1384 if (find_conditional_protection (insn1, bb_src))
1385 return 1;
1387 /* Recursive step: search another insn1, "above" current insn1. */
1388 return is_conditionally_protected (insn1, bb_src, bb_trg);
1391 /* The chain does not exist. */
1392 return 0;
1393 } /* is_conditionally_protected */
1395 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1396 load_insn can move speculatively from bb_src to bb_trg. All the
1397 following must hold:
1399 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1400 (2) load_insn and load1 have a def-use dependence upon
1401 the same insn 'insn1'.
1402 (3) either load2 is in bb_trg, or:
1403 - there's only one split-block, and
1404 - load1 is on the escape path, and
1406 From all these we can conclude that the two loads access memory
1407 addresses that differ at most by a constant, and hence if moving
1408 load_insn would cause an exception, it would have been caused by
1409 load2 anyhow. */
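/* Purely illustrative instance of those conditions (r1, r2, r3 are made
   up for the example):

       insn1:      r1 = ...             the shared base computation
       load_insn:  r2 = [r1 + 4]        in bb_src, a PFREE candidate
       insn2:      r3 = [r1 + 8]        in bb_trg (or in the single
                                        split block), also PFREE

   Both loads hang off the same insn1 and their addresses differ only by
   a constant, so any exception that moving load_insn could raise would
   already have been raised by insn2 on that path.  */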
1411 static int
1412 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1414 rtx back_link;
1415 candidate *candp = candidate_table + bb_src;
1417 if (candp->split_bbs.nr_members != 1)
1418 /* Must have exactly one escape block. */
1419 return 0;
1421 for (back_link = LOG_LINKS (load_insn);
1422 back_link; back_link = XEXP (back_link, 1))
1424 rtx insn1 = XEXP (back_link, 0);
1426 if (GET_MODE (back_link) == VOIDmode)
1428 /* Found a DEF-USE dependence (insn1, load_insn). */
1429 rtx fore_link;
1431 for (fore_link = INSN_DEPEND (insn1);
1432 fore_link; fore_link = XEXP (fore_link, 1))
1434 rtx insn2 = XEXP (fore_link, 0);
1435 if (GET_MODE (fore_link) == VOIDmode)
1437 /* Found a DEF-USE dependence (insn1, insn2). */
1438 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1439 /* insn2 not guaranteed to be a 1 base reg load. */
1440 continue;
1442 if (INSN_BB (insn2) == bb_trg)
1443 /* insn2 is the similar load, in the target block. */
1444 return 1;
1446 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1447 /* insn2 is a similar load, in a split-block. */
1448 return 1;
1454 /* Couldn't find a similar load. */
1455 return 0;
1456 } /* is_pfree */
1458 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1459 a load moved speculatively, or if load_insn is protected by
1460 a compare on load_insn's address). */
1462 static int
1463 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1465 if (FED_BY_SPEC_LOAD (load_insn))
1466 return 1;
1468 if (LOG_LINKS (load_insn) == NULL)
1469 /* Dependence may 'hide' out of the region. */
1470 return 1;
1472 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1473 return 1;
1475 return 0;
1478 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1479 Return 1 if insn is exception-free (and the motion is valid)
1480 and 0 otherwise. */
1482 static int
1483 is_exception_free (rtx insn, int bb_src, int bb_trg)
1485 int insn_class = haifa_classify_insn (insn);
1487 /* Handle non-load insns. */
1488 switch (insn_class)
1490 case TRAP_FREE:
1491 return 1;
1492 case TRAP_RISKY:
1493 return 0;
1494 default:;
1497 /* Handle loads. */
1498 if (!flag_schedule_speculative_load)
1499 return 0;
1500 IS_LOAD_INSN (insn) = 1;
1501 switch (insn_class)
1503 case IFREE:
1504 return (1);
1505 case IRISKY:
1506 return 0;
1507 case PFREE_CANDIDATE:
1508 if (is_pfree (insn, bb_src, bb_trg))
1509 return 1;
1510 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1511 case PRISKY_CANDIDATE:
1512 if (!flag_schedule_speculative_load_dangerous
1513 || is_prisky (insn, bb_src, bb_trg))
1514 return 0;
1515 break;
1516 default:;
1519 return flag_schedule_speculative_load_dangerous;
1522 /* The number of insns from the current block scheduled so far. */
1523 static int sched_target_n_insns;
1524 /* The number of insns from the current block to be scheduled in total. */
1525 static int target_n_insns;
1526 /* The number of insns from the entire region scheduled so far. */
1527 static int sched_n_insns;
1528 /* Nonzero if the last scheduled insn was a jump. */
1529 static int last_was_jump;
1531 /* Implementations of the sched_info functions for region scheduling. */
1532 static void init_ready_list (struct ready_list *);
1533 static int can_schedule_ready_p (rtx);
1534 static int new_ready (rtx);
1535 static int schedule_more_p (void);
1536 static const char *rgn_print_insn (rtx, int);
1537 static int rgn_rank (rtx, rtx);
1538 static int contributes_to_priority (rtx, rtx);
1539 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1541 /* Return nonzero if there are more insns that should be scheduled. */
1543 static int
1544 schedule_more_p (void)
1546 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1549 /* Add all insns that are initially ready to the ready list READY. Called
1550 once before scheduling a set of insns. */
1552 static void
1553 init_ready_list (struct ready_list *ready)
1555 rtx prev_head = current_sched_info->prev_head;
1556 rtx next_tail = current_sched_info->next_tail;
1557 int bb_src;
1558 rtx insn;
1560 target_n_insns = 0;
1561 sched_target_n_insns = 0;
1562 sched_n_insns = 0;
1563 last_was_jump = 0;
1565 /* Print debugging information. */
1566 if (sched_verbose >= 5)
1567 debug_dependencies ();
1569 /* Prepare current target block info. */
1570 if (current_nr_blocks > 1)
1572 candidate_table = XNEWVEC (candidate, current_nr_blocks);
1574 bblst_last = 0;
1575 /* bblst_table holds split blocks and update blocks for each block after
1576 the current one in the region. split blocks and update blocks are
1577 the TO blocks of region edges, so there can be at most rgn_nr_edges
1578 of them. */
1579 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1580 bblst_table = XNEWVEC (basic_block, bblst_size);
1582 edgelst_last = 0;
1583 edgelst_table = XNEWVEC (edge, rgn_nr_edges);
1585 compute_trg_info (target_bb);
1588 /* Initialize ready list with all 'ready' insns in target block.
1589 Count number of insns in the target block being scheduled. */
1590 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1592 if (INSN_DEP_COUNT (insn) == 0)
1594 ready_add (ready, insn);
1596 if (targetm.sched.adjust_priority)
1597 INSN_PRIORITY (insn) =
1598 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1600 target_n_insns++;
1603 /* Add to ready list all 'ready' insns in valid source blocks.
1604 For speculative insns, check-live, exception-free, and
1605 issue-delay. */
1606 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1607 if (IS_VALID (bb_src))
1609 rtx src_head;
1610 rtx src_next_tail;
1611 rtx tail, head;
1613 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1614 src_next_tail = NEXT_INSN (tail);
1615 src_head = head;
1617 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1619 if (! INSN_P (insn))
1620 continue;
1622 if (!CANT_MOVE (insn)
1623 && (!IS_SPECULATIVE_INSN (insn)
1624 || ((recog_memoized (insn) < 0
1625 || min_insn_conflict_delay (curr_state,
1626 insn, insn) <= 3)
1627 && check_live (insn, bb_src)
1628 && is_exception_free (insn, bb_src, target_bb))))
1629 if (INSN_DEP_COUNT (insn) == 0)
1631 ready_add (ready, insn);
1633 if (targetm.sched.adjust_priority)
1634 INSN_PRIORITY (insn) =
1635 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1641 /* Called after taking INSN from the ready list. Returns nonzero if this
1642 insn can be scheduled, zero if we should silently discard it. */
1644 static int
1645 can_schedule_ready_p (rtx insn)
1647 if (JUMP_P (insn))
1648 last_was_jump = 1;
1650 /* An interblock motion? */
1651 if (INSN_BB (insn) != target_bb)
1653 basic_block b1;
1655 if (IS_SPECULATIVE_INSN (insn))
1657 if (!check_live (insn, INSN_BB (insn)))
1658 return 0;
1659 update_live (insn, INSN_BB (insn));
1661 /* For speculative load, mark insns fed by it. */
1662 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1663 set_spec_fed (insn);
1665 nr_spec++;
1667 nr_inter++;
1669 /* Update source block boundaries. */
1670 b1 = BLOCK_FOR_INSN (insn);
1671 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1673 /* We moved all the insns in the basic block.
1674 Emit a note after the last insn and update the
1675 begin/end boundaries to point to the note. */
1676 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1677 BB_HEAD (b1) = note;
1678 BB_END (b1) = note;
1680 else if (insn == BB_END (b1))
1682 /* We took insns from the end of the basic block,
1683 so update the end of block boundary so that it
1684 points to the first insn we did not move. */
1685 BB_END (b1) = PREV_INSN (insn);
1687 else if (insn == BB_HEAD (b1))
1689 /* We took insns from the start of the basic block,
1690 so update the start of block boundary so that
1691 it points to the first insn we did not move. */
1692 BB_HEAD (b1) = NEXT_INSN (insn);
1695 else
1697 /* In block motion. */
1698 sched_target_n_insns++;
1700 sched_n_insns++;
1702 return 1;
1705 /* Called after INSN has all its dependencies resolved. Return nonzero
1706 if it should be moved to the ready list or the queue, or zero if we
1707 should silently discard it. */
1708 static int
1709 new_ready (rtx next)
1711 /* For speculative insns, before inserting to ready/queue,
1712 check live, exception-free, and issue-delay. */
1713 if (INSN_BB (next) != target_bb
1714 && (!IS_VALID (INSN_BB (next))
1715 || CANT_MOVE (next)
1716 || (IS_SPECULATIVE_INSN (next)
1717 && ((recog_memoized (next) >= 0
1718 && min_insn_conflict_delay (curr_state, next, next) > 3)
1719 || !check_live (next, INSN_BB (next))
1720 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1721 return 0;
1722 return 1;
1725 /* Return a string that contains the insn uid and optionally anything else
1726 necessary to identify this insn in an output. It's valid to use a
1727 static buffer for this. The ALIGNED parameter should cause the string
1728 to be formatted so that multiple output lines will line up nicely. */
1730 static const char *
1731 rgn_print_insn (rtx insn, int aligned)
1733 static char tmp[80];
1735 if (aligned)
1736 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1737 else
1739 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1740 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1741 else
1742 sprintf (tmp, "%d", INSN_UID (insn));
1744 return tmp;
1747 /* Compare priority of two insns. Return a positive number if the second
1748 insn is to be preferred for scheduling, and a negative one if the first
1749 is to be preferred. Zero if they are equally good. */
1751 static int
1752 rgn_rank (rtx insn1, rtx insn2)
1754 /* Some comparisons make sense only in interblock scheduling. */
1755 if (INSN_BB (insn1) != INSN_BB (insn2))
1757 int spec_val, prob_val;
1759 /* Prefer an inblock motion over an interblock motion. */
1760 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1761 return 1;
1762 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1763 return -1;
1765 /* Prefer a useful motion over a speculative one. */
1766 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1767 if (spec_val)
1768 return spec_val;
1770 /* Prefer a more probable (speculative) insn. */
1771 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1772 if (prob_val)
1773 return prob_val;
1775 return 0;
1778 /* NEXT is an instruction that depends on INSN (a backward dependence);
1779 return nonzero if we should include this dependence in priority
1780 calculations. */
1782 static int
1783 contributes_to_priority (rtx next, rtx insn)
1785 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1788 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1789 conditionally set before INSN. Store the set of registers that
1790 must be considered as used by this jump in USED and that of
1791 registers that must be considered as set in SET. */
1793 static void
1794 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1795 regset cond_exec ATTRIBUTE_UNUSED,
1796 regset used ATTRIBUTE_UNUSED,
1797 regset set ATTRIBUTE_UNUSED)
1799 /* Nothing to do here, since we postprocess jumps in
1800 add_branch_dependences. */
1803 /* Used in schedule_insns to initialize current_sched_info for scheduling
1804 regions (or single basic blocks). */
1806 static struct sched_info region_sched_info =
1808 init_ready_list,
1809 can_schedule_ready_p,
1810 schedule_more_p,
1811 new_ready,
1812 rgn_rank,
1813 rgn_print_insn,
1814 contributes_to_priority,
1815 compute_jump_reg_dependencies,
1817 NULL, NULL,
1818 NULL, NULL,
1819 0, 0, 0
1822 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1824 static bool
1825 sets_likely_spilled (rtx pat)
1827 bool ret = false;
1828 note_stores (pat, sets_likely_spilled_1, &ret);
1829 return ret;
1832 static void
1833 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1835 bool *ret = (bool *) data;
1837 if (GET_CODE (pat) == SET
1838 && REG_P (x)
1839 && REGNO (x) < FIRST_PSEUDO_REGISTER
1840 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
1841 *ret = true;
1844 /* Add dependences so that branches are scheduled to run last in their
1845 block. */
1847 static void
1848 add_branch_dependences (rtx head, rtx tail)
1850 rtx insn, last;
1852 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
1853 that can throw exceptions, force them to remain in order at the end of
1854 the block by adding dependencies and giving the last a high priority.
1855 There may be notes present, and prev_head may also be a note.
1857 Branches must obviously remain at the end. Calls should remain at the
1858 end since moving them results in worse register allocation. Uses remain
1859 at the end to ensure proper register allocation.
1861 cc0 setters remain at the end because they can't be moved away from
1862 their cc0 user.
1864 COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
1866 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
1867 are not moved before reload because we can wind up with register
1868 allocation failures. */
1870 insn = tail;
1871 last = 0;
1872 while (CALL_P (insn)
1873 || JUMP_P (insn)
1874 || (NONJUMP_INSN_P (insn)
1875 && (GET_CODE (PATTERN (insn)) == USE
1876 || GET_CODE (PATTERN (insn)) == CLOBBER
1877 || can_throw_internal (insn)
1878 #ifdef HAVE_cc0
1879 || sets_cc0_p (PATTERN (insn))
1880 #endif
1881 || (!reload_completed
1882 && sets_likely_spilled (PATTERN (insn)))))
1883 || NOTE_P (insn))
1885 if (!NOTE_P (insn))
1887 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
1889 if (! sched_insns_conditions_mutex_p (last, insn))
1890 add_dependence (last, insn, REG_DEP_ANTI);
1891 INSN_REF_COUNT (insn)++;
1894 CANT_MOVE (insn) = 1;
1896 last = insn;
1899 /* Don't overrun the bounds of the basic block. */
1900 if (insn == head)
1901 break;
1903 insn = PREV_INSN (insn);
1906 /* Make sure these insns are scheduled last in their block. */
1907 insn = last;
1908 if (insn != 0)
1909 while (insn != head)
1911 insn = prev_nonnote_insn (insn);
1913 if (INSN_REF_COUNT (insn) != 0)
1914 continue;
1916 if (! sched_insns_conditions_mutex_p (last, insn))
1917 add_dependence (last, insn, REG_DEP_ANTI);
1918 INSN_REF_COUNT (insn) = 1;
1921 #ifdef HAVE_conditional_execution
1922 /* Finally, if the block ends in a jump, and we are doing intra-block
1923 scheduling, make sure that the branch depends on any COND_EXEC insns
1924 inside the block to avoid moving the COND_EXECs past the branch insn.
1926 We only have to do this after reload, because (1) before reload there
1927 are no COND_EXEC insns, and (2) the region scheduler is an intra-block
1928 scheduler after reload.
1930 FIXME: We could in some cases move COND_EXEC insns past the branch if
1931 this scheduler would be a little smarter. Consider this code:
1933 T = [addr]
1934 C ? addr += 4
1935 !C ? X += 12
1936 C ? T += 1
1937 C ? jump foo
1939 On a target with a one cycle stall on a memory access the optimal
1940 sequence would be:
1942 T = [addr]
1943 C ? addr += 4
1944 C ? T += 1
1945 C ? jump foo
1946 !C ? X += 12
1948 We don't want to put the 'X += 12' before the branch because it just
1949 wastes a cycle of execution time when the branch is taken.
1951 Note that in the example "!C" will always be true. That is another
1952 possible improvement for handling COND_EXECs in this scheduler: it
1953 could remove always-true predicates. */
1955 if (!reload_completed || ! JUMP_P (tail))
1956 return;
1958 insn = tail;
1959 while (insn != head)
1961 insn = PREV_INSN (insn);
1963 /* Note that we want to add this dependency even when
1964 sched_insns_conditions_mutex_p returns true. The whole point
1965 is that we _want_ this dependency, even if these insns really
1966 are independent. */
1967 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
1968 add_dependence (tail, insn, REG_DEP_ANTI);
1970 #endif
1973 /* Data structures for the computation of data dependences in a region. We
1974 keep one `deps' structure for every basic block. Before analyzing the
1975 data dependences for a bb, its variables are initialized as a function of
1976 the variables of its predecessors. When the analysis for a bb completes,
1977 we save the contents to the corresponding bb_deps[bb] variable. */
1979 static struct deps *bb_deps;
1981 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
1983 static rtx
1984 concat_INSN_LIST (rtx copy, rtx old)
1986 rtx new = old;
1987 for (; copy ; copy = XEXP (copy, 1))
1988 new = alloc_INSN_LIST (XEXP (copy, 0), new);
1989 return new;
1992 static void
1993 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
1994 rtx *old_mems_p)
1996 rtx new_insns = *old_insns_p;
1997 rtx new_mems = *old_mems_p;
1999 while (copy_insns)
2001 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2002 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2003 copy_insns = XEXP (copy_insns, 1);
2004 copy_mems = XEXP (copy_mems, 1);
2007 *old_insns_p = new_insns;
2008 *old_mems_p = new_mems;
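/* COPY_INSNS and COPY_MEMS are parallel lists of the same length -- one
   INSN_LIST node plus one EXPR_LIST node per pending memory reference --
   which is why the loop above can advance both in lockstep.  */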
2011 /* After computing the dependences for block BB, propagate the dependences
2012 found in PRED_DEPS to the successors of the block. */
2013 static void
2014 propagate_deps (int bb, struct deps *pred_deps)
2016 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
2017 edge_iterator ei;
2018 edge e;
2020 /* bb's structures are inherited by its successors. */
2021 FOR_EACH_EDGE (e, ei, block->succs)
2023 struct deps *succ_deps;
2024 unsigned reg;
2025 reg_set_iterator rsi;
2027 /* Only bbs "below" bb, in the same region, are interesting. */
2028 if (e->dest == EXIT_BLOCK_PTR
2029 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2030 || BLOCK_TO_BB (e->dest->index) <= bb)
2031 continue;
2033 succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);
2035 /* The reg_last lists are inherited by successor. */
2036 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2038 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2039 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2041 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2042 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2043 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2044 succ_rl->clobbers);
2045 succ_rl->uses_length += pred_rl->uses_length;
2046 succ_rl->clobbers_length += pred_rl->clobbers_length;
2048 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2050 /* Mem read/write lists are inherited by successor. */
2051 concat_insn_mem_list (pred_deps->pending_read_insns,
2052 pred_deps->pending_read_mems,
2053 &succ_deps->pending_read_insns,
2054 &succ_deps->pending_read_mems);
2055 concat_insn_mem_list (pred_deps->pending_write_insns,
2056 pred_deps->pending_write_mems,
2057 &succ_deps->pending_write_insns,
2058 &succ_deps->pending_write_mems);
2060 succ_deps->last_pending_memory_flush
2061 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2062 succ_deps->last_pending_memory_flush);
2064 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2065 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2067 /* last_function_call is inherited by successor. */
2068 succ_deps->last_function_call
2069 = concat_INSN_LIST (pred_deps->last_function_call,
2070 succ_deps->last_function_call);
2072 /* sched_before_next_call is inherited by successor. */
2073 succ_deps->sched_before_next_call
2074 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2075 succ_deps->sched_before_next_call);
2078 /* These lists should point to the right place, for correct
2079 freeing later. */
2080 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2081 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2082 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2083 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2085 /* Can't allow these to be freed twice. */
2086 pred_deps->pending_read_insns = 0;
2087 pred_deps->pending_read_mems = 0;
2088 pred_deps->pending_write_insns = 0;
2089 pred_deps->pending_write_mems = 0;
2092 /* Compute backward dependences inside bb. In a multiple-block region:
2093 (1) a bb is analyzed after its predecessors, and (2) the lists in
2094 effect at the end of bb (after analyzing for bb) are inherited by
2095 bb's successors.
2097 Specifically for reg-reg data dependences, the block insns are
2098 scanned by sched_analyze () top-to-bottom. Two lists are
2099 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2100 and reg_last[].uses for register USEs.
2102 When analysis is completed for bb, we update for its successors:
2103 - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2104 - USES[succ] = Union (USES [succ], USES [bb])
2106 The mechanism for computing mem-mem data dependence is very
2107 similar, and the result is interblock dependences in the region. */
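/* A small illustration: if bb0 ends with "r1 = r2 + r3" and a later block bb1
   of the same region contains "r4 = r1", then after bb0 is analyzed
   propagate_deps () appends bb0's reg_last[r1].sets to bb1's copy in bb_deps,
   so that sched_analyze () of bb1, on seeing the use of r1, records the
   interblock true dependence on the insn in bb0.  (Register names are made up
   for the example.)  */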
2109 static void
2110 compute_block_backward_dependences (int bb)
2112 rtx head, tail;
2113 struct deps tmp_deps;
2115 tmp_deps = bb_deps[bb];
2117 /* Do the analysis for this block. */
2118 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2119 sched_analyze (&tmp_deps, head, tail);
2120 add_branch_dependences (head, tail);
2122 if (current_nr_blocks > 1)
2123 propagate_deps (bb, &tmp_deps);
2125 /* Free up the INSN_LISTs. */
2126 free_deps (&tmp_deps);
2129 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2130 them to the unused_*_list variables, so that they can be reused. */
2132 static void
2133 free_pending_lists (void)
2135 int bb;
2137 for (bb = 0; bb < current_nr_blocks; bb++)
2139 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2140 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2141 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2142 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2146 /* Print dependences for debugging, callable from debugger. */
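/* For example, from a debugger, while stopped inside schedule_region () with a
   scheduling dump active (so that sched_dump is open), one can use

       (gdb) call debug_dependencies ()

   to print the forward dependence lists of the current region.  */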
2148 void
2149 debug_dependencies (void)
2151 int bb;
2153 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2154 for (bb = 0; bb < current_nr_blocks; bb++)
2156 rtx head, tail;
2157 rtx next_tail;
2158 rtx insn;
2160 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2161 next_tail = NEXT_INSN (tail);
2162 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2163 BB_TO_BLOCK (bb), bb);
2165 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2166 "insn", "code", "bb", "dep", "prio", "cost",
2167 "reservation");
2168 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2169 "----", "----", "--", "---", "----", "----",
2170 "-----------");
2172 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2174 rtx link;
2176 if (! INSN_P (insn))
2178 int n;
2179 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2180 if (NOTE_P (insn))
2182 n = NOTE_LINE_NUMBER (insn);
2183 if (n < 0)
2184 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2185 else
2187 expanded_location xloc;
2188 NOTE_EXPANDED_LOCATION (xloc, insn);
2189 fprintf (sched_dump, "line %d, file %s\n",
2190 xloc.line, xloc.file);
2193 else
2194 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2195 continue;
2198 fprintf (sched_dump,
2199 ";; %s%5d%6d%6d%6d%6d%6d ",
2200 (SCHED_GROUP_P (insn) ? "+" : " "),
2201 INSN_UID (insn),
2202 INSN_CODE (insn),
2203 INSN_BB (insn),
2204 INSN_DEP_COUNT (insn),
2205 INSN_PRIORITY (insn),
2206 insn_cost (insn, 0, 0));
2208 if (recog_memoized (insn) < 0)
2209 fprintf (sched_dump, "nothing");
2210 else
2211 print_reservation (sched_dump, insn);
2213 fprintf (sched_dump, "\t: ");
2214 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2215 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2216 fprintf (sched_dump, "\n");
2219 fprintf (sched_dump, "\n");
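/* The per-insn lines printed above look roughly like this (all numbers are
   made up for illustration):

       ;;   insn  code    bb   dep  prio  cost   reservation
       ;;   ----  ----    --   ---  ----  ----   -----------
       ;;     23    118     0     1     4     1  nothing    : 25 27

   i.e. the insn's UID, INSN_CODE, block index within the region, number of
   backward dependences, priority, cost, its DFA reservation (or "nothing" if
   unrecognized), and finally the UIDs of the insns that depend on it.  */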
2222 /* Return true if scheduling is disabled for the current region, i.e. every
2223 basic block in it has BB_DISABLE_SCHEDULE set (via NOTE_DISABLE_SCHED_OF_BLOCK). */
2224 static bool
2225 sched_is_disabled_for_current_region_p (void)
2227 int bb;
2229 for (bb = 0; bb < current_nr_blocks; bb++)
2230 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2231 return false;
2233 return true;
2236 /* Schedule a region. A region is either an inner loop, a loop-free
2237 subroutine, or a single basic block. Each bb in the region is
2238 scheduled after its flow predecessors. */
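/* In outline, schedule_region () proceeds as follows (single-block regions
   skip the interblock steps):

     1. compute backward dependences (LOG_LINKS) block by block, propagating
        them to each block's successors within the region;
     2. convert them to forward dependences (INSN_DEPEND) and set priorities;
     3. for multi-block regions, compute edge probabilities, dominators and
        the split/ancestor edge sets used by the interblock motion checks;
     4. schedule each block with schedule_block (), then update the block
        boundaries;
     5. release all per-region data.  */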
2240 static void
2241 schedule_region (int rgn)
2243 basic_block block;
2244 edge_iterator ei;
2245 edge e;
2246 int bb;
2247 int rgn_n_insns = 0;
2248 int sched_rgn_n_insns = 0;
2250 /* Set variables for the current region. */
2251 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2252 current_blocks = RGN_BLOCKS (rgn);
2254 /* Don't schedule a region that is marked by
2255 NOTE_DISABLE_SCHED_OF_BLOCK. */
2256 if (sched_is_disabled_for_current_region_p ())
2257 return;
2259 init_deps_global ();
2261 /* Initializations for region data dependence analysis. */
2262 bb_deps = XNEWVEC (struct deps, current_nr_blocks);
2263 for (bb = 0; bb < current_nr_blocks; bb++)
2264 init_deps (bb_deps + bb);
2266 /* Compute LOG_LINKS. */
2267 for (bb = 0; bb < current_nr_blocks; bb++)
2268 compute_block_backward_dependences (bb);
2270 /* Compute INSN_DEPEND. */
2271 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2273 rtx head, tail;
2274 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2276 compute_forward_dependences (head, tail);
2278 if (targetm.sched.dependencies_evaluation_hook)
2279 targetm.sched.dependencies_evaluation_hook (head, tail);
2283 /* Set priorities. */
2284 for (bb = 0; bb < current_nr_blocks; bb++)
2286 rtx head, tail;
2287 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2289 rgn_n_insns += set_priorities (head, tail);
2292 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2293 if (current_nr_blocks > 1)
2295 prob = XNEWVEC (int, current_nr_blocks);
2297 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2298 sbitmap_vector_zero (dom, current_nr_blocks);
2300 /* Use ->aux to implement EDGE_TO_BIT mapping. */
2301 rgn_nr_edges = 0;
2302 FOR_EACH_BB (block)
2304 if (CONTAINING_RGN (block->index) != rgn)
2305 continue;
2306 FOR_EACH_EDGE (e, ei, block->succs)
2307 SET_EDGE_TO_BIT (e, rgn_nr_edges++);
2310 rgn_edges = XNEWVEC (edge, rgn_nr_edges);
2311 rgn_nr_edges = 0;
2312 FOR_EACH_BB (block)
2314 if (CONTAINING_RGN (block->index) != rgn)
2315 continue;
2316 FOR_EACH_EDGE (e, ei, block->succs)
2317 rgn_edges[rgn_nr_edges++] = e;
2320 /* Split edges. */
2321 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2322 sbitmap_vector_zero (pot_split, current_nr_blocks);
2323 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2324 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2326 /* Compute probabilities, dominators, split_edges. */
2327 for (bb = 0; bb < current_nr_blocks; bb++)
2328 compute_dom_prob_ps (bb);
2331 /* Now we can schedule all blocks. */
2332 for (bb = 0; bb < current_nr_blocks; bb++)
2334 rtx head, tail;
2335 int b = BB_TO_BLOCK (bb);
2337 get_block_head_tail (b, &head, &tail);
2339 if (no_real_insns_p (head, tail))
2340 continue;
2342 current_sched_info->prev_head = PREV_INSN (head);
2343 current_sched_info->next_tail = NEXT_INSN (tail);
2345 if (write_symbols != NO_DEBUG)
2347 save_line_notes (b, head, tail);
2348 rm_line_notes (head, tail);
2351 /* rm_other_notes only removes notes which are _inside_ the
2352 block---that is, it won't remove notes before the first real insn
2353 or after the last real insn of the block. So if the first insn
2354 has a REG_SAVE_NOTE which would otherwise be emitted before the
2355 insn, it is redundant with the note before the start of the
2356 block, and so we have to take it out. */
2357 if (INSN_P (head))
2359 rtx note;
2361 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2362 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2363 remove_note (head, note);
2366 /* Remove remaining note insns from the block, save them in
2367 note_list. These notes are restored at the end of
2368 schedule_block (). */
2369 rm_other_notes (head, tail);
2371 target_bb = bb;
2373 current_sched_info->queue_must_finish_empty
2374 = current_nr_blocks > 1 && !flag_schedule_interblock;
2376 schedule_block (b, rgn_n_insns);
2377 sched_rgn_n_insns += sched_n_insns;
2379 /* Update target block boundaries. */
2380 if (head == BB_HEAD (BASIC_BLOCK (b)))
2381 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2382 if (tail == BB_END (BASIC_BLOCK (b)))
2383 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2385 /* Clean up. */
2386 if (current_nr_blocks > 1)
2388 free (candidate_table);
2389 free (bblst_table);
2390 free (edgelst_table);
2394 /* Sanity check: verify that all region insns were scheduled. */
2395 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2397 /* Restore line notes. */
2398 if (write_symbols != NO_DEBUG)
2400 for (bb = 0; bb < current_nr_blocks; bb++)
2402 rtx head, tail;
2403 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2404 restore_line_notes (head, tail);
2408 /* Done with this region. */
2409 free_pending_lists ();
2411 finish_deps_global ();
2413 free (bb_deps);
2415 if (current_nr_blocks > 1)
2417 /* Cleanup ->aux used for EDGE_TO_BIT mapping. */
2418 FOR_EACH_BB (block)
2420 if (CONTAINING_RGN (block->index) != rgn)
2421 continue;
2422 FOR_EACH_EDGE (e, ei, block->succs)
2423 e->aux = NULL;
2426 free (prob);
2427 sbitmap_vector_free (dom);
2428 sbitmap_vector_free (pot_split);
2429 sbitmap_vector_free (ancestor_edges);
2430 free (rgn_edges);
2434 /* Indexed by region, holds the number of death notes found in that region.
2435 Used for consistency checks. */
2436 static int *deaths_in_region;
2438 /* Initialize data structures for region scheduling. */
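/* Roughly: each basic block becomes its own region after reload, when the
   function has only one real block, when -fno-schedule-interblock is in
   effect, or when the CFG is irregular (e.g. contains computed jumps);
   otherwise find_rgns () groups the blocks into regions using dominator
   information.  */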
2440 static void
2441 init_regions (void)
2443 sbitmap blocks;
2444 int rgn;
2446 nr_regions = 0;
2447 rgn_table = XNEWVEC (region, n_basic_blocks);
2448 rgn_bb_table = XNEWVEC (int, n_basic_blocks);
2449 block_to_bb = XNEWVEC (int, last_basic_block);
2450 containing_rgn = XNEWVEC (int, last_basic_block);
2452 /* Compute regions for scheduling. */
2453 if (reload_completed
2454 || n_basic_blocks == NUM_FIXED_BLOCKS + 1
2455 || !flag_schedule_interblock
2456 || is_cfg_nonregular ())
2458 find_single_block_region ();
2460 else
2462 /* Compute the dominators and post dominators. */
2463 calculate_dominance_info (CDI_DOMINATORS);
2465 /* Find regions. */
2466 find_rgns ();
2468 if (sched_verbose >= 3)
2469 debug_regions ();
2471 /* For now; this will move as more and more of the Haifa scheduler is
2472 converted to use the cfg code in flow.c. */
2473 free_dominance_info (CDI_DOMINATORS);
2477 if (CHECK_DEAD_NOTES)
2479 blocks = sbitmap_alloc (last_basic_block);
2480 deaths_in_region = XNEWVEC (int, nr_regions);
2481 /* Remove all death notes from the subroutine. */
2482 for (rgn = 0; rgn < nr_regions; rgn++)
2484 int b;
2486 sbitmap_zero (blocks);
2487 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2488 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2490 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2492 sbitmap_free (blocks);
2494 else
2495 count_or_remove_death_notes (NULL, 1);
2498 /* The one entry point in this file. */
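/* Roughly: initialize the scheduler and the regions, schedule every region,
   update life information (locally for single-block regions, globally once if
   any multi-block region was scheduled), verify the REG_DEAD note counts when
   checking is enabled, reposition the prologue/epilogue notes after reload,
   and release the region data structures.  */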
2500 void
2501 schedule_insns (void)
2503 sbitmap large_region_blocks, blocks;
2504 int rgn;
2505 int any_large_regions;
2506 basic_block bb;
2508 /* Taking care of this degenerate case makes the rest of
2509 this code simpler. */
2510 if (n_basic_blocks == NUM_FIXED_BLOCKS)
2511 return;
2513 nr_inter = 0;
2514 nr_spec = 0;
2515 sched_init ();
2517 min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
2518 / 100);
2520 init_regions ();
2522 current_sched_info = &region_sched_info;
2523 /* Schedule every region in the subroutine. */
2524 for (rgn = 0; rgn < nr_regions; rgn++)
2525 schedule_region (rgn);
2527 /* Update life analysis for the subroutine. Do single block regions
2528 first so that we can verify that live_at_start didn't change. Then
2529 do all other blocks. */
2530 /* ??? There is an outside possibility that update_life_info, or more
2531 to the point propagate_block, could get called with nonzero flags
2532 more than once for one basic block. This would be kinda bad if it
2533 were to happen, since REG_INFO would be accumulated twice for the
2534 block, and we'd have twice the REG_DEAD notes.
2536 I'm fairly certain that this _shouldn't_ happen, since I don't think
2537 that live_at_start should change at region heads. Not sure what the
2538 best way to test for this kind of thing... */
2540 allocate_reg_life_data ();
2541 compute_bb_for_insn ();
2543 any_large_regions = 0;
2544 large_region_blocks = sbitmap_alloc (last_basic_block);
2545 sbitmap_zero (large_region_blocks);
2546 FOR_EACH_BB (bb)
2547 SET_BIT (large_region_blocks, bb->index);
2549 blocks = sbitmap_alloc (last_basic_block);
2550 sbitmap_zero (blocks);
2552 /* Update life information. For regions consisting of multiple blocks
2553 we've possibly done interblock scheduling that affects global liveness.
2554 For regions consisting of single blocks we need to do only local
2555 liveness. */
2556 for (rgn = 0; rgn < nr_regions; rgn++)
2557 if (RGN_NR_BLOCKS (rgn) > 1)
2558 any_large_regions = 1;
2559 else
2561 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2562 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2565 /* Don't update reg info after reload, since that affects
2566 regs_ever_live, which should not change after reload. */
2567 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2568 (reload_completed ? PROP_DEATH_NOTES
2569 : PROP_DEATH_NOTES | PROP_REG_INFO));
2570 if (any_large_regions)
2572 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2573 PROP_DEATH_NOTES | PROP_REG_INFO);
2576 if (CHECK_DEAD_NOTES)
2578 /* Verify that the REG_DEAD note counts in the single basic block
2579 regions are unchanged. */
2580 for (rgn = 0; rgn < nr_regions; rgn++)
2581 if (RGN_NR_BLOCKS (rgn) == 1)
2583 sbitmap_zero (blocks);
2584 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2586 gcc_assert (deaths_in_region[rgn]
2587 == count_or_remove_death_notes (blocks, 0));
2589 free (deaths_in_region);
2592 /* Reposition the prologue and epilogue notes in case we moved the
2593 prologue/epilogue insns. */
2594 if (reload_completed)
2595 reposition_prologue_and_epilogue_notes (get_insns ());
2597 /* Delete redundant line notes. */
2598 if (write_symbols != NO_DEBUG)
2599 rm_redundant_line_notes ();
2601 if (sched_verbose)
2603 if (reload_completed == 0 && flag_schedule_interblock)
2605 fprintf (sched_dump,
2606 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2607 nr_inter, nr_spec);
2609 else
2610 gcc_assert (nr_inter <= 0);
2611 fprintf (sched_dump, "\n\n");
2614 /* Clean up. */
2615 free (rgn_table);
2616 free (rgn_bb_table);
2617 free (block_to_bb);
2618 free (containing_rgn);
2620 sched_finish ();
2622 sbitmap_free (blocks);
2623 sbitmap_free (large_region_blocks);
2625 #endif
2627 static bool
2628 gate_handle_sched (void)
2630 #ifdef INSN_SCHEDULING
2631 return flag_schedule_insns;
2632 #else
2633 return 0;
2634 #endif
2637 /* Run instruction scheduler. */
2638 static void
2639 rest_of_handle_sched (void)
2641 #ifdef INSN_SCHEDULING
2642 /* Do control and data sched analysis,
2643 and write some of the results to the dump file. */
2645 schedule_insns ();
2646 #endif
2649 static bool
2650 gate_handle_sched2 (void)
2652 #ifdef INSN_SCHEDULING
2653 return optimize > 0 && flag_schedule_insns_after_reload;
2654 #else
2655 return 0;
2656 #endif
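/* Taken together, the two gates mean, roughly: the first scheduling pass runs
   when -fschedule-insns is given (and the target defines INSN_SCHEDULING),
   while the post-reload pass additionally requires optimization and is
   controlled by -fschedule-insns2 (flag_schedule_insns_after_reload).  */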
2659 /* Run second scheduling pass after reload. */
2660 static void
2661 rest_of_handle_sched2 (void)
2663 #ifdef INSN_SCHEDULING
2664 /* Do control and data sched analysis again,
2665 and write some more of the results to the dump file. */
2667 split_all_insns (1);
2669 if (flag_sched2_use_superblocks || flag_sched2_use_traces)
2671 schedule_ebbs ();
2672 /* No liveness updating code yet, but it should be easy to do.
2673 reg-stack recomputes the liveness when needed for now. */
2674 count_or_remove_death_notes (NULL, 1);
2675 cleanup_cfg (CLEANUP_EXPENSIVE);
2677 else
2678 schedule_insns ();
2679 #endif
2682 struct tree_opt_pass pass_sched =
2684 "sched1", /* name */
2685 gate_handle_sched, /* gate */
2686 rest_of_handle_sched, /* execute */
2687 NULL, /* sub */
2688 NULL, /* next */
2689 0, /* static_pass_number */
2690 TV_SCHED, /* tv_id */
2691 0, /* properties_required */
2692 0, /* properties_provided */
2693 0, /* properties_destroyed */
2694 0, /* todo_flags_start */
2695 TODO_dump_func |
2696 TODO_ggc_collect, /* todo_flags_finish */
2697 'S' /* letter */
2700 struct tree_opt_pass pass_sched2 =
2702 "sched2", /* name */
2703 gate_handle_sched2, /* gate */
2704 rest_of_handle_sched2, /* execute */
2705 NULL, /* sub */
2706 NULL, /* next */
2707 0, /* static_pass_number */
2708 TV_SCHED2, /* tv_id */
2709 0, /* properties_required */
2710 0, /* properties_provided */
2711 0, /* properties_destroyed */
2712 0, /* todo_flags_start */
2713 TODO_dump_func |
2714 TODO_ggc_collect, /* todo_flags_finish */
2715 'R' /* letter */