gcc/sched-rgn.c
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Michael Tiemann (tiemann@cygnus.com).  Enhanced by,
5 and currently maintained by, Jim Wilson (wilson@cygnus.com)
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to the Free
21 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
22 02110-1301, USA. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
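/* For illustration, consider a small diamond-shaped region:

            B0
           /  \
         B1    B2
           \  /
            B3

   Before reload, B0..B3 may form a single region, and insns from B1,
   B2 or B3 can be moved "up" into B0 (subject to the validity and
   speculation checks below).  After reload, each block is a region of
   its own and is scheduled in isolation.  */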
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "regs.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "insn-config.h"
60 #include "insn-attr.h"
61 #include "except.h"
62 #include "toplev.h"
63 #include "recog.h"
64 #include "cfglayout.h"
65 #include "params.h"
66 #include "sched-int.h"
67 #include "target.h"
68 #include "timevar.h"
69 #include "tree-pass.h"
71 /* Define when we want to count REG_DEAD notes before and after scheduling
72 for sanity checking.  We can't do that when conditional execution is used,
73 as REG_DEAD notes exist only for unconditional deaths. */
75 #if !defined (HAVE_conditional_execution) && defined (ENABLE_CHECKING)
76 #define CHECK_DEAD_NOTES 1
77 #else
78 #define CHECK_DEAD_NOTES 0
79 #endif
82 #ifdef INSN_SCHEDULING
83 /* Some accessor macros for h_i_d members only used within this file. */
84 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
85 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
86 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
88 /* nr_inter/spec counts interblock/speculative motion for the function. */
89 static int nr_inter, nr_spec;
91 static int is_cfg_nonregular (void);
92 static bool sched_is_disabled_for_current_region_p (void);
94 /* A region is the main entity for interblock scheduling: insns
95 are allowed to move between blocks in the same region, along
96 control flow graph edges, in the 'up' direction. */
97 typedef struct
99 int rgn_nr_blocks; /* Number of blocks in region. */
100 int rgn_blocks; /* Blocks in the region (actually index in rgn_bb_table). */
102 region;
104 /* Number of regions in the procedure. */
105 static int nr_regions;
107 /* Table of region descriptions. */
108 static region *rgn_table;
110 /* Array of lists of regions' blocks. */
111 static int *rgn_bb_table;
113 /* Topological order of blocks in the region (if b2 is reachable from
114 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
115 always referred to by either block or b, while its topological
116 order name (in the region) is referred to by bb. */
117 static int *block_to_bb;
119 /* The number of the region containing a block. */
120 static int *containing_rgn;
122 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
123 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
124 #define BLOCK_TO_BB(block) (block_to_bb[block])
125 #define CONTAINING_RGN(block) (containing_rgn[block])
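/* For example (illustrative block numbers only): if a region consists
   of basic blocks 5, 8 and 6 stored in that topological order, then
   rgn_bb_table contains ..., 5, 8, 6, ..., BLOCK_TO_BB (5) == 0,
   BLOCK_TO_BB (8) == 1, BLOCK_TO_BB (6) == 2, and CONTAINING_RGN
   yields the same region number for all three blocks.  */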
127 void debug_regions (void);
128 static void find_single_block_region (void);
129 static void find_rgns (void);
130 static bool too_large (int, int *, int *);
132 extern void debug_live (int, int);
134 /* Blocks of the current region being scheduled. */
135 static int current_nr_blocks;
136 static int current_blocks;
138 /* The mapping from bb to block. */
139 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
141 /* Target info declarations.
143 The block currently being scheduled is referred to as the "target" block,
144 while other blocks in the region from which insns can be moved to the
145 target are called "source" blocks. The candidate structure holds info
146 about such sources: are they valid? Speculative? Etc. */
147 typedef struct
149 basic_block *first_member;
150 int nr_members;
152 bblst;
154 typedef struct
156 char is_valid;
157 char is_speculative;
158 int src_prob;
159 bblst split_bbs;
160 bblst update_bbs;
162 candidate;
164 static candidate *candidate_table;
166 /* A speculative motion requires checking live information on the path
167 from 'source' to 'target'. The split blocks are those to be checked.
168 After a speculative motion, live information should be modified in
169 the 'update' blocks.
171 Lists of split and update blocks for each candidate of the current
172 target are in array bblst_table. */
173 static basic_block *bblst_table;
174 static int bblst_size, bblst_last;
176 #define IS_VALID(src) ( candidate_table[src].is_valid )
177 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
178 #define SRC_PROB(src) ( candidate_table[src].src_prob )
180 /* The bb being currently scheduled. */
181 static int target_bb;
183 /* List of edges. */
184 typedef struct
186 edge *first_member;
187 int nr_members;
189 edgelst;
191 static edge *edgelst_table;
192 static int edgelst_last;
194 static void extract_edgelst (sbitmap, edgelst *);
197 /* Target info functions. */
198 static void split_edges (int, int, edgelst *);
199 static void compute_trg_info (int);
200 void debug_candidate (int);
201 void debug_candidates (int);
203 /* Dominators array: dom[i] contains the sbitmap of dominators of
204 bb i in the region. */
205 static sbitmap *dom;
207 /* bb 0 is the only region entry. */
208 #define IS_RGN_ENTRY(bb) (!bb)
210 /* Is bb_src dominated by bb_trg?  */
211 #define IS_DOMINATED(bb_src, bb_trg) \
212 ( TEST_BIT (dom[bb_src], bb_trg) )
214 /* Probability: Prob[i] is a float in [0, 1] which is the probability
215 of bb i relative to the region entry. */
216 static float *prob;
218 /* The probability of bb_src, relative to bb_trg. Note, that while the
219 'prob[bb]' is a float in [0, 1], this macro returns an integer
220 in [0, 100]. */
221 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
222 prob[bb_trg])))
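/* For example (illustrative values): with prob[bb_src] == 0.3 and
   prob[bb_trg] == 0.6, GET_SRC_PROB (bb_src, bb_trg) evaluates to
   (int) (100.0 * 0.5) == 50, i.e. bb_src is reached on roughly half
   of the paths that reach bb_trg.  */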
224 /* Bit-set of edges, where bit i stands for edge i. */
225 typedef sbitmap edgeset;
227 /* Number of edges in the region. */
228 static int rgn_nr_edges;
230 /* Array of size rgn_nr_edges. */
231 static edge *rgn_edges;
233 /* Mapping from each edge in the graph to its number in the rgn. */
234 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
235 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
237 /* The split edges of a source bb is different for each target
238 bb. In order to compute this efficiently, the 'potential-split edges'
239 are computed for each bb prior to scheduling a region. This is actually
240 the split edges of each bb relative to the region entry.
242 pot_split[bb] is the set of potential split edges of bb. */
243 static edgeset *pot_split;
245 /* For every bb, a set of its ancestor edges. */
246 static edgeset *ancestor_edges;
248 static void compute_dom_prob_ps (int);
250 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
251 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
252 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
254 /* Parameters affecting the decision of rank_for_schedule().
255 ??? Nope. But MIN_PROBABILITY is used in compute_trg_info. */
256 #define MIN_PROBABILITY 40
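/* For example (illustrative values): a source block whose
   GET_SRC_PROB relative to the target is 30 is rejected as a
   candidate in compute_trg_info, while one with 55 is kept.  */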
258 /* Speculative scheduling functions. */
259 static int check_live_1 (int, rtx);
260 static void update_live_1 (int, rtx);
261 static int check_live (rtx, int);
262 static void update_live (rtx, int);
263 static void set_spec_fed (rtx);
264 static int is_pfree (rtx, int, int);
265 static int find_conditional_protection (rtx, int);
266 static int is_conditionally_protected (rtx, int, int);
267 static int is_prisky (rtx, int, int);
268 static int is_exception_free (rtx, int, int);
270 static bool sets_likely_spilled (rtx);
271 static void sets_likely_spilled_1 (rtx, rtx, void *);
272 static void add_branch_dependences (rtx, rtx);
273 static void compute_block_backward_dependences (int);
274 void debug_dependencies (void);
276 static void init_regions (void);
277 static void schedule_region (int);
278 static rtx concat_INSN_LIST (rtx, rtx);
279 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
280 static void propagate_deps (int, struct deps *);
281 static void free_pending_lists (void);
283 /* Functions for construction of the control flow graph. */
285 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
287 We decide not to build the control flow graph if there is possibly more
288 than one entry to the function, if computed branches exist, if we
289 have nonlocal gotos, or if we have an unreachable loop. */
291 static int
292 is_cfg_nonregular (void)
294 basic_block b;
295 rtx insn;
297 /* If we have a label that could be the target of a nonlocal goto, then
298 the cfg is not well structured. */
299 if (nonlocal_goto_handler_labels)
300 return 1;
302 /* If we have any forced labels, then the cfg is not well structured. */
303 if (forced_labels)
304 return 1;
306 /* If we have exception handlers, then we consider the cfg not well
307 structured. ?!? We should be able to handle this now that flow.c
308 computes an accurate cfg for EH. */
309 if (current_function_has_exception_handlers ())
310 return 1;
312 /* If we have non-jumping insns which refer to labels, then we consider
313 the cfg not well structured. */
314 FOR_EACH_BB (b)
315 FOR_BB_INSNS (b, insn)
317 /* Check for labels referred to by non-jump insns. */
318 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
320 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
321 if (note
322 && ! (JUMP_P (NEXT_INSN (insn))
323 && find_reg_note (NEXT_INSN (insn), REG_LABEL,
324 XEXP (note, 0))))
325 return 1;
327 /* If this function has a computed jump, then we consider the cfg
328 not well structured. */
329 else if (JUMP_P (insn) && computed_jump_p (insn))
330 return 1;
333 /* Unreachable loops with more than one basic block are detected
334 during the DFS traversal in find_rgns.
336 Unreachable loops with a single block are detected here. This
337 test is redundant with the one in find_rgns, but it's much
338 cheaper to go ahead and catch the trivial case here. */
339 FOR_EACH_BB (b)
341 if (EDGE_COUNT (b->preds) == 0
342 || (single_pred_p (b)
343 && single_pred (b) == b))
344 return 1;
347 /* All the tests passed. Consider the cfg well structured. */
348 return 0;
351 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
353 static void
354 extract_edgelst (sbitmap set, edgelst *el)
356 unsigned int i = 0;
357 sbitmap_iterator sbi;
359 /* edgelst table space is reused in each call to extract_edgelst. */
360 edgelst_last = 0;
362 el->first_member = &edgelst_table[edgelst_last];
363 el->nr_members = 0;
365 /* Iterate over each set bit in the bitset. */
366 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
368 edgelst_table[edgelst_last++] = rgn_edges[i];
369 el->nr_members++;
373 /* Functions for the construction of regions. */
375 /* Print the regions, for debugging purposes. Callable from debugger. */
377 void
378 debug_regions (void)
380 int rgn, bb;
382 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
383 for (rgn = 0; rgn < nr_regions; rgn++)
385 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
386 rgn_table[rgn].rgn_nr_blocks);
387 fprintf (sched_dump, ";;\tbb/block: ");
389 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
391 current_blocks = RGN_BLOCKS (rgn);
393 gcc_assert (bb == BLOCK_TO_BB (BB_TO_BLOCK (bb)));
394 fprintf (sched_dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
397 fprintf (sched_dump, "\n\n");
401 /* Build a single block region for each basic block in the function.
402 This allows for using the same code for interblock and basic block
403 scheduling. */
405 static void
406 find_single_block_region (void)
408 basic_block bb;
410 nr_regions = 0;
412 FOR_EACH_BB (bb)
414 rgn_bb_table[nr_regions] = bb->index;
415 RGN_NR_BLOCKS (nr_regions) = 1;
416 RGN_BLOCKS (nr_regions) = nr_regions;
417 CONTAINING_RGN (bb->index) = nr_regions;
418 BLOCK_TO_BB (bb->index) = 0;
419 nr_regions++;
423 /* Update number of blocks and the estimate for number of insns
424 in the region. Return true if the region is "too large" for interblock
425 scheduling (compile time considerations). */
427 static bool
428 too_large (int block, int *num_bbs, int *num_insns)
430 (*num_bbs)++;
431 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
432 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
434 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
435 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
438 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
439 is still an inner loop.  Put in max_hdr[blk] the header of the innermost
440 loop containing blk. */
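/* For illustration: if max_hdr[blk] currently records header H1 and a
   new header H2 is proposed, the one with the smaller dfs number (the
   outer of the two loops) has its bit in 'inner' cleared, and
   max_hdr[blk] ends up pointing at the more deeply nested header.  */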
441 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
443 if (max_hdr[blk] == -1) \
444 max_hdr[blk] = hdr; \
445 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
446 RESET_BIT (inner, hdr); \
447 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
449 RESET_BIT (inner,max_hdr[blk]); \
450 max_hdr[blk] = hdr; \
454 /* Find regions for interblock scheduling.
456 A region for scheduling can be:
458 * A loop-free procedure, or
460 * A reducible inner loop, or
462 * A basic block not contained in any other region.
464 ?!? In theory we could build other regions based on extended basic
465 blocks or reverse extended basic blocks. Is it worth the trouble?
467 Loop blocks that form a region are put into the region's block list
468 in topological order.
470 This procedure stores its results into the following global (ick) variables
472 * nr_regions
473 * rgn_table
474 * rgn_bb_table
475 * block_to_bb
476 * containing_rgn
478 We use dominator relationships to avoid making regions out of non-reducible
479 loops.
481 This procedure needs to be converted to work on pred/succ lists instead
482 of edge tables. That would simplify it somewhat. */
484 static void
485 find_rgns (void)
487 int *max_hdr, *dfs_nr, *degree;
488 char no_loops = 1;
489 int node, child, loop_head, i, head, tail;
490 int count = 0, sp, idx = 0;
491 edge_iterator current_edge;
492 edge_iterator *stack;
493 int num_bbs, num_insns, unreachable;
494 int too_large_failure;
495 basic_block bb;
497 /* Note if a block is a natural loop header. */
498 sbitmap header;
500 /* Note if a block is a natural inner loop header. */
501 sbitmap inner;
503 /* Note if a block is in the block queue. */
504 sbitmap in_queue;
506 /* Note if a block is on the DFS stack. */
507 sbitmap in_stack;
509 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
510 and a mapping from block to its loop header (if the block is contained
511 in a loop, else -1).
513 Store results in HEADER, INNER, and MAX_HDR respectively, these will
514 be used as inputs to the second traversal.
516 STACK, SP and DFS_NR are only used during the first traversal. */
518 /* Allocate and initialize variables for the first traversal. */
519 max_hdr = xmalloc (last_basic_block * sizeof (int));
520 dfs_nr = xcalloc (last_basic_block, sizeof (int));
521 stack = xmalloc (n_edges * sizeof (edge_iterator));
523 inner = sbitmap_alloc (last_basic_block);
524 sbitmap_ones (inner);
526 header = sbitmap_alloc (last_basic_block);
527 sbitmap_zero (header);
529 in_queue = sbitmap_alloc (last_basic_block);
530 sbitmap_zero (in_queue);
532 in_stack = sbitmap_alloc (last_basic_block);
533 sbitmap_zero (in_stack);
535 for (i = 0; i < last_basic_block; i++)
536 max_hdr[i] = -1;
538 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
539 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
541 /* DFS traversal to find inner loops in the cfg. */
543 current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
544 sp = -1;
546 while (1)
548 if (EDGE_PASSED (current_edge))
550 /* We have reached a leaf node or a node that was already
551 processed. Pop edges off the stack until we find
552 an edge that has not yet been processed. */
553 while (sp >= 0 && EDGE_PASSED (current_edge))
555 /* Pop entry off the stack. */
556 current_edge = stack[sp--];
557 node = ei_edge (current_edge)->src->index;
558 gcc_assert (node != ENTRY_BLOCK);
559 child = ei_edge (current_edge)->dest->index;
560 gcc_assert (child != EXIT_BLOCK);
561 RESET_BIT (in_stack, child);
562 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
563 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
564 ei_next (&current_edge);
567 /* See if we have finished the DFS tree traversal. */
568 if (sp < 0 && EDGE_PASSED (current_edge))
569 break;
571 /* Nope, continue the traversal with the popped node. */
572 continue;
575 /* Process a node. */
576 node = ei_edge (current_edge)->src->index;
577 gcc_assert (node != ENTRY_BLOCK);
578 SET_BIT (in_stack, node);
579 dfs_nr[node] = ++count;
581 /* We don't traverse to the exit block. */
582 child = ei_edge (current_edge)->dest->index;
583 if (child == EXIT_BLOCK)
585 SET_EDGE_PASSED (current_edge);
586 ei_next (&current_edge);
587 continue;
590 /* If the successor is in the stack, then we've found a loop.
591 Mark the loop, if it is not a natural loop, then it will
592 be rejected during the second traversal. */
593 if (TEST_BIT (in_stack, child))
595 no_loops = 0;
596 SET_BIT (header, child);
597 UPDATE_LOOP_RELATIONS (node, child);
598 SET_EDGE_PASSED (current_edge);
599 ei_next (&current_edge);
600 continue;
603 /* If the child was already visited, then there is no need to visit
604 it again. Just update the loop relationships and restart
605 with a new edge. */
606 if (dfs_nr[child])
608 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
609 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
610 SET_EDGE_PASSED (current_edge);
611 ei_next (&current_edge);
612 continue;
615 /* Push an entry on the stack and continue DFS traversal. */
616 stack[++sp] = current_edge;
617 SET_EDGE_PASSED (current_edge);
618 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
621 /* Reset ->aux field used by EDGE_PASSED. */
622 FOR_ALL_BB (bb)
624 edge_iterator ei;
625 edge e;
626 FOR_EACH_EDGE (e, ei, bb->succs)
627 e->aux = NULL;
631 /* Another check for unreachable blocks. The earlier test in
632 is_cfg_nonregular only finds unreachable blocks that do not
633 form a loop.
635 The DFS traversal will mark every block that is reachable from
636 the entry node by placing a nonzero value in dfs_nr. Thus if
637 dfs_nr is zero for any block, then it must be unreachable. */
638 unreachable = 0;
639 FOR_EACH_BB (bb)
640 if (dfs_nr[bb->index] == 0)
642 unreachable = 1;
643 break;
646 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
647 to hold degree counts. */
648 degree = dfs_nr;
650 FOR_EACH_BB (bb)
651 degree[bb->index] = EDGE_COUNT (bb->preds);
653 /* Do not perform region scheduling if there are any unreachable
654 blocks. */
655 if (!unreachable)
657 int *queue;
659 if (no_loops)
660 SET_BIT (header, 0);
662 /* Second traversal: find reducible inner loops and topologically sort
663 the blocks of each region. */
665 queue = xmalloc (n_basic_blocks * sizeof (int));
667 /* Find blocks which are inner loop headers. We still have non-reducible
668 loops to consider at this point. */
669 FOR_EACH_BB (bb)
671 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
673 edge e;
674 edge_iterator ei;
675 basic_block jbb;
677 /* Now check that the loop is reducible.  We do this separately
678 from finding inner loops so that we do not find a reducible
679 loop which contains an inner non-reducible loop.
681 A simple way to find reducible/natural loops is to verify
682 that each block in the loop is dominated by the loop
683 header.
685 If there exists a block that is not dominated by the loop
686 header, then the block is reachable from outside the loop
687 and thus the loop is not a natural loop. */
688 FOR_EACH_BB (jbb)
690 /* First identify blocks in the loop, except for the loop
691 entry block. */
692 if (bb->index == max_hdr[jbb->index] && bb != jbb)
694 /* Now verify that the block is dominated by the loop
695 header. */
696 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
697 break;
701 /* If we exited the loop early, then this block is the header of
702 a non-reducible loop and we should quit processing it
703 now. */
704 if (jbb != EXIT_BLOCK_PTR)
705 continue;
707 /* This block is the header of an inner loop, or block 0 in a
708 subroutine with no loops at all. */
709 head = tail = -1;
710 too_large_failure = 0;
711 loop_head = max_hdr[bb->index];
713 /* Decrease the degree of all this block's successors for topological
714 ordering. */
715 FOR_EACH_EDGE (e, ei, bb->succs)
716 if (e->dest != EXIT_BLOCK_PTR)
717 --degree[e->dest->index];
719 /* Estimate # insns, and count # blocks in the region. */
720 num_bbs = 1;
721 num_insns = (INSN_LUID (BB_END (bb))
722 - INSN_LUID (BB_HEAD (bb)));
724 /* Find all loop latches (blocks with back edges to the loop
725 header), or all the leaf blocks if the cfg has no loops.
727 Place those blocks into the queue. */
728 if (no_loops)
730 FOR_EACH_BB (jbb)
731 /* Leaf nodes have only a single successor which must
732 be EXIT_BLOCK. */
733 if (single_succ_p (jbb)
734 && single_succ (jbb) == EXIT_BLOCK_PTR)
736 queue[++tail] = jbb->index;
737 SET_BIT (in_queue, jbb->index);
739 if (too_large (jbb->index, &num_bbs, &num_insns))
741 too_large_failure = 1;
742 break;
746 else
748 edge e;
750 FOR_EACH_EDGE (e, ei, bb->preds)
752 if (e->src == ENTRY_BLOCK_PTR)
753 continue;
755 node = e->src->index;
757 if (max_hdr[node] == loop_head && node != bb->index)
759 /* This is a loop latch. */
760 queue[++tail] = node;
761 SET_BIT (in_queue, node);
763 if (too_large (node, &num_bbs, &num_insns))
765 too_large_failure = 1;
766 break;
772 /* Now add all the blocks in the loop to the queue.
774 We know the loop is a natural loop; however the algorithm
775 above will not always mark certain blocks as being in the
776 loop. Consider:
777 node children
778 a b,c
779 b c
780 c a,d
781 d b
783 The algorithm in the DFS traversal may not mark B & D as part
784 of the loop (i.e. they will not have max_hdr set to A).
786 We know they cannot be loop latches (else they would have
787 had max_hdr set since they'd have a backedge to a dominator
788 block). So we don't need them on the initial queue.
790 We know they are part of the loop because they are dominated
791 by the loop header and can be reached by a backwards walk of
792 the edges starting with nodes on the initial queue.
794 It is safe and desirable to include those nodes in the
795 loop/scheduling region. To do so we would need to decrease
796 the degree of a node if it is the target of a backedge
797 within the loop itself as the node is placed in the queue.
799 We do not do this because I'm not sure that the actual
800 scheduling code will properly handle this case. ?!? */
802 while (head < tail && !too_large_failure)
804 edge e;
805 child = queue[++head];
807 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
809 node = e->src->index;
811 /* See discussion above about nodes not marked as in
812 this loop during the initial DFS traversal. */
813 if (e->src == ENTRY_BLOCK_PTR
814 || max_hdr[node] != loop_head)
816 tail = -1;
817 break;
819 else if (!TEST_BIT (in_queue, node) && node != bb->index)
821 queue[++tail] = node;
822 SET_BIT (in_queue, node);
824 if (too_large (node, &num_bbs, &num_insns))
826 too_large_failure = 1;
827 break;
833 if (tail >= 0 && !too_large_failure)
835 /* Place the loop header into list of region blocks. */
836 degree[bb->index] = -1;
837 rgn_bb_table[idx] = bb->index;
838 RGN_NR_BLOCKS (nr_regions) = num_bbs;
839 RGN_BLOCKS (nr_regions) = idx++;
840 CONTAINING_RGN (bb->index) = nr_regions;
841 BLOCK_TO_BB (bb->index) = count = 0;
843 /* Remove blocks from queue[] when their in degree
844 becomes zero. Repeat until no blocks are left on the
845 list. This produces a topological list of blocks in
846 the region. */
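/* For illustration (made-up blocks): with region blocks A -> B -> C
   and remaining in-degrees 0, 1, 1, block A is emitted first and its
   removal drops B's degree to 0, so B is emitted next, then C,
   giving the topological order A, B, C.  */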
847 while (tail >= 0)
849 if (head < 0)
850 head = tail;
851 child = queue[head];
852 if (degree[child] == 0)
854 edge e;
856 degree[child] = -1;
857 rgn_bb_table[idx++] = child;
858 BLOCK_TO_BB (child) = ++count;
859 CONTAINING_RGN (child) = nr_regions;
860 queue[head] = queue[tail--];
862 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
863 if (e->dest != EXIT_BLOCK_PTR)
864 --degree[e->dest->index];
866 else
867 --head;
869 ++nr_regions;
873 free (queue);
876 /* Any block that did not end up in a region is placed into a region
877 by itself. */
878 FOR_EACH_BB (bb)
879 if (degree[bb->index] >= 0)
881 rgn_bb_table[idx] = bb->index;
882 RGN_NR_BLOCKS (nr_regions) = 1;
883 RGN_BLOCKS (nr_regions) = idx++;
884 CONTAINING_RGN (bb->index) = nr_regions++;
885 BLOCK_TO_BB (bb->index) = 0;
888 free (max_hdr);
889 free (dfs_nr);
890 free (stack);
891 sbitmap_free (header);
892 sbitmap_free (inner);
893 sbitmap_free (in_queue);
894 sbitmap_free (in_stack);
897 /* Functions for regions scheduling information. */
899 /* Compute dominators, probability, and potential-split-edges of bb.
900 Assume that these values were already computed for bb's predecessors. */
902 static void
903 compute_dom_prob_ps (int bb)
905 int pred_bb;
906 int nr_out_edges, nr_rgn_out_edges;
907 edge_iterator in_ei, out_ei;
908 edge in_edge, out_edge;
910 prob[bb] = 0.0;
911 if (IS_RGN_ENTRY (bb))
913 SET_BIT (dom[bb], 0);
914 prob[bb] = 1.0;
915 return;
918 /* Initialize dom[bb] to '111..1'. */
919 sbitmap_ones (dom[bb]);
921 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
923 if (in_edge->src == ENTRY_BLOCK_PTR)
924 continue;
926 pred_bb = BLOCK_TO_BB (in_edge->src->index);
927 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
928 sbitmap_a_or_b (ancestor_edges[bb],
929 ancestor_edges[bb], ancestor_edges[pred_bb]);
931 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
933 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
935 nr_out_edges = 0;
936 nr_rgn_out_edges = 0;
938 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
940 ++nr_out_edges;
942 /* The successor doesn't belong in the region? */
943 if (out_edge->dest != EXIT_BLOCK_PTR
944 && CONTAINING_RGN (out_edge->dest->index)
945 != CONTAINING_RGN (BB_TO_BLOCK (bb)))
946 ++nr_rgn_out_edges;
948 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
951 /* Now nr_rgn_out_edges is the number of region-exit edges from
952 pred, and nr_out_edges will be the number of pred out edges
953 not leaving the region. */
954 nr_out_edges -= nr_rgn_out_edges;
955 if (nr_rgn_out_edges > 0)
956 prob[bb] += 0.9 * prob[pred_bb] / nr_out_edges;
957 else
958 prob[bb] += prob[pred_bb] / nr_out_edges;
961 SET_BIT (dom[bb], bb);
962 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
964 if (sched_verbose >= 2)
965 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
966 (int) (100.0 * prob[bb]));
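/* A worked example with illustrative numbers: suppose bb has a single
   region predecessor pred with prob[pred] == 0.8, and pred has two out
   edges, one of which leaves the region.  Then nr_rgn_out_edges == 1,
   nr_out_edges becomes 2 - 1 == 1, and bb accumulates
   0.9 * 0.8 / 1 == 0.72; the 0.9 factor models the weight lost through
   the region-exit edge.  */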
969 /* Functions for target info. */
971 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
972 Note that bb_trg dominates bb_src. */
974 static void
975 split_edges (int bb_src, int bb_trg, edgelst *bl)
977 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
978 sbitmap_copy (src, pot_split[bb_src]);
980 sbitmap_difference (src, src, pot_split[bb_trg]);
981 extract_edgelst (src, bl);
982 sbitmap_free (src);
985 /* Find the valid candidate-source-blocks for the target block TRG, compute
986 their probability, and check if they are speculative or not.
987 For speculative sources, compute their update-blocks and split-blocks. */
989 static void
990 compute_trg_info (int trg)
992 candidate *sp;
993 edgelst el;
994 int i, j, k, update_idx;
995 basic_block block;
996 sbitmap visited;
997 edge_iterator ei;
998 edge e;
1000 /* Define some of the fields for the target bb as well. */
1001 sp = candidate_table + trg;
1002 sp->is_valid = 1;
1003 sp->is_speculative = 0;
1004 sp->src_prob = 100;
1006 visited = sbitmap_alloc (last_basic_block - (INVALID_BLOCK + 1));
1008 for (i = trg + 1; i < current_nr_blocks; i++)
1010 sp = candidate_table + i;
1012 sp->is_valid = IS_DOMINATED (i, trg);
1013 if (sp->is_valid)
1015 sp->src_prob = GET_SRC_PROB (i, trg);
1016 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1019 if (sp->is_valid)
1021 split_edges (i, trg, &el);
1022 sp->is_speculative = (el.nr_members) ? 1 : 0;
1023 if (sp->is_speculative && !flag_schedule_speculative)
1024 sp->is_valid = 0;
1027 if (sp->is_valid)
1029 /* Compute split blocks and store them in bblst_table.
1030 The TO block of every split edge is a split block. */
1031 sp->split_bbs.first_member = &bblst_table[bblst_last];
1032 sp->split_bbs.nr_members = el.nr_members;
1033 for (j = 0; j < el.nr_members; bblst_last++, j++)
1034 bblst_table[bblst_last] = el.first_member[j]->dest;
1035 sp->update_bbs.first_member = &bblst_table[bblst_last];
1037 /* Compute update blocks and store them in bblst_table.
1038 For every split edge, look at the FROM block, and check
1039 all out edges. For each out edge that is not a split edge,
1040 add the TO block to the update block list. This list can end
1041 up with a lot of duplicates. We need to weed them out to avoid
1042 overrunning the end of the bblst_table. */
1044 update_idx = 0;
1045 sbitmap_zero (visited);
1046 for (j = 0; j < el.nr_members; j++)
1048 block = el.first_member[j]->src;
1049 FOR_EACH_EDGE (e, ei, block->succs)
1051 if (!TEST_BIT (visited,
1052 e->dest->index - (INVALID_BLOCK + 1)))
1054 for (k = 0; k < el.nr_members; k++)
1055 if (e == el.first_member[k])
1056 break;
1058 if (k >= el.nr_members)
1060 bblst_table[bblst_last++] = e->dest;
1061 SET_BIT (visited,
1062 e->dest->index - (INVALID_BLOCK + 1));
1063 update_idx++;
1068 sp->update_bbs.nr_members = update_idx;
1070 /* Make sure we didn't overrun the end of bblst_table. */
1071 gcc_assert (bblst_last <= bblst_size);
1073 else
1075 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1077 sp->is_speculative = 0;
1078 sp->src_prob = 0;
1082 sbitmap_free (visited);
1085 /* Print candidates info, for debugging purposes. Callable from debugger. */
1087 void
1088 debug_candidate (int i)
1090 if (!candidate_table[i].is_valid)
1091 return;
1093 if (candidate_table[i].is_speculative)
1095 int j;
1096 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1098 fprintf (sched_dump, "split path: ");
1099 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1101 int b = candidate_table[i].split_bbs.first_member[j]->index;
1103 fprintf (sched_dump, " %d ", b);
1105 fprintf (sched_dump, "\n");
1107 fprintf (sched_dump, "update path: ");
1108 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1110 int b = candidate_table[i].update_bbs.first_member[j]->index;
1112 fprintf (sched_dump, " %d ", b);
1114 fprintf (sched_dump, "\n");
1116 else
1118 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1122 /* Print candidates info, for debugging purposes. Callable from debugger. */
1124 void
1125 debug_candidates (int trg)
1127 int i;
1129 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1130 BB_TO_BLOCK (trg), trg);
1131 for (i = trg + 1; i < current_nr_blocks; i++)
1132 debug_candidate (i);
1135 /* Functions for speculative scheduling. */
1137 /* Return 0 if x is a set of a register live at the beginning of one
1138 of the split-blocks of src, otherwise return 1. */
1140 static int
1141 check_live_1 (int src, rtx x)
1143 int i;
1144 int regno;
1145 rtx reg = SET_DEST (x);
1147 if (reg == 0)
1148 return 1;
1150 while (GET_CODE (reg) == SUBREG
1151 || GET_CODE (reg) == ZERO_EXTRACT
1152 || GET_CODE (reg) == STRICT_LOW_PART)
1153 reg = XEXP (reg, 0);
1155 if (GET_CODE (reg) == PARALLEL)
1157 int i;
1159 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1160 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1161 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1162 return 1;
1164 return 0;
1167 if (!REG_P (reg))
1168 return 1;
1170 regno = REGNO (reg);
1172 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1174 /* Global registers are assumed live. */
1175 return 0;
1177 else
1179 if (regno < FIRST_PSEUDO_REGISTER)
1181 /* Check for hard registers. */
1182 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1183 while (--j >= 0)
1185 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1187 basic_block b = candidate_table[src].split_bbs.first_member[i];
1189 if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start,
1190 regno + j))
1192 return 0;
1197 else
1199 /* Check for pseudo registers. */
1200 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1202 basic_block b = candidate_table[src].split_bbs.first_member[i];
1204 if (REGNO_REG_SET_P (b->il.rtl->global_live_at_start, regno))
1206 return 0;
1212 return 1;
1215 /* If x is a set of a register R, mark that R is live at the beginning
1216 of every update-block of src. */
1218 static void
1219 update_live_1 (int src, rtx x)
1221 int i;
1222 int regno;
1223 rtx reg = SET_DEST (x);
1225 if (reg == 0)
1226 return;
1228 while (GET_CODE (reg) == SUBREG
1229 || GET_CODE (reg) == ZERO_EXTRACT
1230 || GET_CODE (reg) == STRICT_LOW_PART)
1231 reg = XEXP (reg, 0);
1233 if (GET_CODE (reg) == PARALLEL)
1235 int i;
1237 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1238 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1239 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1241 return;
1244 if (!REG_P (reg))
1245 return;
1247 /* Global registers are always live, so the code below does not apply
1248 to them. */
1250 regno = REGNO (reg);
1252 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1254 if (regno < FIRST_PSEUDO_REGISTER)
1256 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1257 while (--j >= 0)
1259 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1261 basic_block b = candidate_table[src].update_bbs.first_member[i];
1263 SET_REGNO_REG_SET (b->il.rtl->global_live_at_start,
1264 regno + j);
1268 else
1270 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1272 basic_block b = candidate_table[src].update_bbs.first_member[i];
1274 SET_REGNO_REG_SET (b->il.rtl->global_live_at_start, regno);
1280 /* Return 1 if insn can be speculatively moved from block src to trg,
1281 otherwise return 0. Called before first insertion of insn to
1282 ready-list or before the scheduling. */
1284 static int
1285 check_live (rtx insn, int src)
1287 /* Find the registers set by instruction. */
1288 if (GET_CODE (PATTERN (insn)) == SET
1289 || GET_CODE (PATTERN (insn)) == CLOBBER)
1290 return check_live_1 (src, PATTERN (insn));
1291 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1293 int j;
1294 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1295 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1296 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1297 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1298 return 0;
1300 return 1;
1303 return 1;
1306 /* Update the live registers info after insn was moved speculatively from
1307 block src to trg. */
1309 static void
1310 update_live (rtx insn, int src)
1312 /* Find the registers set by instruction. */
1313 if (GET_CODE (PATTERN (insn)) == SET
1314 || GET_CODE (PATTERN (insn)) == CLOBBER)
1315 update_live_1 (src, PATTERN (insn));
1316 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1318 int j;
1319 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1320 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1321 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1322 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
1326 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */
1327 #define IS_REACHABLE(bb_from, bb_to) \
1328 (bb_from == bb_to \
1329 || IS_RGN_ENTRY (bb_from) \
1330 || (TEST_BIT (ancestor_edges[bb_to], \
1331 EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))
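/* For example, in a region whose entry E has the single successor A,
   which in turn falls through to B, the edge E->A is recorded in
   ancestor_edges[B]; since E->A is also the single edge entering A,
   IS_REACHABLE (A, B) holds (here A and B stand for the blocks'
   topological numbers in the region, and the layout is illustrative).  */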
1333 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1335 static void
1336 set_spec_fed (rtx load_insn)
1338 rtx link;
1340 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
1341 if (GET_MODE (link) == VOIDmode)
1342 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
1343 } /* set_spec_fed */
1345 /* On the path from the insn to load_insn_bb, find a conditional
1346 branch depending on insn, that guards the speculative load. */
1348 static int
1349 find_conditional_protection (rtx insn, int load_insn_bb)
1351 rtx link;
1353 /* Iterate through DEF-USE forward dependences. */
1354 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
1356 rtx next = XEXP (link, 0);
1357 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1358 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1359 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1360 && load_insn_bb != INSN_BB (next)
1361 && GET_MODE (link) == VOIDmode
1362 && (JUMP_P (next)
1363 || find_conditional_protection (next, load_insn_bb)))
1364 return 1;
1366 return 0;
1367 } /* find_conditional_protection */
1369 /* Returns 1 if the same insn1 that participates in the computation
1370 of load_insn's address is feeding a conditional branch that is
1371 guarding on load_insn. This is true if we find the two DEF-USE
1372 chains:
1373 insn1 -> ... -> conditional-branch
1374 insn1 -> ... -> load_insn,
1375 and if a flow path exists:
1376 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1377 and if insn1 is on the path
1378 region-entry -> ... -> bb_trg -> ... load_insn.
1380 Locate insn1 by climbing on LOG_LINKS from load_insn.
1381 Locate the branch by following INSN_DEPEND from insn1. */
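/* A source-level sketch of the pattern (illustrative only):

       r1 = p;                   <- insn1 computes the address
       if (r1 == 0) goto skip;   <- conditional branch fed by insn1
       r2 = *r1;                 <- load_insn, address fed by insn1

   The branch testing r1 guards the load through r1, so the load is
   considered conditionally protected.  */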
1383 static int
1384 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1386 rtx link;
1388 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
1390 rtx insn1 = XEXP (link, 0);
1392 /* Must be a DEF-USE dependence upon non-branch. */
1393 if (GET_MODE (link) != VOIDmode
1394 || JUMP_P (insn1))
1395 continue;
1397 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1398 if (INSN_BB (insn1) == bb_src
1399 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1400 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1401 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1402 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1403 continue;
1405 /* Now search for the conditional-branch. */
1406 if (find_conditional_protection (insn1, bb_src))
1407 return 1;
1409 /* Recursive step: search another insn1, "above" current insn1. */
1410 return is_conditionally_protected (insn1, bb_src, bb_trg);
1413 /* The chain does not exist. */
1414 return 0;
1415 } /* is_conditionally_protected */
1417 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1418 load_insn can move speculatively from bb_src to bb_trg. All the
1419 following must hold:
1421 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1422 (2) load_insn and load1 have a def-use dependence upon
1423 the same insn 'insn1'.
1424 (3) either load2 is in bb_trg, or:
1425 - there's only one split-block, and
1426 - load1 is on the escape path, and
1428 From all these we can conclude that the two loads access memory
1429 addresses that differ at most by a constant, and hence if moving
1430 load_insn would cause an exception, it would have been caused by
1431 load2 anyhow. */
1433 static int
1434 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1436 rtx back_link;
1437 candidate *candp = candidate_table + bb_src;
1439 if (candp->split_bbs.nr_members != 1)
1440 /* Must have exactly one escape block. */
1441 return 0;
1443 for (back_link = LOG_LINKS (load_insn);
1444 back_link; back_link = XEXP (back_link, 1))
1446 rtx insn1 = XEXP (back_link, 0);
1448 if (GET_MODE (back_link) == VOIDmode)
1450 /* Found a DEF-USE dependence (insn1, load_insn). */
1451 rtx fore_link;
1453 for (fore_link = INSN_DEPEND (insn1);
1454 fore_link; fore_link = XEXP (fore_link, 1))
1456 rtx insn2 = XEXP (fore_link, 0);
1457 if (GET_MODE (fore_link) == VOIDmode)
1459 /* Found a DEF-USE dependence (insn1, insn2). */
1460 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1461 /* insn2 not guaranteed to be a 1 base reg load. */
1462 continue;
1464 if (INSN_BB (insn2) == bb_trg)
1465 /* insn2 is the similar load, in the target block. */
1466 return 1;
1468 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1469 /* insn2 is a similar load, in a split-block. */
1470 return 1;
1476 /* Couldn't find a similar load. */
1477 return 0;
1478 } /* is_pfree */
1480 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1481 a load moved speculatively, or if load_insn is protected by
1482 a compare on load_insn's address). */
1484 static int
1485 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1487 if (FED_BY_SPEC_LOAD (load_insn))
1488 return 1;
1490 if (LOG_LINKS (load_insn) == NULL)
1491 /* Dependence may 'hide' out of the region. */
1492 return 1;
1494 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1495 return 1;
1497 return 0;
1500 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1501 Return 1 if insn is exception-free (and the motion is valid)
1502 and 0 otherwise. */
1504 static int
1505 is_exception_free (rtx insn, int bb_src, int bb_trg)
1507 int insn_class = haifa_classify_insn (insn);
1509 /* Handle non-load insns. */
1510 switch (insn_class)
1512 case TRAP_FREE:
1513 return 1;
1514 case TRAP_RISKY:
1515 return 0;
1516 default:;
1519 /* Handle loads. */
1520 if (!flag_schedule_speculative_load)
1521 return 0;
1522 IS_LOAD_INSN (insn) = 1;
1523 switch (insn_class)
1525 case IFREE:
1526 return (1);
1527 case IRISKY:
1528 return 0;
1529 case PFREE_CANDIDATE:
1530 if (is_pfree (insn, bb_src, bb_trg))
1531 return 1;
1532 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1533 case PRISKY_CANDIDATE:
1534 if (!flag_schedule_speculative_load_dangerous
1535 || is_prisky (insn, bb_src, bb_trg))
1536 return 0;
1537 break;
1538 default:;
1541 return flag_schedule_speculative_load_dangerous;
1544 /* The number of insns from the current block scheduled so far. */
1545 static int sched_target_n_insns;
1546 /* The number of insns from the current block to be scheduled in total. */
1547 static int target_n_insns;
1548 /* The number of insns from the entire region scheduled so far. */
1549 static int sched_n_insns;
1550 /* Nonzero if the last scheduled insn was a jump. */
1551 static int last_was_jump;
1553 /* Implementations of the sched_info functions for region scheduling. */
1554 static void init_ready_list (struct ready_list *);
1555 static int can_schedule_ready_p (rtx);
1556 static int new_ready (rtx);
1557 static int schedule_more_p (void);
1558 static const char *rgn_print_insn (rtx, int);
1559 static int rgn_rank (rtx, rtx);
1560 static int contributes_to_priority (rtx, rtx);
1561 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1563 /* Return nonzero if there are more insns that should be scheduled. */
1565 static int
1566 schedule_more_p (void)
1568 return ! last_was_jump && sched_target_n_insns < target_n_insns;
1571 /* Add all insns that are initially ready to the ready list READY. Called
1572 once before scheduling a set of insns. */
1574 static void
1575 init_ready_list (struct ready_list *ready)
1577 rtx prev_head = current_sched_info->prev_head;
1578 rtx next_tail = current_sched_info->next_tail;
1579 int bb_src;
1580 rtx insn;
1582 target_n_insns = 0;
1583 sched_target_n_insns = 0;
1584 sched_n_insns = 0;
1585 last_was_jump = 0;
1587 /* Print debugging information. */
1588 if (sched_verbose >= 5)
1589 debug_dependencies ();
1591 /* Prepare current target block info. */
1592 if (current_nr_blocks > 1)
1594 candidate_table = xmalloc (current_nr_blocks * sizeof (candidate));
1596 bblst_last = 0;
1597 /* bblst_table holds split blocks and update blocks for each block after
1598 the current one in the region. split blocks and update blocks are
1599 the TO blocks of region edges, so there can be at most rgn_nr_edges
1600 of them. */
1601 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1602 bblst_table = xmalloc (bblst_size * sizeof (basic_block));
1604 edgelst_last = 0;
1605 edgelst_table = xmalloc (rgn_nr_edges * sizeof (edge));
1607 compute_trg_info (target_bb);
1610 /* Initialize ready list with all 'ready' insns in target block.
1611 Count number of insns in the target block being scheduled. */
1612 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
1614 if (INSN_DEP_COUNT (insn) == 0)
1616 ready_add (ready, insn);
1618 if (targetm.sched.adjust_priority)
1619 INSN_PRIORITY (insn) =
1620 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1622 target_n_insns++;
1625 /* Add to ready list all 'ready' insns in valid source blocks.
1626 For speculative insns, check-live, exception-free, and
1627 issue-delay. */
1628 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
1629 if (IS_VALID (bb_src))
1631 rtx src_head;
1632 rtx src_next_tail;
1633 rtx tail, head;
1635 get_block_head_tail (BB_TO_BLOCK (bb_src), &head, &tail);
1636 src_next_tail = NEXT_INSN (tail);
1637 src_head = head;
1639 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
1641 if (! INSN_P (insn))
1642 continue;
1644 if (!CANT_MOVE (insn)
1645 && (!IS_SPECULATIVE_INSN (insn)
1646 || ((recog_memoized (insn) < 0
1647 || min_insn_conflict_delay (curr_state,
1648 insn, insn) <= 3)
1649 && check_live (insn, bb_src)
1650 && is_exception_free (insn, bb_src, target_bb))))
1651 if (INSN_DEP_COUNT (insn) == 0)
1653 ready_add (ready, insn);
1655 if (targetm.sched.adjust_priority)
1656 INSN_PRIORITY (insn) =
1657 targetm.sched.adjust_priority (insn, INSN_PRIORITY (insn));
1663 /* Called after taking INSN from the ready list. Returns nonzero if this
1664 insn can be scheduled, zero if we should silently discard it. */
1666 static int
1667 can_schedule_ready_p (rtx insn)
1669 if (JUMP_P (insn))
1670 last_was_jump = 1;
1672 /* An interblock motion? */
1673 if (INSN_BB (insn) != target_bb)
1675 basic_block b1;
1677 if (IS_SPECULATIVE_INSN (insn))
1679 if (!check_live (insn, INSN_BB (insn)))
1680 return 0;
1681 update_live (insn, INSN_BB (insn));
1683 /* For speculative load, mark insns fed by it. */
1684 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
1685 set_spec_fed (insn);
1687 nr_spec++;
1689 nr_inter++;
1691 /* Update source block boundaries. */
1692 b1 = BLOCK_FOR_INSN (insn);
1693 if (insn == BB_HEAD (b1) && insn == BB_END (b1))
1695 /* We moved all the insns in the basic block.
1696 Emit a note after the last insn and update the
1697 begin/end boundaries to point to the note. */
1698 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
1699 BB_HEAD (b1) = note;
1700 BB_END (b1) = note;
1702 else if (insn == BB_END (b1))
1704 /* We took insns from the end of the basic block,
1705 so update the end of block boundary so that it
1706 points to the first insn we did not move. */
1707 BB_END (b1) = PREV_INSN (insn);
1709 else if (insn == BB_HEAD (b1))
1711 /* We took insns from the start of the basic block,
1712 so update the start of block boundary so that
1713 it points to the first insn we did not move. */
1714 BB_HEAD (b1) = NEXT_INSN (insn);
1717 else
1719 /* In block motion. */
1720 sched_target_n_insns++;
1722 sched_n_insns++;
1724 return 1;
1727 /* Called after INSN has all its dependencies resolved. Return nonzero
1728 if it should be moved to the ready list or the queue, or zero if we
1729 should silently discard it. */
1730 static int
1731 new_ready (rtx next)
1733 /* For speculative insns, before inserting to ready/queue,
1734 check live, exception-free, and issue-delay. */
1735 if (INSN_BB (next) != target_bb
1736 && (!IS_VALID (INSN_BB (next))
1737 || CANT_MOVE (next)
1738 || (IS_SPECULATIVE_INSN (next)
1739 && ((recog_memoized (next) >= 0
1740 && min_insn_conflict_delay (curr_state, next, next) > 3)
1741 || !check_live (next, INSN_BB (next))
1742 || !is_exception_free (next, INSN_BB (next), target_bb)))))
1743 return 0;
1744 return 1;
1747 /* Return a string that contains the insn uid and optionally anything else
1748 necessary to identify this insn in an output. It's valid to use a
1749 static buffer for this. The ALIGNED parameter should cause the string
1750 to be formatted so that multiple output lines will line up nicely. */
1752 static const char *
1753 rgn_print_insn (rtx insn, int aligned)
1755 static char tmp[80];
1757 if (aligned)
1758 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
1759 else
1761 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
1762 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
1763 else
1764 sprintf (tmp, "%d", INSN_UID (insn));
1766 return tmp;
1769 /* Compare priority of two insns. Return a positive number if the second
1770 insn is to be preferred for scheduling, and a negative one if the first
1771 is to be preferred. Zero if they are equally good. */
1773 static int
1774 rgn_rank (rtx insn1, rtx insn2)
1776 /* Some comparisons make sense only in interblock scheduling. */
1777 if (INSN_BB (insn1) != INSN_BB (insn2))
1779 int spec_val, prob_val;
1781 /* Prefer an in-block motion over an interblock motion. */
1782 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
1783 return 1;
1784 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
1785 return -1;
1787 /* Prefer a useful motion over a speculative one. */
1788 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
1789 if (spec_val)
1790 return spec_val;
1792 /* Prefer a more probable (speculative) insn. */
1793 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
1794 if (prob_val)
1795 return prob_val;
1797 return 0;
1800 /* NEXT is an instruction that depends on INSN (a backward dependence);
1801 return nonzero if we should include this dependence in priority
1802 calculations. */
1804 static int
1805 contributes_to_priority (rtx next, rtx insn)
1807 return BLOCK_NUM (next) == BLOCK_NUM (insn);
1810 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
1811 conditionally set before INSN. Store the set of registers that
1812 must be considered as used by this jump in USED and that of
1813 registers that must be considered as set in SET. */
1815 static void
1816 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
1817 regset cond_exec ATTRIBUTE_UNUSED,
1818 regset used ATTRIBUTE_UNUSED,
1819 regset set ATTRIBUTE_UNUSED)
1821 /* Nothing to do here, since we postprocess jumps in
1822 add_branch_dependences. */
1825 /* Used in schedule_insns to initialize current_sched_info for scheduling
1826 regions (or single basic blocks). */
1828 static struct sched_info region_sched_info =
1830 init_ready_list,
1831 can_schedule_ready_p,
1832 schedule_more_p,
1833 new_ready,
1834 rgn_rank,
1835 rgn_print_insn,
1836 contributes_to_priority,
1837 compute_jump_reg_dependencies,
1839 NULL, NULL,
1840 NULL, NULL,
1841 0, 0, 0
1844 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
1846 static bool
1847 sets_likely_spilled (rtx pat)
1849 bool ret = false;
1850 note_stores (pat, sets_likely_spilled_1, &ret);
1851 return ret;
1854 static void
1855 sets_likely_spilled_1 (rtx x, rtx pat, void *data)
1857 bool *ret = (bool *) data;
1859 if (GET_CODE (pat) == SET
1860 && REG_P (x)
1861 && REGNO (x) < FIRST_PSEUDO_REGISTER
1862 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
1863 *ret = true;
1866 /* Add dependences so that branches are scheduled to run last in their
1867 block. */
1869 static void
1870 add_branch_dependences (rtx head, rtx tail)
1872 rtx insn, last;
1874 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
1875 that can throw exceptions, force them to remain in order at the end of
1876 the block by adding dependencies and giving the last a high priority.
1877 There may be notes present, and prev_head may also be a note.
1879 Branches must obviously remain at the end. Calls should remain at the
1880 end since moving them results in worse register allocation. Uses remain
1881 at the end to ensure proper register allocation.
1883 cc0 setters remain at the end because they can't be moved away from
1884 their cc0 user.
1886 COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
1888 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
1889 are not moved before reload because we can wind up with register
1890 allocation failures. */
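/* Sketch (illustrative): for a block ending in

       ...
       use r0          <- USE
       call foo        <- CALL_INSN
       jump L          <- JUMP_INSN

   anti dependences are added so that the jump depends on the call and
   the call depends on the USE, which keeps all three glued to the end
   of the block in their original order.  */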
1892 insn = tail;
1893 last = 0;
1894 while (CALL_P (insn)
1895 || JUMP_P (insn)
1896 || (NONJUMP_INSN_P (insn)
1897 && (GET_CODE (PATTERN (insn)) == USE
1898 || GET_CODE (PATTERN (insn)) == CLOBBER
1899 || can_throw_internal (insn)
1900 #ifdef HAVE_cc0
1901 || sets_cc0_p (PATTERN (insn))
1902 #endif
1903 || (!reload_completed
1904 && sets_likely_spilled (PATTERN (insn)))))
1905 || NOTE_P (insn))
1907 if (!NOTE_P (insn))
1909 if (last != 0 && !find_insn_list (insn, LOG_LINKS (last)))
1911 if (! sched_insns_conditions_mutex_p (last, insn))
1912 add_dependence (last, insn, REG_DEP_ANTI);
1913 INSN_REF_COUNT (insn)++;
1916 CANT_MOVE (insn) = 1;
1918 last = insn;
1921 /* Don't overrun the bounds of the basic block. */
1922 if (insn == head)
1923 break;
1925 insn = PREV_INSN (insn);
1928 /* Make sure these insns are scheduled last in their block. */
1929 insn = last;
1930 if (insn != 0)
1931 while (insn != head)
1933 insn = prev_nonnote_insn (insn);
1935 if (INSN_REF_COUNT (insn) != 0)
1936 continue;
1938 if (! sched_insns_conditions_mutex_p (last, insn))
1939 add_dependence (last, insn, REG_DEP_ANTI);
1940 INSN_REF_COUNT (insn) = 1;
1943 #ifdef HAVE_conditional_execution
1944 /* Finally, if the block ends in a jump, and we are doing intra-block
1945 scheduling, make sure that the branch depends on any COND_EXEC insns
1946 inside the block to avoid moving the COND_EXECs past the branch insn.
1948 We only have to do this after reload, because (1) before reload there
1949 are no COND_EXEC insns, and (2) the region scheduler is an intra-block
1950 scheduler after reload.
1952 FIXME: We could in some cases move COND_EXEC insns past the branch if
1953 this scheduler would be a little smarter. Consider this code:
1955 T = [addr]
1956 C ? addr += 4
1957 !C ? X += 12
1958 C ? T += 1
1959 C ? jump foo
1961 On a target with a one cycle stall on a memory access the optimal
1962 sequence would be:
1964 T = [addr]
1965 C ? addr += 4
1966 C ? T += 1
1967 C ? jump foo
1968 !C ? X += 12
1970 We don't want to put the 'X += 12' before the branch because it just
1971 wastes a cycle of execution time when the branch is taken.
1973 Note that in the example "!C" will always be true. That is another
1974 possible improvement for handling COND_EXECs in this scheduler: it
1975 could remove always-true predicates. */
1977 if (!reload_completed || ! JUMP_P (tail))
1978 return;
1980 insn = tail;
1981 while (insn != head)
1983 insn = PREV_INSN (insn);
1985 /* Note that we want to add this dependency even when
1986 sched_insns_conditions_mutex_p returns true. The whole point
1987 is that we _want_ this dependency, even if these insns really
1988 are independent. */
1989 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
1990 add_dependence (tail, insn, REG_DEP_ANTI);
1992 #endif
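The pinning loop above is easier to see on a toy model. Below is a minimal, standalone sketch in plain C; the insn stream and must_stay flags are made up and none of GCC's rtl or h_i_d types are used. Walking backwards from the block tail, every insn that has to stay put gets an anti-dependence on the pinned insn found just after it, which chains the group together in its original order.

#include <stdio.h>

#define NINSNS 6

/* 1 marks insns that must stay at the block end (the jump, USEs, etc.).  */
static const int must_stay[NINSNS] = { 0, 0, 0, 1, 1, 1 };

int
main (void)
{
  int i, last = -1;

  /* Walk backwards from the tail, as the loop above does.  */
  for (i = NINSNS - 1; i >= 0; i--)
    {
      if (!must_stay[i])
        break;                  /* First movable insn ends the pinned tail.  */
      if (last != -1)
        printf ("insn %d gets an anti-dependence on insn %d\n", last, i);
      last = i;
    }
  return 0;
}

In the real function a second walk then makes the earliest pinned insn depend on every other insn in the block, so the whole pinned group is scheduled after everything else.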
1995 /* Data structures for the computation of data dependences in a region. We
1996 keep one `deps' structure for every basic block. Before analyzing the
1997 data dependences for a bb, its variables are initialized as a function of
1998 the variables of its predecessors. When the analysis for a bb completes,
1999 we save the contents to the corresponding bb_deps[bb] variable. */
2001 static struct deps *bb_deps;
2003 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2005 static rtx
2006 concat_INSN_LIST (rtx copy, rtx old)
2008 rtx new = old;
2009 for (; copy ; copy = XEXP (copy, 1))
2010 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2011 return new;
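As a rough standalone model of what concat_INSN_LIST does, the sketch below uses a hypothetical struct node in place of GCC's INSN_LIST rtx and pushes a fresh copy of each element of COPY onto OLD. Like the function above it leaves COPY untouched, and the copied elements end up reversed in front of OLD, which the small main makes visible.

#include <stdio.h>
#include <stdlib.h>

struct node
{
  int uid;                      /* stand-in for the insn carried by an INSN_LIST */
  struct node *next;
};

static struct node *
cons (int uid, struct node *next)
{
  struct node *n = malloc (sizeof *n);   /* no error handling; illustration only */
  n->uid = uid;
  n->next = next;
  return n;
}

/* Mirror of concat_INSN_LIST: push each element of COPY onto OLD.  */
static struct node *
concat_list (const struct node *copy, struct node *old)
{
  struct node *head = old;
  for (; copy; copy = copy->next)
    head = cons (copy->uid, head);
  return head;
}

int
main (void)
{
  struct node *old = cons (7, NULL);
  struct node *copy = cons (1, cons (2, NULL));
  struct node *merged = concat_list (copy, old);

  for (; merged; merged = merged->next)
    printf ("%d ", merged->uid);         /* prints: 2 1 7 */
  printf ("\n");
  return 0;
}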
2014 static void
2015 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2016 rtx *old_mems_p)
2018 rtx new_insns = *old_insns_p;
2019 rtx new_mems = *old_mems_p;
2021 while (copy_insns)
2023 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2024 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2025 copy_insns = XEXP (copy_insns, 1);
2026 copy_mems = XEXP (copy_mems, 1);
2029 *old_insns_p = new_insns;
2030 *old_mems_p = new_mems;
2033 /* After computing the dependencies for block BB, propagate the dependencies
2034 found in PRED_DEPS to the successors of the block. */
2035 static void
2036 propagate_deps (int bb, struct deps *pred_deps)
2038 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
2039 edge_iterator ei;
2040 edge e;
2042 /* bb's structures are inherited by its successors. */
2043 FOR_EACH_EDGE (e, ei, block->succs)
2045 struct deps *succ_deps;
2046 unsigned reg;
2047 reg_set_iterator rsi;
2049 /* Only bbs "below" bb, in the same region, are interesting. */
2050 if (e->dest == EXIT_BLOCK_PTR
2051 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2052 || BLOCK_TO_BB (e->dest->index) <= bb)
2053 continue;
2055 succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);
2057 /* The reg_last lists are inherited by the successor. */
2058 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2060 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2061 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2063 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2064 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2065 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2066 succ_rl->clobbers);
2067 succ_rl->uses_length += pred_rl->uses_length;
2068 succ_rl->clobbers_length += pred_rl->clobbers_length;
2070 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2072 /* Mem read/write lists are inherited by the successor. */
2073 concat_insn_mem_list (pred_deps->pending_read_insns,
2074 pred_deps->pending_read_mems,
2075 &succ_deps->pending_read_insns,
2076 &succ_deps->pending_read_mems);
2077 concat_insn_mem_list (pred_deps->pending_write_insns,
2078 pred_deps->pending_write_mems,
2079 &succ_deps->pending_write_insns,
2080 &succ_deps->pending_write_mems);
2082 succ_deps->last_pending_memory_flush
2083 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2084 succ_deps->last_pending_memory_flush);
2086 succ_deps->pending_lists_length += pred_deps->pending_lists_length;
2087 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2089 /* last_function_call is inherited by the successor. */
2090 succ_deps->last_function_call
2091 = concat_INSN_LIST (pred_deps->last_function_call,
2092 succ_deps->last_function_call);
2094 /* sched_before_next_call is inherited by the successor. */
2095 succ_deps->sched_before_next_call
2096 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2097 succ_deps->sched_before_next_call);
2100 /* These lists should point to the right place, for correct
2101 freeing later. */
2102 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2103 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2104 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2105 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2107 /* Can't allow these to be freed twice. */
2108 pred_deps->pending_read_insns = 0;
2109 pred_deps->pending_read_mems = 0;
2110 pred_deps->pending_write_insns = 0;
2111 pred_deps->pending_write_mems = 0;
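A toy, self-contained illustration of this inheritance step: the per-register lists are reduced to plain bitmasks and the three-block region below is made up, but the loop has the same shape as the FOR_EACH_EDGE walk in propagate_deps above, where each analyzed block merges its summary into every successor that comes later in the region.

#include <stdio.h>

#define NBLOCKS 3

/* Hypothetical per-block summaries: bit r set means register r was set
   (or used) in the block or in an already-propagated predecessor.  */
static unsigned sets[NBLOCKS], uses[NBLOCKS];

/* Toy region CFG: 0 -> 1, 0 -> 2, 1 -> 2; each row ends with -1.  */
static const int succs[NBLOCKS][NBLOCKS + 1] =
  { { 1, 2, -1 }, { 2, -1 }, { -1 } };

static void
propagate (int bb)
{
  const int *s;

  for (s = succs[bb]; *s != -1; s++)
    {
      /* Successors inherit the predecessor's lists.  */
      sets[*s] |= sets[bb];
      uses[*s] |= uses[bb];
    }
}

int
main (void)
{
  int bb;

  sets[0] = 1 << 2;             /* block 0 sets r2 */
  uses[1] = 1 << 3;             /* block 1 uses r3 */

  /* Blocks are processed in region order, i.e. after their predecessors.  */
  for (bb = 0; bb < NBLOCKS; bb++)
    propagate (bb);

  printf ("block 2 inherited sets=%#x uses=%#x\n", sets[2], uses[2]);
  return 0;
}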
2114 /* Compute backward dependences inside bb. In a multiple-block region:
2115 (1) a bb is analyzed after its predecessors, and (2) the lists in
2116 effect at the end of bb (after analyzing for bb) are inherited by
2117 bb's successors.
2119 Specifically for reg-reg data dependences, the block insns are
2120 scanned by sched_analyze () top-to-bottom. Two lists are
2121 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2122 and reg_last[].uses for register USEs.
2124 When analysis is completed for bb, we update for its successors:
2125 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2126 ; - USES[succ] = Union (USES [succ], USES [bb])
2128 The mechanism for computing mem-mem data dependence is very
2129 similar, and the result is interblock dependences in the region. */
2131 static void
2132 compute_block_backward_dependences (int bb)
2134 rtx head, tail;
2135 struct deps tmp_deps;
2137 tmp_deps = bb_deps[bb];
2139 /* Do the analysis for this block. */
2140 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2141 sched_analyze (&tmp_deps, head, tail);
2142 add_branch_dependences (head, tail);
2144 if (current_nr_blocks > 1)
2145 propagate_deps (bb, &tmp_deps);
2147 /* Free up the INSN_LISTs. */
2148 free_deps (&tmp_deps);
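The reg_last[].sets / reg_last[].uses bookkeeping described in the comment above can be made concrete with a minimal standalone model. The one-register insn stream is invented, and the actual classification is performed by sched_analyze () in sched-deps.c rather than in this file; the sketch only shows why the two lists are kept: a read after a write yields a flow dependence, a write after a read an anti dependence, and a write after a write an output dependence.

#include <stdio.h>

/* Toy insn stream over a single register: 1 = writes it, 0 = only reads it.  */
#define NINSNS 5
static const int writes[NINSNS] = { 1, 0, 0, 1, 1 };

int
main (void)
{
  int i, j;
  int last_set = -1;                    /* reg_last[].sets, reduced to one entry */
  int last_uses[NINSNS], n_uses = 0;    /* reg_last[].uses */

  for (i = 0; i < NINSNS; i++)
    {
      if (!writes[i])
        {
          if (last_set != -1)
            printf ("flow dep:   insn %d on insn %d\n", i, last_set);
          last_uses[n_uses++] = i;
        }
      else
        {
          for (j = 0; j < n_uses; j++)
            printf ("anti dep:   insn %d on insn %d\n", i, last_uses[j]);
          if (last_set != -1)
            printf ("output dep: insn %d on insn %d\n", i, last_set);
          last_set = i;
          n_uses = 0;                   /* in this simplified model a write
                                           starts a fresh uses list */
        }
    }
  return 0;
}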
2151 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2152 them to the unused_*_list variables, so that they can be reused. */
2154 static void
2155 free_pending_lists (void)
2157 int bb;
2159 for (bb = 0; bb < current_nr_blocks; bb++)
2161 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2162 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2163 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2164 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2168 /* Print dependences for debugging, callable from debugger. */
2170 void
2171 debug_dependencies (void)
2173 int bb;
2175 fprintf (sched_dump, ";; --------------- forward dependences: ------------ \n");
2176 for (bb = 0; bb < current_nr_blocks; bb++)
2178 rtx head, tail;
2179 rtx next_tail;
2180 rtx insn;
2182 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2183 next_tail = NEXT_INSN (tail);
2184 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2185 BB_TO_BLOCK (bb), bb);
2187 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2188 "insn", "code", "bb", "dep", "prio", "cost",
2189 "reservation");
2190 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2191 "----", "----", "--", "---", "----", "----",
2192 "-----------");
2194 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2196 rtx link;
2198 if (! INSN_P (insn))
2200 int n;
2201 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2202 if (NOTE_P (insn))
2204 n = NOTE_LINE_NUMBER (insn);
2205 if (n < 0)
2206 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2207 else
2209 expanded_location xloc;
2210 NOTE_EXPANDED_LOCATION (xloc, insn);
2211 fprintf (sched_dump, "line %d, file %s\n",
2212 xloc.line, xloc.file);
2215 else
2216 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2217 continue;
2220 fprintf (sched_dump,
2221 ";; %s%5d%6d%6d%6d%6d%6d ",
2222 (SCHED_GROUP_P (insn) ? "+" : " "),
2223 INSN_UID (insn),
2224 INSN_CODE (insn),
2225 INSN_BB (insn),
2226 INSN_DEP_COUNT (insn),
2227 INSN_PRIORITY (insn),
2228 insn_cost (insn, 0, 0));
2230 if (recog_memoized (insn) < 0)
2231 fprintf (sched_dump, "nothing");
2232 else
2233 print_reservation (sched_dump, insn);
2235 fprintf (sched_dump, "\t: ");
2236 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2237 fprintf (sched_dump, "%d ", INSN_UID (XEXP (link, 0)));
2238 fprintf (sched_dump, "\n");
2241 fprintf (sched_dump, "\n");
2244 /* Returns true if all the basic blocks of the current region have
2245 NOTE_DISABLE_SCHED_OF_BLOCK, which means the region should not be scheduled. */
2246 static bool
2247 sched_is_disabled_for_current_region_p (void)
2249 int bb;
2251 for (bb = 0; bb < current_nr_blocks; bb++)
2252 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2253 return false;
2255 return true;
2258 /* Schedule a region. A region is either an inner loop, a loop-free
2259 subroutine, or a single basic block. Each bb in the region is
2260 scheduled after its flow predecessors. */
2262 static void
2263 schedule_region (int rgn)
2265 basic_block block;
2266 edge_iterator ei;
2267 edge e;
2268 int bb;
2269 int rgn_n_insns = 0;
2270 int sched_rgn_n_insns = 0;
2272 /* Set variables for the current region. */
2273 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2274 current_blocks = RGN_BLOCKS (rgn);
2276 /* Don't schedule a region that is marked by
2277 NOTE_DISABLE_SCHED_OF_BLOCK. */
2278 if (sched_is_disabled_for_current_region_p ())
2279 return;
2281 init_deps_global ();
2283 /* Initializations for region data dependence analysis. */
2284 bb_deps = xmalloc (sizeof (struct deps) * current_nr_blocks);
2285 for (bb = 0; bb < current_nr_blocks; bb++)
2286 init_deps (bb_deps + bb);
2288 /* Compute LOG_LINKS. */
2289 for (bb = 0; bb < current_nr_blocks; bb++)
2290 compute_block_backward_dependences (bb);
2292 /* Compute INSN_DEPEND. */
2293 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
2295 rtx head, tail;
2296 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2298 compute_forward_dependences (head, tail);
2300 if (targetm.sched.dependencies_evaluation_hook)
2301 targetm.sched.dependencies_evaluation_hook (head, tail);
2305 /* Set priorities. */
2306 for (bb = 0; bb < current_nr_blocks; bb++)
2308 rtx head, tail;
2309 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2311 rgn_n_insns += set_priorities (head, tail);
2314 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2315 if (current_nr_blocks > 1)
2317 prob = xmalloc ((current_nr_blocks) * sizeof (float));
2319 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2320 sbitmap_vector_zero (dom, current_nr_blocks);
2322 /* Use ->aux to implement EDGE_TO_BIT mapping. */
2323 rgn_nr_edges = 0;
2324 FOR_EACH_BB (block)
2326 if (CONTAINING_RGN (block->index) != rgn)
2327 continue;
2328 FOR_EACH_EDGE (e, ei, block->succs)
2329 SET_EDGE_TO_BIT (e, rgn_nr_edges++);
2332 rgn_edges = xmalloc (rgn_nr_edges * sizeof (edge));
2333 rgn_nr_edges = 0;
2334 FOR_EACH_BB (block)
2336 if (CONTAINING_RGN (block->index) != rgn)
2337 continue;
2338 FOR_EACH_EDGE (e, ei, block->succs)
2339 rgn_edges[rgn_nr_edges++] = e;
2342 /* Split edges. */
2343 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2344 sbitmap_vector_zero (pot_split, current_nr_blocks);
2345 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2346 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2348 /* Compute probabilities, dominators, split_edges. */
2349 for (bb = 0; bb < current_nr_blocks; bb++)
2350 compute_dom_prob_ps (bb);
2353 /* Now we can schedule all blocks. */
2354 for (bb = 0; bb < current_nr_blocks; bb++)
2356 rtx head, tail;
2357 int b = BB_TO_BLOCK (bb);
2359 get_block_head_tail (b, &head, &tail);
2361 if (no_real_insns_p (head, tail))
2362 continue;
2364 current_sched_info->prev_head = PREV_INSN (head);
2365 current_sched_info->next_tail = NEXT_INSN (tail);
2367 if (write_symbols != NO_DEBUG)
2369 save_line_notes (b, head, tail);
2370 rm_line_notes (head, tail);
2373 /* rm_other_notes only removes notes which are _inside_ the
2374 block---that is, it won't remove notes before the first real insn
2375 or after the last real insn of the block. So if the first insn
2376 has a REG_SAVE_NOTE which would otherwise be emitted before the
2377 insn, it is redundant with the note before the start of the
2378 block, and so we have to take it out. */
2379 if (INSN_P (head))
2381 rtx note;
2383 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2384 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2385 remove_note (head, note);
2388 /* Remove remaining note insns from the block, save them in
2389 note_list. These notes are restored at the end of
2390 schedule_block (). */
2391 rm_other_notes (head, tail);
2393 target_bb = bb;
2395 current_sched_info->queue_must_finish_empty
2396 = current_nr_blocks > 1 && !flag_schedule_interblock;
2398 schedule_block (b, rgn_n_insns);
2399 sched_rgn_n_insns += sched_n_insns;
2401 /* Update target block boundaries. */
2402 if (head == BB_HEAD (BASIC_BLOCK (b)))
2403 BB_HEAD (BASIC_BLOCK (b)) = current_sched_info->head;
2404 if (tail == BB_END (BASIC_BLOCK (b)))
2405 BB_END (BASIC_BLOCK (b)) = current_sched_info->tail;
2407 /* Clean up. */
2408 if (current_nr_blocks > 1)
2410 free (candidate_table);
2411 free (bblst_table);
2412 free (edgelst_table);
2416 /* Sanity check: verify that all region insns were scheduled. */
2417 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2419 /* Restore line notes. */
2420 if (write_symbols != NO_DEBUG)
2422 for (bb = 0; bb < current_nr_blocks; bb++)
2424 rtx head, tail;
2425 get_block_head_tail (BB_TO_BLOCK (bb), &head, &tail);
2426 restore_line_notes (head, tail);
2430 /* Done with this region. */
2431 free_pending_lists ();
2433 finish_deps_global ();
2435 free (bb_deps);
2437 if (current_nr_blocks > 1)
2439 /* Cleanup ->aux used for EDGE_TO_BIT mapping. */
2440 FOR_EACH_BB (block)
2442 if (CONTAINING_RGN (block->index) != rgn)
2443 continue;
2444 FOR_EACH_EDGE (e, ei, block->succs)
2445 e->aux = NULL;
2448 free (prob);
2449 sbitmap_vector_free (dom);
2450 sbitmap_vector_free (pot_split);
2451 sbitmap_vector_free (ancestor_edges);
2452 free (rgn_edges);
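The EDGE_TO_BIT mapping set up near the top of schedule_region (the ->aux trick) amounts to numbering the region's edges once and then treating every per-block edge set as a bitmap indexed by that number. A small standalone sketch, with a made-up toy_edge type in place of GCC's edge and a plain unsigned long in place of an sbitmap row such as pot_split[bb]:

#include <stdio.h>

#define NEDGES 5

/* Hypothetical edges with an aux slot; GCC stores the bit number in e->aux.  */
struct toy_edge { int src, dest; long aux; };

static struct toy_edge edges[NEDGES] =
  { {0, 1, 0}, {0, 2, 0}, {1, 2, 0}, {1, 3, 0}, {2, 3, 0} };

int
main (void)
{
  int i, nbits = 0;
  unsigned long split_set = 0;          /* stands in for one row of pot_split */

  /* First pass: number the edges, as SET_EDGE_TO_BIT does in the loop above.  */
  for (i = 0; i < NEDGES; i++)
    edges[i].aux = nbits++;

  /* A per-block edge set is then just a bitmask indexed by that number.  */
  split_set |= 1UL << edges[2].aux;
  split_set |= 1UL << edges[4].aux;

  for (i = 0; i < NEDGES; i++)
    printf ("edge %d->%d: bit %ld, in set: %s\n",
            edges[i].src, edges[i].dest, edges[i].aux,
            (split_set >> edges[i].aux) & 1 ? "yes" : "no");
  return 0;
}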
2456 /* Indexed by region, holds the number of death notes found in that region.
2457 Used for consistency checks. */
2458 static int *deaths_in_region;
2460 /* Initialize data structures for region scheduling. */
2462 static void
2463 init_regions (void)
2465 sbitmap blocks;
2466 int rgn;
2468 nr_regions = 0;
2469 rgn_table = xmalloc ((n_basic_blocks) * sizeof (region));
2470 rgn_bb_table = xmalloc ((n_basic_blocks) * sizeof (int));
2471 block_to_bb = xmalloc ((last_basic_block) * sizeof (int));
2472 containing_rgn = xmalloc ((last_basic_block) * sizeof (int));
2474 /* Compute regions for scheduling. */
2475 if (reload_completed
2476 || n_basic_blocks == 1
2477 || !flag_schedule_interblock
2478 || is_cfg_nonregular ())
2480 find_single_block_region ();
2482 else
2484 /* Compute the dominators and post dominators. */
2485 calculate_dominance_info (CDI_DOMINATORS);
2487 /* Find regions. */
2488 find_rgns ();
2490 if (sched_verbose >= 3)
2491 debug_regions ();
2493 /* For now. This will move as more and more of haifa is converted
2494 to using the cfg code in flow.c. */
2495 free_dominance_info (CDI_DOMINATORS);
2499 if (CHECK_DEAD_NOTES)
2501 blocks = sbitmap_alloc (last_basic_block);
2502 deaths_in_region = xmalloc (sizeof (int) * nr_regions);
2503 /* Remove all death notes from the subroutine. */
2504 for (rgn = 0; rgn < nr_regions; rgn++)
2506 int b;
2508 sbitmap_zero (blocks);
2509 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
2510 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn) + b]);
2512 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
2514 sbitmap_free (blocks);
2516 else
2517 count_or_remove_death_notes (NULL, 1);
2520 /* The one entry point in this file. DUMP_FILE is the dump file for
2521 this pass. */
2523 void
2524 schedule_insns (FILE *dump_file)
2526 sbitmap large_region_blocks, blocks;
2527 int rgn;
2528 int any_large_regions;
2529 basic_block bb;
2531 /* Taking care of this degenerate case makes the rest of
2532 this code simpler. */
2533 if (n_basic_blocks == 0)
2534 return;
2536 nr_inter = 0;
2537 nr_spec = 0;
2539 sched_init (dump_file);
2541 init_regions ();
2543 current_sched_info = &region_sched_info;
2545 /* Schedule every region in the subroutine. */
2546 for (rgn = 0; rgn < nr_regions; rgn++)
2547 schedule_region (rgn);
2549 /* Update life analysis for the subroutine. Do single block regions
2550 first so that we can verify that live_at_start didn't change. Then
2551 do all other blocks. */
2552 /* ??? There is an outside possibility that update_life_info, or more
2553 to the point propagate_block, could get called with nonzero flags
2554 more than once for one basic block. This would be kinda bad if it
2555 were to happen, since REG_INFO would be accumulated twice for the
2556 block, and we'd have twice the REG_DEAD notes.
2558 I'm fairly certain that this _shouldn't_ happen, since I don't think
2559 that live_at_start should change at region heads. Not sure what the
2560 best way to test for this kind of thing... */
2562 allocate_reg_life_data ();
2563 compute_bb_for_insn ();
2565 any_large_regions = 0;
2566 large_region_blocks = sbitmap_alloc (last_basic_block);
2567 sbitmap_zero (large_region_blocks);
2568 FOR_EACH_BB (bb)
2569 SET_BIT (large_region_blocks, bb->index);
2571 blocks = sbitmap_alloc (last_basic_block);
2572 sbitmap_zero (blocks);
2574 /* Update life information. For regions consisting of multiple blocks
2575 we've possibly done interblock scheduling that affects global liveness.
2576 For regions consisting of single blocks we need to do only local
2577 liveness. */
2578 for (rgn = 0; rgn < nr_regions; rgn++)
2579 if (RGN_NR_BLOCKS (rgn) > 1)
2580 any_large_regions = 1;
2581 else
2583 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2584 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2587 /* Don't update reg info after reload, since that affects
2588 regs_ever_live, which should not change after reload. */
2589 update_life_info (blocks, UPDATE_LIFE_LOCAL,
2590 (reload_completed ? PROP_DEATH_NOTES
2591 : PROP_DEATH_NOTES | PROP_REG_INFO));
2592 if (any_large_regions)
2594 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
2595 PROP_DEATH_NOTES | PROP_REG_INFO);
2598 if (CHECK_DEAD_NOTES)
2600 /* Verify the counts of death notes in the single basic block
2601 regions. */
2602 for (rgn = 0; rgn < nr_regions; rgn++)
2603 if (RGN_NR_BLOCKS (rgn) == 1)
2605 sbitmap_zero (blocks);
2606 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
2608 gcc_assert (deaths_in_region[rgn]
2609 == count_or_remove_death_notes (blocks, 0));
2611 free (deaths_in_region);
2614 /* Reposition the prologue and epilogue notes in case we moved the
2615 prologue/epilogue insns. */
2616 if (reload_completed)
2617 reposition_prologue_and_epilogue_notes (get_insns ());
2619 /* Delete redundant line notes. */
2620 if (write_symbols != NO_DEBUG)
2621 rm_redundant_line_notes ();
2623 if (sched_verbose)
2625 if (reload_completed == 0 && flag_schedule_interblock)
2627 fprintf (sched_dump,
2628 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2629 nr_inter, nr_spec);
2631 else
2632 gcc_assert (nr_inter <= 0);
2633 fprintf (sched_dump, "\n\n");
2636 /* Clean up. */
2637 free (rgn_table);
2638 free (rgn_bb_table);
2639 free (block_to_bb);
2640 free (containing_rgn);
2642 sched_finish ();
2644 sbitmap_free (blocks);
2645 sbitmap_free (large_region_blocks);
2647 #endif
2649 static bool
2650 gate_handle_sched (void)
2652 #ifdef INSN_SCHEDULING
2653 return flag_schedule_insns;
2654 #else
2655 return 0;
2656 #endif
2659 /* Run instruction scheduler. */
2660 static void
2661 rest_of_handle_sched (void)
2663 #ifdef INSN_SCHEDULING
2664 /* Do control and data sched analysis,
2665 and write some of the results to dump file. */
2667 schedule_insns (dump_file);
2668 #endif
2671 static bool
2672 gate_handle_sched2 (void)
2674 #ifdef INSN_SCHEDULING
2675 return optimize > 0 && flag_schedule_insns_after_reload;
2676 #else
2677 return 0;
2678 #endif
2681 /* Run second scheduling pass after reload. */
2682 static void
2683 rest_of_handle_sched2 (void)
2685 #ifdef INSN_SCHEDULING
2686 /* Do control and data sched analysis again,
2687 and write some more of the results to dump file. */
2689 split_all_insns (1);
2691 if (flag_sched2_use_superblocks || flag_sched2_use_traces)
2693 schedule_ebbs (dump_file);
2694 /* No liveness updating code yet, but it should be easy to do.
2695 reg-stack recomputes the liveness when needed for now. */
2696 count_or_remove_death_notes (NULL, 1);
2697 cleanup_cfg (CLEANUP_EXPENSIVE);
2699 else
2700 schedule_insns (dump_file);
2701 #endif
2704 struct tree_opt_pass pass_sched =
2706 "sched1", /* name */
2707 gate_handle_sched, /* gate */
2708 rest_of_handle_sched, /* execute */
2709 NULL, /* sub */
2710 NULL, /* next */
2711 0, /* static_pass_number */
2712 TV_SCHED, /* tv_id */
2713 0, /* properties_required */
2714 0, /* properties_provided */
2715 0, /* properties_destroyed */
2716 0, /* todo_flags_start */
2717 TODO_dump_func |
2718 TODO_ggc_collect, /* todo_flags_finish */
2719 'S' /* letter */
2722 struct tree_opt_pass pass_sched2 =
2724 "sched2", /* name */
2725 gate_handle_sched2, /* gate */
2726 rest_of_handle_sched2, /* execute */
2727 NULL, /* sub */
2728 NULL, /* next */
2729 0, /* static_pass_number */
2730 TV_SCHED2, /* tv_id */
2731 0, /* properties_required */
2732 0, /* properties_provided */
2733 0, /* properties_destroyed */
2734 0, /* todo_flags_start */
2735 TODO_dump_func |
2736 TODO_ggc_collect, /* todo_flags_finish */
2737 'R' /* letter */