PR c++/27177
[official-gcc.git] / gcc / sched-rgn.c
blobb340bd532e43e3d33517041ddc466a52e77520ed
1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007
4 Free Software Foundation, Inc.
5 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
6 and currently maintained by, Jim Wilson (wilson@cygnus.com)
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 /* This pass implements list scheduling within basic blocks. It is
25 run twice: (1) after flow analysis, but before register allocation,
26 and (2) after register allocation.
28 The first run performs interblock scheduling, moving insns between
29 different blocks in the same "region", and the second runs only
30 basic block scheduling.
32 Interblock motions performed are useful motions and speculative
33 motions, including speculative loads. Motions requiring code
34 duplication are not supported. The identification of motion type
35 and the check for validity of speculative motions requires
36 construction and analysis of the function's control flow graph.
38 The main entry point for this pass is schedule_insns(), called for
39 each function. The work of the scheduler is organized in three
40 levels: (1) function level: insns are subject to splitting,
41 control-flow-graph is constructed, regions are computed (after
42 reload, each region is of one block), (2) region level: control
43 flow graph attributes required for interblock scheduling are
44 computed (dominators, reachability, etc.), data dependences and
45 priorities are computed, and (3) block level: insns in the block
46 are actually scheduled. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "tm.h"
52 #include "toplev.h"
53 #include "rtl.h"
54 #include "tm_p.h"
55 #include "hard-reg-set.h"
56 #include "regs.h"
57 #include "function.h"
58 #include "flags.h"
59 #include "insn-config.h"
60 #include "insn-attr.h"
61 #include "except.h"
62 #include "toplev.h"
63 #include "recog.h"
64 #include "cfglayout.h"
65 #include "params.h"
66 #include "sched-int.h"
67 #include "target.h"
68 #include "timevar.h"
69 #include "tree-pass.h"
70 #include "dbgcnt.h"
72 #ifdef INSN_SCHEDULING
73 /* Some accessor macros for h_i_d members only used within this file.
   Each one indexes h_i_d by the INSN_UID of the insn it is given. */
74 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
75 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
76 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
78 /* nr_inter counts the interblock motions and nr_spec the speculative
   motions for the function. */
79 static int nr_inter, nr_spec;
/* Forward declarations of file-local predicates. */
81 static int is_cfg_nonregular (void);
82 static bool sched_is_disabled_for_current_region_p (void);
/* Descriptor of one scheduling region.  The region is the unit of
   interblock scheduling: an insn may migrate between blocks that
   belong to the same region, travelling in the 'up' direction along
   control flow graph edges.  */
typedef struct
{
  /* How many extended basic blocks the region holds.  */
  int rgn_nr_blocks;
  /* Where the region's blocks start; this is an index into
     rgn_bb_table.  */
  int rgn_blocks;
  /* Nonzero when the dependencies of this region are already
     computed.  In practice this marks a recovery block.  */
  unsigned int dont_calc_deps : 1;
  /* Nonzero when the region contains at least one non-trivial ebb.  */
  unsigned int has_real_ebb : 1;
}
region;
101 /* Number of regions in the procedure. */
102 static int nr_regions;
104 /* Table of region descriptions. */
105 static region *rgn_table;
107 /* Array of lists of regions' blocks. The blocks of one region occupy
   consecutive entries, starting at that region's rgn_blocks index. */
108 static int *rgn_bb_table;
110 /* Topological order of blocks in the region (if b2 is reachable from
111 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
112 always referred to by either block or b, while its topological
113 order name (in the region) is referred to by bb. */
114 static int *block_to_bb;
116 /* The number of the region containing a block. */
117 static int *containing_rgn;
119 /* The minimum probability of reaching a source block so that it will be
120 considered for speculative scheduling. */
121 static int min_spec_prob;
/* Shorthands for the fields of region RGN in rgn_table and for the
   per-block maps declared above. */
123 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
124 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
125 #define RGN_DONT_CALC_DEPS(rgn) (rgn_table[rgn].dont_calc_deps)
126 #define RGN_HAS_REAL_EBB(rgn) (rgn_table[rgn].has_real_ebb)
127 #define BLOCK_TO_BB(block) (block_to_bb[block])
128 #define CONTAINING_RGN(block) (containing_rgn[block])
/* Region construction and dumping routines. */
130 void debug_regions (void);
131 static void find_single_block_region (void);
132 static void find_rgns (void);
133 static void extend_rgns (int *, int *, sbitmap, int *);
134 static bool too_large (int, int *, int *);
136 extern void debug_live (int, int);
138 /* Blocks of the current region being scheduled. Presumably these hold
   the RGN_NR_BLOCKS and RGN_BLOCKS values of that region -- confirm
   against the code that assigns them. */
139 static int current_nr_blocks;
140 static int current_blocks;
/* NOTE(review): presumably the number of insns in the current region;
   the name is the only evidence here, verify against its users. */
142 static int rgn_n_insns;
144 /* The mapping from ebb to block. */
145 /* ebb_head [i] - is index in rgn_bb_table, while
146 BB_TO_BLOCK (i) - is basic block index.
147 BASIC_BLOCK (BB_TO_BLOCK (i)) - head of ebb, also available as
   EBB_FIRST_BB (i). EBB_LAST_BB (i) is the block stored just before
   the head of ebb i + 1. */
148 #define BB_TO_BLOCK(ebb) (rgn_bb_table[ebb_head[ebb]])
149 #define EBB_FIRST_BB(ebb) BASIC_BLOCK (BB_TO_BLOCK (ebb))
150 #define EBB_LAST_BB(ebb) BASIC_BLOCK (rgn_bb_table[ebb_head[ebb + 1] - 1])
152 /* Target info declarations.
154 The block currently being scheduled is referred to as the "target" block,
155 while other blocks in the region from which insns can be moved to the
156 target are called "source" blocks. The candidate structure holds info
157 about such sources: are they valid? Speculative? Etc. */
158 typedef struct
160 basic_block *first_member;
161 int nr_members;
163 bblst;
165 typedef struct
167 char is_valid;
168 char is_speculative;
169 int src_prob;
170 bblst split_bbs;
171 bblst update_bbs;
173 candidate;
/* Candidate descriptors; the accessor macros below index this table
   by source bb. */
175 static candidate *candidate_table;
177 /* A speculative motion requires checking live information on the path
178 from 'source' to 'target'. The split blocks are those to be checked.
179 After a speculative motion, live information should be modified in
180 the 'update' blocks.
182 Lists of split and update blocks for each candidate of the current
183 target are in array bblst_table. */
184 static basic_block *bblst_table;
/* NOTE(review): presumably the capacity of bblst_table and the next
   free slot in it; confirm against the code that fills the table. */
185 static int bblst_size, bblst_last;
/* Accessors for the candidate entry of source bb SRC. */
187 #define IS_VALID(src) ( candidate_table[src].is_valid )
188 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
189 #define SRC_PROB(src) ( candidate_table[src].src_prob )
191 /* The bb being currently scheduled. */
192 static int target_bb;
194 /* List of edges. */
195 typedef struct
197 edge *first_member;
198 int nr_members;
200 edgelst;
/* Storage for the members of edge lists and the next free slot in it.
   extract_edgelst resets edgelst_last to 0 on every call, so the space
   is reused. */
202 static edge *edgelst_table;
203 static int edgelst_last;
205 static void extract_edgelst (sbitmap, edgelst *);
208 /* Target info functions. */
209 static void split_edges (int, int, edgelst *);
210 static void compute_trg_info (int);
211 void debug_candidate (int);
212 void debug_candidates (int);
214 /* Dominators array: dom[i] contains the sbitmap of dominators of
215 bb i in the region. */
216 static sbitmap *dom;
218 /* bb 0 is the only region entry. */
219 #define IS_RGN_ENTRY(bb) (!bb)
221 /* Is bb_src dominated by bb_trg. */
222 #define IS_DOMINATED(bb_src, bb_trg) \
223 ( TEST_BIT (dom[bb_src], bb_trg) )
225 /* Probability: prob[i] is an int in [0, REG_BR_PROB_BASE] which is
226 the probability of bb i relative to the region entry. */
227 static int *prob;
229 /* Bit-set of edges, where bit i stands for edge i. */
230 typedef sbitmap edgeset;
232 /* Number of edges in the region. */
233 static int rgn_nr_edges;
235 /* Array of size rgn_nr_edges. */
236 static edge *rgn_edges;
238 /* Mapping from each edge in the graph to its number in the rgn.
   The number is kept in the edge's aux field. */
239 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
240 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
242 /* The split edges of a source bb are different for each target
243 bb. In order to compute this efficiently, the 'potential-split edges'
244 are computed for each bb prior to scheduling a region. This is actually
245 the split edges of each bb relative to the region entry.
247 pot_split[bb] is the set of potential split edges of bb. */
248 static edgeset *pot_split;
250 /* For every bb, a set of its ancestor edges. */
251 static edgeset *ancestor_edges;
253 /* Array of EBBs sizes. Currently we can get an ebb only through
254 splitting of the block currently being scheduled, therefore we don't
255 need an ebb_head array for every region; it's sufficient to hold it only
256 for the current one. */
257 static int *ebb_head;
259 static void compute_dom_prob_ps (int);
/* Per-insn shorthands: each maps INSN to the bb of its block
   (BLOCK_TO_BB (BLOCK_NUM (INSN))) and, for the first two, reads that
   bb's candidate entry. */
261 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
262 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
263 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
265 /* Speculative scheduling functions. */
266 static int check_live_1 (int, rtx);
267 static void update_live_1 (int, rtx);
268 static int check_live (rtx, int);
269 static void update_live (rtx, int);
270 static void set_spec_fed (rtx);
271 static int is_pfree (rtx, int, int);
272 static int find_conditional_protection (rtx, int);
273 static int is_conditionally_protected (rtx, int, int);
274 static int is_prisky (rtx, int, int);
275 static int is_exception_free (rtx, int, int);
277 static bool sets_likely_spilled (rtx);
278 static void sets_likely_spilled_1 (rtx, const_rtx, void *);
279 static void add_branch_dependences (rtx, rtx);
280 static void compute_block_dependences (int);
282 static void init_regions (void);
283 static void schedule_region (int);
284 static rtx concat_INSN_LIST (rtx, rtx);
285 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
286 static void propagate_deps (int, struct deps *);
287 static void free_pending_lists (void);
289 /* Functions for construction of the control flow graph. */
291 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
293 We decide not to build the control flow graph if there is possibly more
294 than one entry to the function, if computed branches exist, if we
295 have nonlocal gotos, or if we have an unreachable loop. */
297 static int
298 is_cfg_nonregular (void)
300 basic_block b;
301 rtx insn;
303 /* If we have a label that could be the target of a nonlocal goto, then
304 the cfg is not well structured. */
305 if (nonlocal_goto_handler_labels)
306 return 1;
308 /* If we have any forced labels, then the cfg is not well structured. */
309 if (forced_labels)
310 return 1;
312 /* If we have exception handlers, then we consider the cfg not well
313 structured. ?!? We should be able to handle this now that we
314 compute an accurate cfg for EH. */
315 if (current_function_has_exception_handlers ())
316 return 1;
318 /* If we have insns which refer to labels as non-jumped-to operands,
319 then we consider the cfg not well structured. */
320 FOR_EACH_BB (b)
321 FOR_BB_INSNS (b, insn)
323 rtx note, next, set, dest;
325 /* If this function has a computed jump, then we consider the cfg
326 not well structured. */
327 if (JUMP_P (insn) && computed_jump_p (insn))
328 return 1;
330 if (!INSN_P (insn))
331 continue;
333 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
334 if (note == NULL_RTX)
335 continue;
337 /* For that label not to be seen as a referred-to label, this
338 must be a single-set which is feeding a jump *only*. This
339 could be a conditional jump with the label split off for
340 machine-specific reasons or a casesi/tablejump. */
341 next = next_nonnote_insn (insn);
342 if (next == NULL_RTX
343 || !JUMP_P (next)
344 || (JUMP_LABEL (next) != XEXP (note, 0)
345 && find_reg_note (next, REG_LABEL_TARGET,
346 XEXP (note, 0)) == NULL_RTX)
347 || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
348 return 1;
350 set = single_set (insn);
351 if (set == NULL_RTX)
352 return 1;
354 dest = SET_DEST (set);
355 if (!REG_P (dest) || !dead_or_set_p (next, dest))
356 return 1;
359 /* Unreachable loops with more than one basic block are detected
360 during the DFS traversal in find_rgns.
362 Unreachable loops with a single block are detected here. This
363 test is redundant with the one in find_rgns, but it's much
364 cheaper to go ahead and catch the trivial case here. */
365 FOR_EACH_BB (b)
367 if (EDGE_COUNT (b->preds) == 0
368 || (single_pred_p (b)
369 && single_pred (b) == b))
370 return 1;
373 /* All the tests passed. Consider the cfg well structured. */
374 return 0;
377 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */
379 static void
380 extract_edgelst (sbitmap set, edgelst *el)
382 unsigned int i = 0;
383 sbitmap_iterator sbi;
385 /* edgelst table space is reused in each call to extract_edgelst. */
386 edgelst_last = 0;
388 el->first_member = &edgelst_table[edgelst_last];
389 el->nr_members = 0;
391 /* Iterate over each word in the bitset. */
392 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
394 edgelst_table[edgelst_last++] = rgn_edges[i];
395 el->nr_members++;
399 /* Functions for the construction of regions. */
401 /* Print the regions, for debugging purposes. Callable from debugger. */
403 void
404 debug_regions (void)
406 int rgn, bb;
408 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
409 for (rgn = 0; rgn < nr_regions; rgn++)
411 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
412 rgn_table[rgn].rgn_nr_blocks);
413 fprintf (sched_dump, ";;\tbb/block: ");
415 /* We don't have ebb_head initialized yet, so we can't use
416 BB_TO_BLOCK (). */
417 current_blocks = RGN_BLOCKS (rgn);
419 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
420 fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);
422 fprintf (sched_dump, "\n\n");
426 /* Build a single block region for each basic block in the function.
427 This allows for using the same code for interblock and basic block
428 scheduling. */
430 static void
431 find_single_block_region (void)
433 basic_block bb;
435 nr_regions = 0;
437 FOR_EACH_BB (bb)
439 rgn_bb_table[nr_regions] = bb->index;
440 RGN_NR_BLOCKS (nr_regions) = 1;
441 RGN_BLOCKS (nr_regions) = nr_regions;
442 RGN_DONT_CALC_DEPS (nr_regions) = 0;
443 RGN_HAS_REAL_EBB (nr_regions) = 0;
444 CONTAINING_RGN (bb->index) = nr_regions;
445 BLOCK_TO_BB (bb->index) = 0;
446 nr_regions++;
450 /* Update number of blocks and the estimate for number of insns
451 in the region. Return true if the region is "too large" for interblock
452 scheduling (compile time considerations). */
454 static bool
455 too_large (int block, int *num_bbs, int *num_insns)
457 (*num_bbs)++;
458 (*num_insns) += (INSN_LUID (BB_END (BASIC_BLOCK (block)))
459 - INSN_LUID (BB_HEAD (BASIC_BLOCK (block))));
461 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
462 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
/* UPDATE_LOOP_RELATIONS (BLK, HDR): BLK has been found to belong to the
   loop headed by HDR.  Check if the loop headed by max_hdr[BLK] is still
   an inner loop, and put in max_hdr[BLK] the header of the most inner
   loop containing BLK.

   Of the two competing headers, the one with the smaller dfs_nr loses
   its bit in INNER; the one with the larger dfs_nr ends up in
   max_hdr[BLK].  Nothing changes when both have the same dfs_nr.

   The macro uses the variables max_hdr, dfs_nr and inner of the
   enclosing scope.  It expands to a single statement, so it is safe
   in an unbraced if/else, and it evaluates each argument exactly
   once.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)                                 \
  do                                                                    \
    {                                                                   \
      int ulr_blk_ = (blk);                                             \
      int ulr_hdr_ = (hdr);                                             \
                                                                        \
      if (max_hdr[ulr_blk_] == -1)                                      \
        max_hdr[ulr_blk_] = ulr_hdr_;                                   \
      else if (dfs_nr[max_hdr[ulr_blk_]] > dfs_nr[ulr_hdr_])            \
        RESET_BIT (inner, ulr_hdr_);                                    \
      else if (dfs_nr[max_hdr[ulr_blk_]] < dfs_nr[ulr_hdr_])            \
        {                                                               \
          RESET_BIT (inner, max_hdr[ulr_blk_]);                         \
          max_hdr[ulr_blk_] = ulr_hdr_;                                 \
        }                                                               \
    }                                                                   \
  while (0)
481 /* Find regions for interblock scheduling.
483 A region for scheduling can be:
485 * A loop-free procedure, or
487 * A reducible inner loop, or
489 * A basic block not contained in any other region.
491 ?!? In theory we could build other regions based on extended basic
492 blocks or reverse extended basic blocks. Is it worth the trouble?
494 Loop blocks that form a region are put into the region's block list
495 in topological order.
497 This procedure stores its results into the following global (ick) variables
499 * nr_regions
500 * rgn_table
501 * rgn_bb_table
502 * block_to_bb
503 * containing_rgn
505 We use dominator relationships to avoid making regions out of non-reducible
506 loops.
508 This procedure needs to be converted to work on pred/succ lists instead
509 of edge tables. That would simplify it somewhat.

   The work is done in three steps: a DFS traversal that finds loop
   headers and inner loops, a second traversal that turns acceptable
   inner loops into regions (skipped when an unreachable block was
   seen), and a final sweep that puts every remaining block into a
   region of its own. When PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS is
   positive, extend_rgns is called between the last two steps.

   NOTE(review): nr_regions is only incremented here, never reset;
   presumably the caller zeroes it first -- confirm. */
511 static void
512 find_rgns (void)
514 int *max_hdr, *dfs_nr, *degree;
515 char no_loops = 1;
516 int node, child, loop_head, i, head, tail;
517 int count = 0, sp, idx = 0;
518 edge_iterator current_edge;
519 edge_iterator *stack;
520 int num_bbs, num_insns, unreachable;
521 int too_large_failure;
522 basic_block bb;
524 /* Note if a block is a natural loop header. */
525 sbitmap header;
527 /* Note if a block is a natural inner loop header. */
528 sbitmap inner;
530 /* Note if a block is in the block queue. */
531 sbitmap in_queue;
533 /* Note if a block is on the DFS stack, i.e. on the path currently
   being explored by the first traversal. */
534 sbitmap in_stack;
536 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
537 and a mapping from block to its loop header (if the block is contained
538 in a loop, else -1).
540 Store results in HEADER, INNER, and MAX_HDR respectively, these will
541 be used as inputs to the second traversal.
543 STACK, SP and DFS_NR are only used during the first traversal. */
545 /* Allocate and initialize variables for the first traversal. */
546 max_hdr = XNEWVEC (int, last_basic_block);
547 dfs_nr = XCNEWVEC (int, last_basic_block);
548 stack = XNEWVEC (edge_iterator, n_edges);
550 inner = sbitmap_alloc (last_basic_block);
551 sbitmap_ones (inner);
553 header = sbitmap_alloc (last_basic_block);
554 sbitmap_zero (header);
556 in_queue = sbitmap_alloc (last_basic_block);
557 sbitmap_zero (in_queue);
559 in_stack = sbitmap_alloc (last_basic_block);
560 sbitmap_zero (in_stack);
562 for (i = 0; i < last_basic_block; i++)
563 max_hdr[i] = -1;
/* An edge iterator counts as passed when it is at the end of its edge
   list or when the aux field of its edge is set; SET_EDGE_PASSED marks
   an edge by storing the edge itself in aux. */
565 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
566 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
568 /* DFS traversal to find inner loops in the cfg. */
570 current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
571 sp = -1;
573 while (1)
575 if (EDGE_PASSED (current_edge))
577 /* We have reached a leaf node or a node that was already
578 processed. Pop edges off the stack until we find
579 an edge that has not yet been processed. */
580 while (sp >= 0 && EDGE_PASSED (current_edge))
582 /* Pop entry off the stack. */
583 current_edge = stack[sp--];
584 node = ei_edge (current_edge)->src->index;
585 gcc_assert (node != ENTRY_BLOCK);
586 child = ei_edge (current_edge)->dest->index;
587 gcc_assert (child != EXIT_BLOCK);
588 RESET_BIT (in_stack, child);
/* Propagate CHILD's loop header to NODE if that header is still on
   the DFS stack. */
589 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
590 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
591 ei_next (&current_edge);
594 /* See if have finished the DFS tree traversal. */
595 if (sp < 0 && EDGE_PASSED (current_edge))
596 break;
598 /* Nope, continue the traversal with the popped node. */
599 continue;
602 /* Process a node. */
603 node = ei_edge (current_edge)->src->index;
604 gcc_assert (node != ENTRY_BLOCK);
605 SET_BIT (in_stack, node);
606 dfs_nr[node] = ++count;
608 /* We don't traverse to the exit block. */
609 child = ei_edge (current_edge)->dest->index;
610 if (child == EXIT_BLOCK)
612 SET_EDGE_PASSED (current_edge);
613 ei_next (&current_edge);
614 continue;
617 /* If the successor is in the stack, then we've found a loop.
618 Mark the loop, if it is not a natural loop, then it will
619 be rejected during the second traversal. */
620 if (TEST_BIT (in_stack, child))
622 no_loops = 0;
623 SET_BIT (header, child);
624 UPDATE_LOOP_RELATIONS (node, child);
625 SET_EDGE_PASSED (current_edge);
626 ei_next (&current_edge);
627 continue;
630 /* If the child was already visited, then there is no need to visit
631 it again. Just update the loop relationships and restart
632 with a new edge. */
633 if (dfs_nr[child])
635 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
636 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
637 SET_EDGE_PASSED (current_edge);
638 ei_next (&current_edge);
639 continue;
642 /* Push an entry on the stack and continue DFS traversal. */
643 stack[++sp] = current_edge;
644 SET_EDGE_PASSED (current_edge);
645 current_edge = ei_start (ei_edge (current_edge)->dest->succs);
648 /* Reset ->aux field used by EDGE_PASSED. */
649 FOR_ALL_BB (bb)
651 edge_iterator ei;
652 edge e;
653 FOR_EACH_EDGE (e, ei, bb->succs)
654 e->aux = NULL;
658 /* Another check for unreachable blocks. The earlier test in
659 is_cfg_nonregular only finds unreachable blocks that do not
660 form a loop.
662 The DFS traversal will mark every block that is reachable from
663 the entry node by placing a nonzero value in dfs_nr. Thus if
664 dfs_nr is zero for any block, then it must be unreachable. */
665 unreachable = 0;
666 FOR_EACH_BB (bb)
667 if (dfs_nr[bb->index] == 0)
669 unreachable = 1;
670 break;
673 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
674 to hold degree counts. */
675 degree = dfs_nr;
677 FOR_EACH_BB (bb)
678 degree[bb->index] = EDGE_COUNT (bb->preds);
680 /* Do not perform region scheduling if there are any unreachable
681 blocks. */
682 if (!unreachable)
684 int *queue, *degree1 = NULL;
685 /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
686 there basic blocks, which are forced to be region heads.
687 This is done to try to assemble few smaller regions
688 from a too_large region. */
689 sbitmap extended_rgn_header = NULL;
690 bool extend_regions_p;
692 if (no_loops)
693 SET_BIT (header, 0);
695 /* Second traversal: find reducible inner loops and topologically sort
696 block of each region. */
698 queue = XNEWVEC (int, n_basic_blocks);
700 extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
701 if (extend_regions_p)
703 degree1 = xmalloc (last_basic_block * sizeof (int));
704 extended_rgn_header = sbitmap_alloc (last_basic_block);
705 sbitmap_zero (extended_rgn_header);
708 /* Find blocks which are inner loop headers. We still have non-reducible
709 loops to consider at this point. */
710 FOR_EACH_BB (bb)
712 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
714 edge e;
715 edge_iterator ei;
716 basic_block jbb;
718 /* Now check that the loop is reducible. We do this separate
719 from finding inner loops so that we do not find a reducible
720 loop which contains an inner non-reducible loop.
722 A simple way to find reducible/natural loops is to verify
723 that each block in the loop is dominated by the loop
724 header.
726 If there exists a block that is not dominated by the loop
727 header, then the block is reachable from outside the loop
728 and thus the loop is not a natural loop. */
729 FOR_EACH_BB (jbb)
731 /* First identify blocks in the loop, except for the loop
732 entry block. */
733 if (bb->index == max_hdr[jbb->index] && bb != jbb)
735 /* Now verify that the block is dominated by the loop
736 header. */
737 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
738 break;
742 /* If we exited the loop early, then BB is the header of
743 a non-reducible loop and we should quit processing it
744 now. */
745 if (jbb != EXIT_BLOCK_PTR)
746 continue;
748 /* BB is a header of an inner loop, or block 0 in a subroutine
749 with no loops at all. */
750 head = tail = -1;
751 too_large_failure = 0;
752 loop_head = max_hdr[bb->index];
754 if (extend_regions_p)
755 /* We save degree in case when we meet a too_large region
756 and cancel it. We need a correct degree later when
757 calling extend_rgns. */
758 memcpy (degree1, degree, last_basic_block * sizeof (int));
760 /* Decrease degree of all BB's successors for topological
761 ordering. */
762 FOR_EACH_EDGE (e, ei, bb->succs)
763 if (e->dest != EXIT_BLOCK_PTR)
764 --degree[e->dest->index];
766 /* Estimate # insns, and count # blocks in the region. */
767 num_bbs = 1;
768 num_insns = (INSN_LUID (BB_END (bb))
769 - INSN_LUID (BB_HEAD (bb)));
771 /* Find all loop latches (blocks with back edges to the loop
772 header) or all the leaf blocks if the cfg has no loops.
774 Place those blocks into the queue. */
775 if (no_loops)
777 FOR_EACH_BB (jbb)
778 /* Leaf nodes have only a single successor which must
779 be EXIT_BLOCK. */
780 if (single_succ_p (jbb)
781 && single_succ (jbb) == EXIT_BLOCK_PTR)
783 queue[++tail] = jbb->index;
784 SET_BIT (in_queue, jbb->index);
786 if (too_large (jbb->index, &num_bbs, &num_insns))
788 too_large_failure = 1;
789 break;
793 else
795 edge e;
797 FOR_EACH_EDGE (e, ei, bb->preds)
799 if (e->src == ENTRY_BLOCK_PTR)
800 continue;
802 node = e->src->index;
804 if (max_hdr[node] == loop_head && node != bb->index)
806 /* This is a loop latch. */
807 queue[++tail] = node;
808 SET_BIT (in_queue, node);
810 if (too_large (node, &num_bbs, &num_insns))
812 too_large_failure = 1;
813 break;
819 /* Now add all the blocks in the loop to the queue.
821 We know the loop is a natural loop; however the algorithm
822 above will not always mark certain blocks as being in the
823 loop. Consider:
824 node children
825 a b,c
826 b d
827 c a,d
828 d b
830 The algorithm in the DFS traversal may not mark B & D as part
831 of the loop (i.e. they will not have max_hdr set to A).
833 We know they can not be loop latches (else they would have
834 had max_hdr set since they'd have a backedge to a dominator
835 block). So we don't need them on the initial queue.
837 We know they are part of the loop because they are dominated
838 by the loop header and can be reached by a backwards walk of
839 the edges starting with nodes on the initial queue.
841 It is safe and desirable to include those nodes in the
842 loop/scheduling region. To do so we would need to decrease
843 the degree of a node if it is the target of a backedge
844 within the loop itself as the node is placed in the queue.
846 We do not do this because I'm not sure that the actual
847 scheduling code will properly handle this case. ?!? */
849 while (head < tail && !too_large_failure)
851 edge e;
852 child = queue[++head];
854 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
856 node = e->src->index;
858 /* See discussion above about nodes not marked as in
859 this loop during the initial DFS traversal. */
860 if (e->src == ENTRY_BLOCK_PTR
861 || max_hdr[node] != loop_head)
/* Setting TAIL to -1 makes the test below reject this loop, so no
   region is formed for it. */
863 tail = -1;
864 break;
866 else if (!TEST_BIT (in_queue, node) && node != bb->index)
868 queue[++tail] = node;
869 SET_BIT (in_queue, node);
871 if (too_large (node, &num_bbs, &num_insns))
873 too_large_failure = 1;
874 break;
880 if (tail >= 0 && !too_large_failure)
882 /* Place the loop header into list of region blocks. */
883 degree[bb->index] = -1;
884 rgn_bb_table[idx] = bb->index;
885 RGN_NR_BLOCKS (nr_regions) = num_bbs;
886 RGN_BLOCKS (nr_regions) = idx++;
887 RGN_DONT_CALC_DEPS (nr_regions) = 0;
888 RGN_HAS_REAL_EBB (nr_regions) = 0;
889 CONTAINING_RGN (bb->index) = nr_regions;
890 BLOCK_TO_BB (bb->index) = count = 0;
892 /* Remove blocks from queue[] when their in degree
893 becomes zero. Repeat until no blocks are left on the
894 list. This produces a topological list of blocks in
895 the region. */
896 while (tail >= 0)
898 if (head < 0)
899 head = tail;
900 child = queue[head];
901 if (degree[child] == 0)
903 edge e;
/* A degree of -1 marks CHILD as already placed in a region. The slot
   of CHILD in the queue is refilled with the last queue entry. */
905 degree[child] = -1;
906 rgn_bb_table[idx++] = child;
907 BLOCK_TO_BB (child) = ++count;
908 CONTAINING_RGN (child) = nr_regions;
909 queue[head] = queue[tail--];
911 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
912 if (e->dest != EXIT_BLOCK_PTR)
913 --degree[e->dest->index];
915 else
916 --head;
918 ++nr_regions;
920 else if (extend_regions_p)
922 /* Restore DEGREE. The saved copy and the working copy swap
   roles, which is why both pointers are freed at the end. */
923 int *t = degree;
925 degree = degree1;
926 degree1 = t;
928 /* And force successors of BB to be region heads.
929 This may provide several smaller regions instead
930 of one too_large region. */
931 FOR_EACH_EDGE (e, ei, bb->succs)
932 if (e->dest != EXIT_BLOCK_PTR)
933 SET_BIT (extended_rgn_header, e->dest->index);
937 free (queue);
939 if (extend_regions_p)
941 free (degree1);
943 sbitmap_a_or_b (header, header, extended_rgn_header);
944 sbitmap_free (extended_rgn_header);
946 extend_rgns (degree, &idx, header, max_hdr);
950 /* Any block that did not end up in a region is placed into a region
951 by itself. Such blocks still have a non-negative degree. */
952 FOR_EACH_BB (bb)
953 if (degree[bb->index] >= 0)
955 rgn_bb_table[idx] = bb->index;
956 RGN_NR_BLOCKS (nr_regions) = 1;
957 RGN_BLOCKS (nr_regions) = idx++;
958 RGN_DONT_CALC_DEPS (nr_regions) = 0;
959 RGN_HAS_REAL_EBB (nr_regions) = 0;
960 CONTAINING_RGN (bb->index) = nr_regions++;
961 BLOCK_TO_BB (bb->index) = 0;
/* Release the work arrays and bitmaps. DEGREE is either the dfs_nr
   array or the buffer it was swapped with. */
964 free (max_hdr);
965 free (degree);
966 free (stack);
967 sbitmap_free (header);
968 sbitmap_free (inner);
969 sbitmap_free (in_queue);
970 sbitmap_free (in_stack);
973 static int gather_region_statistics (int **);
974 static void print_region_statistics (int *, int, int *, int);
976 /* Calculate the histogram that shows the number of regions having the
977 given number of basic blocks, and store it in the RSP array. Return
978 the size of this array. */
979 static int
980 gather_region_statistics (int **rsp)
982 int i, *a = 0, a_sz = 0;
984 /* a[i] is the number of regions that have (i + 1) basic blocks. */
985 for (i = 0; i < nr_regions; i++)
987 int nr_blocks = RGN_NR_BLOCKS (i);
989 gcc_assert (nr_blocks >= 1);
991 if (nr_blocks > a_sz)
993 a = xrealloc (a, nr_blocks * sizeof (*a));
995 a[a_sz++] = 0;
996 while (a_sz != nr_blocks);
999 a[nr_blocks - 1]++;
1002 *rsp = a;
1003 return a_sz;
1006 /* Print regions statistics. S1 and S2 denote the data before and after
1007 calling extend_rgns, respectively. */
1008 static void
1009 print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
1011 int i;
1013 /* We iterate until s2_sz because extend_rgns does not decrease
1014 the maximal region size. */
1015 for (i = 1; i < s2_sz; i++)
1017 int n1, n2;
1019 n2 = s2[i];
1021 if (n2 == 0)
1022 continue;
1024 if (i >= s1_sz)
1025 n1 = 0;
1026 else
1027 n1 = s1[i];
1029 fprintf (sched_dump, ";; Region extension statistics: size %d: " \
1030 "was %d + %d more\n", i + 1, n1, n2 - n1);
1034 /* Extend regions.
1035 DEGREE - Array of incoming edge count, considering only
1036 the edges, that don't have their sources in formed regions yet.
1037 IDXP - pointer to the next available index in rgn_bb_table.
1038 HEADER - set of all region heads.
1039 LOOP_HDR - mapping from block to the containing loop
1040 (two blocks can reside within one region if they have
1041 the same loop header). */
1042 static void
1043 extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
1045 int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
1046 int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS;
1048 max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);
1050 max_hdr = xmalloc (last_basic_block * sizeof (*max_hdr));
1052 order = xmalloc (last_basic_block * sizeof (*order));
1053 post_order_compute (order, false, false);
1055 for (i = nblocks - 1; i >= 0; i--)
1057 int bbn = order[i];
1058 if (degree[bbn] >= 0)
1060 max_hdr[bbn] = bbn;
1061 rescan = 1;
1063 else
1064 /* This block already was processed in find_rgns. */
1065 max_hdr[bbn] = -1;
1068 /* The idea is to topologically walk through CFG in top-down order.
1069 During the traversal, if all the predecessors of a node are
1070 marked to be in the same region (they all have the same max_hdr),
1071 then current node is also marked to be a part of that region.
1072 Otherwise the node starts its own region.
1073 CFG should be traversed until no further changes are made. On each
1074 iteration the set of the region heads is extended (the set of those
1075 blocks that have max_hdr[bbi] == bbi). This set is upper bounded by the
1076 set of all basic blocks, thus the algorithm is guaranteed to terminate. */
1078 while (rescan && iter < max_iter)
1080 rescan = 0;
1082 for (i = nblocks - 1; i >= 0; i--)
1084 edge e;
1085 edge_iterator ei;
1086 int bbn = order[i];
1088 if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn))
1090 int hdr = -1;
1092 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds)
1094 int predn = e->src->index;
1096 if (predn != ENTRY_BLOCK
1097 /* If pred wasn't processed in find_rgns. */
1098 && max_hdr[predn] != -1
1099 /* And pred and bb reside in the same loop.
1100 (Or out of any loop). */
1101 && loop_hdr[bbn] == loop_hdr[predn])
1103 if (hdr == -1)
1104 /* Then bb extends the containing region of pred. */
1105 hdr = max_hdr[predn];
1106 else if (hdr != max_hdr[predn])
1107 /* Too bad, there are at least two predecessors
1108 that reside in different regions. Thus, BB should
1109 begin its own region. */
1111 hdr = bbn;
1112 break;
1115 else
1116 /* BB starts its own region. */
1118 hdr = bbn;
1119 break;
1123 if (hdr == bbn)
1125 /* If BB start its own region,
1126 update set of headers with BB. */
1127 SET_BIT (header, bbn);
1128 rescan = 1;
1130 else
1131 gcc_assert (hdr != -1);
1133 max_hdr[bbn] = hdr;
1137 iter++;
1140 /* Statistics were gathered on the SPEC2000 package of tests with
1141 mainline weekly snapshot gcc-4.1-20051015 on ia64.
1143 Statistics for SPECint:
1144 1 iteration : 1751 cases (38.7%)
1145 2 iterations: 2770 cases (61.3%)
1146 Blocks wrapped in regions by find_rgns without extension: 18295 blocks
1147 Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
1148 (We don't count single block regions here).
1150 Statistics for SPECfp:
1151 1 iteration : 621 cases (35.9%)
1152 2 iterations: 1110 cases (64.1%)
1153 Blocks wrapped in regions by find_rgns without extension: 6476 blocks
1154 Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
1155 (We don't count single block regions here).
1157 By default we do at most 2 iterations.
1158 This can be overridden with max-sched-extend-regions-iters parameter:
1159 0 - disable region extension,
1160 N > 0 - do at most N iterations. */
1162 if (sched_verbose && iter != 0)
1163 fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
1164 rescan ? "... failed" : "");
1166 if (!rescan && iter != 0)
1168 int *s1 = NULL, s1_sz = 0;
1170 /* Save the old statistics for later printout. */
1171 if (sched_verbose >= 6)
1172 s1_sz = gather_region_statistics (&s1);
1174 /* We have succeeded. Now assemble the regions. */
1175 for (i = nblocks - 1; i >= 0; i--)
1177 int bbn = order[i];
1179 if (max_hdr[bbn] == bbn)
1180 /* BBN is a region head. */
1182 edge e;
1183 edge_iterator ei;
1184 int num_bbs = 0, j, num_insns = 0, large;
1186 large = too_large (bbn, &num_bbs, &num_insns);
1188 degree[bbn] = -1;
1189 rgn_bb_table[idx] = bbn;
1190 RGN_BLOCKS (nr_regions) = idx++;
1191 RGN_DONT_CALC_DEPS (nr_regions) = 0;
1192 RGN_HAS_REAL_EBB (nr_regions) = 0;
1193 CONTAINING_RGN (bbn) = nr_regions;
1194 BLOCK_TO_BB (bbn) = 0;
1196 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs)
1197 if (e->dest != EXIT_BLOCK_PTR)
1198 degree[e->dest->index]--;
1200 if (!large)
1201 /* Here we check whether the region is too_large. */
1202 for (j = i - 1; j >= 0; j--)
1204 int succn = order[j];
1205 if (max_hdr[succn] == bbn)
1207 if ((large = too_large (succn, &num_bbs, &num_insns)))
1208 break;
1212 if (large)
1213 /* If the region is too_large, then wrap every block of
1214 the region into single block region.
1215 Here we wrap region head only. Other blocks are
1216 processed in the below cycle. */
1218 RGN_NR_BLOCKS (nr_regions) = 1;
1219 nr_regions++;
1222 num_bbs = 1;
1224 for (j = i - 1; j >= 0; j--)
1226 int succn = order[j];
1228 if (max_hdr[succn] == bbn)
1229 /* This cycle iterates over all basic blocks, that
1230 are supposed to be in the region with head BBN,
1231 and wraps them into that region (or in single
1232 block region). */
1234 gcc_assert (degree[succn] == 0);
1236 degree[succn] = -1;
1237 rgn_bb_table[idx] = succn;
1238 BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
1239 CONTAINING_RGN (succn) = nr_regions;
1241 if (large)
1242 /* Wrap SUCCN into single block region. */
1244 RGN_BLOCKS (nr_regions) = idx;
1245 RGN_NR_BLOCKS (nr_regions) = 1;
1246 RGN_DONT_CALC_DEPS (nr_regions) = 0;
1247 RGN_HAS_REAL_EBB (nr_regions) = 0;
1248 nr_regions++;
1251 idx++;
1253 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs)
1254 if (e->dest != EXIT_BLOCK_PTR)
1255 degree[e->dest->index]--;
1259 if (!large)
1261 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1262 nr_regions++;
1267 if (sched_verbose >= 6)
1269 int *s2, s2_sz;
1271 /* Get the new statistics and print the comparison with the
1272 one before calling this function. */
1273 s2_sz = gather_region_statistics (&s2);
1274 print_region_statistics (s1, s1_sz, s2, s2_sz);
1275 free (s1);
1276 free (s2);
1280 free (order);
1281 free (max_hdr);
1283 *idxp = idx;
1286 /* Functions for regions scheduling information. */
1288 /* Compute dominators, probability, and potential-split-edges of bb.
1289 Assume that these values were already computed for bb's predecessors. */
1291 static void
1292 compute_dom_prob_ps (int bb)
1294 edge_iterator in_ei;
1295 edge in_edge;
1297 /* We shouldn't have any real ebbs yet. */
1298 gcc_assert (ebb_head [bb] == bb + current_blocks);
1300 if (IS_RGN_ENTRY (bb))
1302 SET_BIT (dom[bb], 0);
1303 prob[bb] = REG_BR_PROB_BASE;
1304 return;
1307 prob[bb] = 0;
1309 /* Initialize dom[bb] to '111..1'. */
1310 sbitmap_ones (dom[bb]);
1312 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
1314 int pred_bb;
1315 edge out_edge;
1316 edge_iterator out_ei;
1318 if (in_edge->src == ENTRY_BLOCK_PTR)
1319 continue;
1321 pred_bb = BLOCK_TO_BB (in_edge->src->index);
1322 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
1323 sbitmap_a_or_b (ancestor_edges[bb],
1324 ancestor_edges[bb], ancestor_edges[pred_bb]);
1326 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));
1328 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);
1330 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
1331 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));
1333 prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
1336 SET_BIT (dom[bb], bb);
1337 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);
1339 if (sched_verbose >= 2)
1340 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
1341 (100 * prob[bb]) / REG_BR_PROB_BASE);
1344 /* Functions for target info. */
1346 /* Compute in BL the list of split-edges of bb_src relatively to bb_trg.
1347 Note that bb_trg dominates bb_src. */
1349 static void
1350 split_edges (int bb_src, int bb_trg, edgelst *bl)
1352 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
1353 sbitmap_copy (src, pot_split[bb_src]);
1355 sbitmap_difference (src, src, pot_split[bb_trg]);
1356 extract_edgelst (src, bl);
1357 sbitmap_free (src);
1360 /* Find the valid candidate-source-blocks for the target block TRG, compute
1361 their probability, and check if they are speculative or not.
1362 For speculative sources, compute their update-blocks and split-blocks. */
1364 static void
1365 compute_trg_info (int trg)
1367 candidate *sp;
1368 edgelst el = { NULL, 0 };
1369 int i, j, k, update_idx;
1370 basic_block block;
1371 sbitmap visited;
1372 edge_iterator ei;
1373 edge e;
1375 /* Define some of the fields for the target bb as well. */
1376 sp = candidate_table + trg;
1377 sp->is_valid = 1;
1378 sp->is_speculative = 0;
1379 sp->src_prob = REG_BR_PROB_BASE;
1381 visited = sbitmap_alloc (last_basic_block);
1383 for (i = trg + 1; i < current_nr_blocks; i++)
1385 sp = candidate_table + i;
1387 sp->is_valid = IS_DOMINATED (i, trg);
1388 if (sp->is_valid)
1390 int tf = prob[trg], cf = prob[i];
1392 /* In CFGs with low probability edges TF can possibly be zero. */
1393 sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
1394 sp->is_valid = (sp->src_prob >= min_spec_prob);
1397 if (sp->is_valid)
1399 split_edges (i, trg, &el);
1400 sp->is_speculative = (el.nr_members) ? 1 : 0;
1401 if (sp->is_speculative && !flag_schedule_speculative)
1402 sp->is_valid = 0;
1405 if (sp->is_valid)
1407 /* Compute split blocks and store them in bblst_table.
1408 The TO block of every split edge is a split block. */
1409 sp->split_bbs.first_member = &bblst_table[bblst_last];
1410 sp->split_bbs.nr_members = el.nr_members;
1411 for (j = 0; j < el.nr_members; bblst_last++, j++)
1412 bblst_table[bblst_last] = el.first_member[j]->dest;
1413 sp->update_bbs.first_member = &bblst_table[bblst_last];
1415 /* Compute update blocks and store them in bblst_table.
1416 For every split edge, look at the FROM block, and check
1417 all out edges. For each out edge that is not a split edge,
1418 add the TO block to the update block list. This list can end
1419 up with a lot of duplicates. We need to weed them out to avoid
1420 overrunning the end of the bblst_table. */
1422 update_idx = 0;
1423 sbitmap_zero (visited);
1424 for (j = 0; j < el.nr_members; j++)
1426 block = el.first_member[j]->src;
1427 FOR_EACH_EDGE (e, ei, block->succs)
1429 if (!TEST_BIT (visited, e->dest->index))
1431 for (k = 0; k < el.nr_members; k++)
1432 if (e == el.first_member[k])
1433 break;
1435 if (k >= el.nr_members)
1437 bblst_table[bblst_last++] = e->dest;
1438 SET_BIT (visited, e->dest->index);
1439 update_idx++;
1444 sp->update_bbs.nr_members = update_idx;
1446 /* Make sure we didn't overrun the end of bblst_table. */
1447 gcc_assert (bblst_last <= bblst_size);
1449 else
1451 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1453 sp->is_speculative = 0;
1454 sp->src_prob = 0;
1458 sbitmap_free (visited);
1461 /* Print candidates info, for debugging purposes. Callable from debugger. */
1463 void
1464 debug_candidate (int i)
1466 if (!candidate_table[i].is_valid)
1467 return;
1469 if (candidate_table[i].is_speculative)
1471 int j;
1472 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1474 fprintf (sched_dump, "split path: ");
1475 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1477 int b = candidate_table[i].split_bbs.first_member[j]->index;
1479 fprintf (sched_dump, " %d ", b);
1481 fprintf (sched_dump, "\n");
1483 fprintf (sched_dump, "update path: ");
1484 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
1486 int b = candidate_table[i].update_bbs.first_member[j]->index;
1488 fprintf (sched_dump, " %d ", b);
1490 fprintf (sched_dump, "\n");
1492 else
1494 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
1498 /* Print candidates info, for debugging purposes. Callable from debugger. */
1500 void
1501 debug_candidates (int trg)
1503 int i;
1505 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
1506 BB_TO_BLOCK (trg), trg);
1507 for (i = trg + 1; i < current_nr_blocks; i++)
1508 debug_candidate (i);
1511 /* Functions for speculative scheduling. */
1513 static bitmap_head not_in_df;
1515 /* Return 0 if x is a set of a register alive in the beginning of one
1516 of the split-blocks of src, otherwise return 1. */
1518 static int
1519 check_live_1 (int src, rtx x)
1521 int i;
1522 int regno;
1523 rtx reg = SET_DEST (x);
1525 if (reg == 0)
1526 return 1;
1528 while (GET_CODE (reg) == SUBREG
1529 || GET_CODE (reg) == ZERO_EXTRACT
1530 || GET_CODE (reg) == STRICT_LOW_PART)
1531 reg = XEXP (reg, 0);
1533 if (GET_CODE (reg) == PARALLEL)
1535 int i;
1537 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1538 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1539 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
1540 return 1;
1542 return 0;
1545 if (!REG_P (reg))
1546 return 1;
1548 regno = REGNO (reg);
1550 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
1552 /* Global registers are assumed live. */
1553 return 0;
1555 else
1557 if (regno < FIRST_PSEUDO_REGISTER)
1559 /* Check for hard registers. */
1560 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1561 while (--j >= 0)
1563 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1565 basic_block b = candidate_table[src].split_bbs.first_member[i];
1566 int t = bitmap_bit_p (&not_in_df, b->index);
1568 /* We can have split blocks, that were recently generated.
1569 such blocks are always outside current region. */
1570 gcc_assert (!t || (CONTAINING_RGN (b->index)
1571 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1573 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
1574 return 0;
1578 else
1580 /* Check for pseudo registers. */
1581 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
1583 basic_block b = candidate_table[src].split_bbs.first_member[i];
1584 int t = bitmap_bit_p (&not_in_df, b->index);
1586 gcc_assert (!t || (CONTAINING_RGN (b->index)
1587 != CONTAINING_RGN (BB_TO_BLOCK (src))));
1589 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
1590 return 0;
1595 return 1;
1598 /* If x is a set of a register R, mark that R is alive in the beginning
1599 of every update-block of src. */
1601 static void
1602 update_live_1 (int src, rtx x)
1604 int i;
1605 int regno;
1606 rtx reg = SET_DEST (x);
1608 if (reg == 0)
1609 return;
1611 while (GET_CODE (reg) == SUBREG
1612 || GET_CODE (reg) == ZERO_EXTRACT
1613 || GET_CODE (reg) == STRICT_LOW_PART)
1614 reg = XEXP (reg, 0);
1616 if (GET_CODE (reg) == PARALLEL)
1618 int i;
1620 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
1621 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
1622 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));
1624 return;
1627 if (!REG_P (reg))
1628 return;
1630 /* Global registers are always live, so the code below does not apply
1631 to them. */
1633 regno = REGNO (reg);
1635 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
1637 if (regno < FIRST_PSEUDO_REGISTER)
1639 int j = hard_regno_nregs[regno][GET_MODE (reg)];
1640 while (--j >= 0)
1642 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1644 basic_block b = candidate_table[src].update_bbs.first_member[i];
1646 SET_REGNO_REG_SET (df_get_live_in (b), regno + j);
1650 else
1652 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
1654 basic_block b = candidate_table[src].update_bbs.first_member[i];
1656 SET_REGNO_REG_SET (df_get_live_in (b), regno);
1662 /* Return 1 if insn can be speculatively moved from block src to trg,
1663 otherwise return 0. Called before first insertion of insn to
1664 ready-list or before the scheduling. */
1666 static int
1667 check_live (rtx insn, int src)
1669 /* Find the registers set by instruction. */
1670 if (GET_CODE (PATTERN (insn)) == SET
1671 || GET_CODE (PATTERN (insn)) == CLOBBER)
1672 return check_live_1 (src, PATTERN (insn));
1673 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1675 int j;
1676 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1677 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1678 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1679 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
1680 return 0;
1682 return 1;
1685 return 1;
1688 /* Update the live registers info after insn was moved speculatively from
1689 block src to trg. */
1691 static void
1692 update_live (rtx insn, int src)
1694 /* Find the registers set by instruction. */
1695 if (GET_CODE (PATTERN (insn)) == SET
1696 || GET_CODE (PATTERN (insn)) == CLOBBER)
1697 update_live_1 (src, PATTERN (insn));
1698 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
1700 int j;
1701 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
1702 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
1703 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
1704 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
/* Nonzero if block bb_to is equal to, or reachable from block bb_from.
   The test succeeds when the two blocks are equal, when bb_from is the
   region entry, or when the single predecessor edge of bb_from is among
   the ancestor edges of bb_to.  The arguments are parenthesized so that
   operator expressions can be passed safely; note that BB_FROM is still
   evaluated more than once, so it must not have side effects.  */
#define IS_REACHABLE(bb_from, bb_to)                                    \
  ((bb_from) == (bb_to)                                                 \
   || IS_RGN_ENTRY ((bb_from))                                          \
   || (TEST_BIT (ancestor_edges[bb_to],                                 \
         EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK ((bb_from))))))))
1715 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
1717 static void
1718 set_spec_fed (rtx load_insn)
1720 sd_iterator_def sd_it;
1721 dep_t dep;
1723 FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
1724 if (DEP_TYPE (dep) == REG_DEP_TRUE)
1725 FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
1728 /* On the path from the insn to load_insn_bb, find a conditional
1729 branch depending on insn, that guards the speculative load. */
1731 static int
1732 find_conditional_protection (rtx insn, int load_insn_bb)
1734 sd_iterator_def sd_it;
1735 dep_t dep;
1737 /* Iterate through DEF-USE forward dependences. */
1738 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
1740 rtx next = DEP_CON (dep);
1742 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
1743 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
1744 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
1745 && load_insn_bb != INSN_BB (next)
1746 && DEP_TYPE (dep) == REG_DEP_TRUE
1747 && (JUMP_P (next)
1748 || find_conditional_protection (next, load_insn_bb)))
1749 return 1;
1751 return 0;
1752 } /* find_conditional_protection */
1754 /* Returns 1 if the same insn1 that participates in the computation
1755 of load_insn's address is feeding a conditional branch that is
1756 guarding on load_insn. This is true if we find a the two DEF-USE
1757 chains:
1758 insn1 -> ... -> conditional-branch
1759 insn1 -> ... -> load_insn,
1760 and if a flow path exist:
1761 insn1 -> ... -> conditional-branch -> ... -> load_insn,
1762 and if insn1 is on the path
1763 region-entry -> ... -> bb_trg -> ... load_insn.
1765 Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
1766 Locate the branch by following INSN_FORW_DEPS from insn1. */
1768 static int
1769 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
1771 sd_iterator_def sd_it;
1772 dep_t dep;
1774 FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
1776 rtx insn1 = DEP_PRO (dep);
1778 /* Must be a DEF-USE dependence upon non-branch. */
1779 if (DEP_TYPE (dep) != REG_DEP_TRUE
1780 || JUMP_P (insn1))
1781 continue;
1783 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
1784 if (INSN_BB (insn1) == bb_src
1785 || (CONTAINING_RGN (BLOCK_NUM (insn1))
1786 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
1787 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
1788 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
1789 continue;
1791 /* Now search for the conditional-branch. */
1792 if (find_conditional_protection (insn1, bb_src))
1793 return 1;
1795 /* Recursive step: search another insn1, "above" current insn1. */
1796 return is_conditionally_protected (insn1, bb_src, bb_trg);
1799 /* The chain does not exist. */
1800 return 0;
1801 } /* is_conditionally_protected */
1803 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
1804 load_insn can move speculatively from bb_src to bb_trg. All the
1805 following must hold:
1807 (1) both loads have 1 base register (PFREE_CANDIDATEs).
1808 (2) load_insn and load1 have a def-use dependence upon
1809 the same insn 'insn1'.
1810 (3) either load2 is in bb_trg, or:
1811 - there's only one split-block, and
1812 - load1 is on the escape path, and
1814 From all these we can conclude that the two loads access memory
1815 addresses that differ at most by a constant, and hence if moving
1816 load_insn would cause an exception, it would have been caused by
1817 load2 anyhow. */
1819 static int
1820 is_pfree (rtx load_insn, int bb_src, int bb_trg)
1822 sd_iterator_def back_sd_it;
1823 dep_t back_dep;
1824 candidate *candp = candidate_table + bb_src;
1826 if (candp->split_bbs.nr_members != 1)
1827 /* Must have exactly one escape block. */
1828 return 0;
1830 FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
1832 rtx insn1 = DEP_PRO (back_dep);
1834 if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
1835 /* Found a DEF-USE dependence (insn1, load_insn). */
1837 sd_iterator_def fore_sd_it;
1838 dep_t fore_dep;
1840 FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
1842 rtx insn2 = DEP_CON (fore_dep);
1844 if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
1846 /* Found a DEF-USE dependence (insn1, insn2). */
1847 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
1848 /* insn2 not guaranteed to be a 1 base reg load. */
1849 continue;
1851 if (INSN_BB (insn2) == bb_trg)
1852 /* insn2 is the similar load, in the target block. */
1853 return 1;
1855 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
1856 /* insn2 is a similar load, in a split-block. */
1857 return 1;
1863 /* Couldn't find a similar load. */
1864 return 0;
1865 } /* is_pfree */
1867 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
1868 a load moved speculatively, or if load_insn is protected by
1869 a compare on load_insn's address). */
1871 static int
1872 is_prisky (rtx load_insn, int bb_src, int bb_trg)
1874 if (FED_BY_SPEC_LOAD (load_insn))
1875 return 1;
1877 if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
1878 /* Dependence may 'hide' out of the region. */
1879 return 1;
1881 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
1882 return 1;
1884 return 0;
1887 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
1888 Return 1 if insn is exception-free (and the motion is valid)
1889 and 0 otherwise. */
1891 static int
1892 is_exception_free (rtx insn, int bb_src, int bb_trg)
1894 int insn_class = haifa_classify_insn (insn);
1896 /* Handle non-load insns. */
1897 switch (insn_class)
1899 case TRAP_FREE:
1900 return 1;
1901 case TRAP_RISKY:
1902 return 0;
1903 default:;
1906 /* Handle loads. */
1907 if (!flag_schedule_speculative_load)
1908 return 0;
1909 IS_LOAD_INSN (insn) = 1;
1910 switch (insn_class)
1912 case IFREE:
1913 return (1);
1914 case IRISKY:
1915 return 0;
1916 case PFREE_CANDIDATE:
1917 if (is_pfree (insn, bb_src, bb_trg))
1918 return 1;
1919 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
1920 case PRISKY_CANDIDATE:
1921 if (!flag_schedule_speculative_load_dangerous
1922 || is_prisky (insn, bb_src, bb_trg))
1923 return 0;
1924 break;
1925 default:;
1928 return flag_schedule_speculative_load_dangerous;
1931 /* The number of insns from the current block scheduled so far. */
1932 static int sched_target_n_insns;
1933 /* The number of insns from the current block to be scheduled in total. */
1934 static int target_n_insns;
1935 /* The number of insns from the entire region scheduled so far. */
1936 static int sched_n_insns;
1938 /* Implementations of the sched_info functions for region scheduling. */
1939 static void init_ready_list (void);
1940 static int can_schedule_ready_p (rtx);
1941 static void begin_schedule_ready (rtx, rtx);
1942 static ds_t new_ready (rtx, ds_t);
1943 static int schedule_more_p (void);
1944 static const char *rgn_print_insn (rtx, int);
1945 static int rgn_rank (rtx, rtx);
1946 static int contributes_to_priority (rtx, rtx);
1947 static void compute_jump_reg_dependencies (rtx, regset, regset, regset);
1949 /* Functions for speculative scheduling. */
1950 static void add_remove_insn (rtx, int);
1951 static void extend_regions (void);
1952 static void add_block1 (basic_block, basic_block);
1953 static void fix_recovery_cfg (int, int, int);
1954 static basic_block advance_target_bb (basic_block, rtx);
1956 static void debug_rgn_dependencies (int);
1958 /* Return nonzero if there are more insns that should be scheduled. */
1960 static int
1961 schedule_more_p (void)
1963 return sched_target_n_insns < target_n_insns;
1966 /* Add all insns that are initially ready to the ready list READY. Called
1967 once before scheduling a set of insns. */
1969 static void
1970 init_ready_list (void)
1972 rtx prev_head = current_sched_info->prev_head;
1973 rtx next_tail = current_sched_info->next_tail;
1974 int bb_src;
1975 rtx insn;
1977 target_n_insns = 0;
1978 sched_target_n_insns = 0;
1979 sched_n_insns = 0;
1981 /* Print debugging information. */
1982 if (sched_verbose >= 5)
1983 debug_rgn_dependencies (target_bb);
1985 /* Prepare current target block info. */
1986 if (current_nr_blocks > 1)
1988 candidate_table = XNEWVEC (candidate, current_nr_blocks);
1990 bblst_last = 0;
1991 /* bblst_table holds split blocks and update blocks for each block after
1992 the current one in the region. split blocks and update blocks are
1993 the TO blocks of region edges, so there can be at most rgn_nr_edges
1994 of them. */
1995 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
1996 bblst_table = XNEWVEC (basic_block, bblst_size);
1998 edgelst_last = 0;
1999 edgelst_table = XNEWVEC (edge, rgn_nr_edges);
2001 compute_trg_info (target_bb);
2004 /* Initialize ready list with all 'ready' insns in target block.
2005 Count number of insns in the target block being scheduled. */
2006 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
2008 try_ready (insn);
2009 target_n_insns++;
2011 gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
2014 /* Add to ready list all 'ready' insns in valid source blocks.
2015 For speculative insns, check-live, exception-free, and
2016 issue-delay. */
2017 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
2018 if (IS_VALID (bb_src))
2020 rtx src_head;
2021 rtx src_next_tail;
2022 rtx tail, head;
2024 get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
2025 &head, &tail);
2026 src_next_tail = NEXT_INSN (tail);
2027 src_head = head;
2029 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
2030 if (INSN_P (insn))
2031 try_ready (insn);
2035 /* Called after taking INSN from the ready list. Returns nonzero if this
2036 insn can be scheduled, nonzero if we should silently discard it. */
2038 static int
2039 can_schedule_ready_p (rtx insn)
2041 /* An interblock motion? */
2042 if (INSN_BB (insn) != target_bb
2043 && IS_SPECULATIVE_INSN (insn)
2044 && !check_live (insn, INSN_BB (insn)))
2045 return 0;
2046 else
2047 return 1;
2050 /* Updates counter and other information. Split from can_schedule_ready_p ()
2051 because when we schedule insn speculatively then insn passed to
2052 can_schedule_ready_p () differs from the one passed to
2053 begin_schedule_ready (). */
2054 static void
2055 begin_schedule_ready (rtx insn, rtx last ATTRIBUTE_UNUSED)
2057 /* An interblock motion? */
2058 if (INSN_BB (insn) != target_bb)
2060 if (IS_SPECULATIVE_INSN (insn))
2062 gcc_assert (check_live (insn, INSN_BB (insn)));
2064 update_live (insn, INSN_BB (insn));
2066 /* For speculative load, mark insns fed by it. */
2067 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
2068 set_spec_fed (insn);
2070 nr_spec++;
2072 nr_inter++;
2074 else
2076 /* In block motion. */
2077 sched_target_n_insns++;
2079 sched_n_insns++;
2082 /* Called after INSN has all its hard dependencies resolved and the speculation
2083 of type TS is enough to overcome them all.
2084 Return nonzero if it should be moved to the ready list or the queue, or zero
2085 if we should silently discard it. */
2086 static ds_t
2087 new_ready (rtx next, ds_t ts)
2089 if (INSN_BB (next) != target_bb)
2091 int not_ex_free = 0;
2093 /* For speculative insns, before inserting to ready/queue,
2094 check live, exception-free, and issue-delay. */
2095 if (!IS_VALID (INSN_BB (next))
2096 || CANT_MOVE (next)
2097 || (IS_SPECULATIVE_INSN (next)
2098 && ((recog_memoized (next) >= 0
2099 && min_insn_conflict_delay (curr_state, next, next)
2100 > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
2101 || IS_SPECULATION_CHECK_P (next)
2102 || !check_live (next, INSN_BB (next))
2103 || (not_ex_free = !is_exception_free (next, INSN_BB (next),
2104 target_bb)))))
2106 if (not_ex_free
2107 /* We are here because is_exception_free () == false.
2108 But we possibly can handle that with control speculation. */
2109 && (current_sched_info->flags & DO_SPECULATION)
2110 && (spec_info->mask & BEGIN_CONTROL))
2111 /* Here we got new control-speculative instruction. */
2112 ts = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);
2113 else
2114 ts = (ts & ~SPECULATIVE) | HARD_DEP;
2118 return ts;
2121 /* Return a string that contains the insn uid and optionally anything else
2122 necessary to identify this insn in an output. It's valid to use a
2123 static buffer for this. The ALIGNED parameter should cause the string
2124 to be formatted so that multiple output lines will line up nicely. */
2126 static const char *
2127 rgn_print_insn (rtx insn, int aligned)
2129 static char tmp[80];
2131 if (aligned)
2132 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
2133 else
2135 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
2136 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
2137 else
2138 sprintf (tmp, "%d", INSN_UID (insn));
2140 return tmp;
2143 /* Compare priority of two insns. Return a positive number if the second
2144 insn is to be preferred for scheduling, and a negative one if the first
2145 is to be preferred. Zero if they are equally good. */
2147 static int
2148 rgn_rank (rtx insn1, rtx insn2)
2150 /* Some comparison make sense in interblock scheduling only. */
2151 if (INSN_BB (insn1) != INSN_BB (insn2))
2153 int spec_val, prob_val;
2155 /* Prefer an inblock motion on an interblock motion. */
2156 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
2157 return 1;
2158 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
2159 return -1;
2161 /* Prefer a useful motion on a speculative one. */
2162 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
2163 if (spec_val)
2164 return spec_val;
2166 /* Prefer a more probable (speculative) insn. */
2167 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
2168 if (prob_val)
2169 return prob_val;
2171 return 0;
2174 /* NEXT is an instruction that depends on INSN (a backward dependence);
2175 return nonzero if we should include this dependence in priority
2176 calculations. */
2178 static int
2179 contributes_to_priority (rtx next, rtx insn)
2181 /* NEXT and INSN reside in one ebb. */
2182 return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
2185 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are
2186 conditionally set before INSN. Store the set of registers that
2187 must be considered as used by this jump in USED and that of
2188 registers that must be considered as set in SET. */
2190 static void
2191 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
2192 regset cond_exec ATTRIBUTE_UNUSED,
2193 regset used ATTRIBUTE_UNUSED,
2194 regset set ATTRIBUTE_UNUSED)
2196 /* Nothing to do here, since we postprocess jumps in
2197 add_branch_dependences. */
2200 /* Used in schedule_insns to initialize current_sched_info for scheduling
2201 regions (or single basic blocks). */
2203 static struct sched_info region_sched_info =
2205 init_ready_list,
2206 can_schedule_ready_p,
2207 schedule_more_p,
2208 new_ready,
2209 rgn_rank,
2210 rgn_print_insn,
2211 contributes_to_priority,
2212 compute_jump_reg_dependencies,
2214 NULL, NULL,
2215 NULL, NULL,
2216 0, 0, 0,
2218 add_remove_insn,
2219 begin_schedule_ready,
2220 add_block1,
2221 advance_target_bb,
2222 fix_recovery_cfg,
2223 SCHED_RGN
2226 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */
2228 static bool
2229 sets_likely_spilled (rtx pat)
2231 bool ret = false;
2232 note_stores (pat, sets_likely_spilled_1, &ret);
2233 return ret;
2236 static void
2237 sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
2239 bool *ret = (bool *) data;
2241 if (GET_CODE (pat) == SET
2242 && REG_P (x)
2243 && REGNO (x) < FIRST_PSEUDO_REGISTER
2244 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
2245 *ret = true;
2248 /* Add dependences so that branches are scheduled to run last in their
2249 block. */
2251 static void
2252 add_branch_dependences (rtx head, rtx tail)
2254 rtx insn, last;
2256 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
2257 that can throw exceptions, force them to remain in order at the end of
2258 the block by adding dependencies and giving the last a high priority.
2259 There may be notes present, and prev_head may also be a note.
2261 Branches must obviously remain at the end. Calls should remain at the
2262 end since moving them results in worse register allocation. Uses remain
2263 at the end to ensure proper register allocation.
2265 cc0 setters remain at the end because they can't be moved away from
2266 their cc0 user.
2268 COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
2270 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
2271 are not moved before reload because we can wind up with register
2272 allocation failures. */
2274 insn = tail;
2275 last = 0;
2276 while (CALL_P (insn)
2277 || JUMP_P (insn)
2278 || (NONJUMP_INSN_P (insn)
2279 && (GET_CODE (PATTERN (insn)) == USE
2280 || GET_CODE (PATTERN (insn)) == CLOBBER
2281 || can_throw_internal (insn)
2282 #ifdef HAVE_cc0
2283 || sets_cc0_p (PATTERN (insn))
2284 #endif
2285 || (!reload_completed
2286 && sets_likely_spilled (PATTERN (insn)))))
2287 || NOTE_P (insn))
2289 if (!NOTE_P (insn))
2291 if (last != 0
2292 && sd_find_dep_between (insn, last, false) == NULL)
2294 if (! sched_insns_conditions_mutex_p (last, insn))
2295 add_dependence (last, insn, REG_DEP_ANTI);
2296 INSN_REF_COUNT (insn)++;
2299 CANT_MOVE (insn) = 1;
2301 last = insn;
2304 /* Don't overrun the bounds of the basic block. */
2305 if (insn == head)
2306 break;
2308 insn = PREV_INSN (insn);
2311 /* Make sure these insns are scheduled last in their block. */
2312 insn = last;
2313 if (insn != 0)
2314 while (insn != head)
2316 insn = prev_nonnote_insn (insn);
2318 if (INSN_REF_COUNT (insn) != 0)
2319 continue;
2321 if (! sched_insns_conditions_mutex_p (last, insn))
2322 add_dependence (last, insn, REG_DEP_ANTI);
2323 INSN_REF_COUNT (insn) = 1;
2326 #ifdef HAVE_conditional_execution
2327 /* Finally, if the block ends in a jump, and we are doing intra-block
2328 scheduling, make sure that the branch depends on any COND_EXEC insns
2329 inside the block to avoid moving the COND_EXECs past the branch insn.
2331 We only have to do this after reload, because (1) before reload there
2332 are no COND_EXEC insns, and (2) the region scheduler is an intra-block
2333 scheduler after reload.
2335 FIXME: We could in some cases move COND_EXEC insns past the branch if
2336 this scheduler would be a little smarter. Consider this code:
2338 T = [addr]
2339 C ? addr += 4
2340 !C ? X += 12
2341 C ? T += 1
2342 C ? jump foo
2344 On a target with a one cycle stall on a memory access the optimal
2345 sequence would be:
2347 T = [addr]
2348 C ? addr += 4
2349 C ? T += 1
2350 C ? jump foo
2351 !C ? X += 12
2353 We don't want to put the 'X += 12' before the branch because it just
2354 wastes a cycle of execution time when the branch is taken.
2356 Note that in the example "!C" will always be true. That is another
2357 possible improvement for handling COND_EXECs in this scheduler: it
2358 could remove always-true predicates. */
2360 if (!reload_completed || ! JUMP_P (tail))
2361 return;
2363 insn = tail;
2364 while (insn != head)
2366 insn = PREV_INSN (insn);
2368 /* Note that we want to add this dependency even when
2369 sched_insns_conditions_mutex_p returns true. The whole point
2370 is that we _want_ this dependency, even if these insns really
2371 are independent. */
2372 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
2373 add_dependence (tail, insn, REG_DEP_ANTI);
2375 #endif
/* Data structures for the computation of data dependences in a region.  We
   keep one `deps' structure for every basic block.  Before analyzing the
   data dependences for a bb, its variables are initialized as a function of
   the variables of its predecessors.  When the analysis for a bb completes,
   we save the contents to the corresponding bb_deps[bb] variable.  */
2384 static struct deps *bb_deps;
2386 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */
2388 static rtx
2389 concat_INSN_LIST (rtx copy, rtx old)
2391 rtx new = old;
2392 for (; copy ; copy = XEXP (copy, 1))
2393 new = alloc_INSN_LIST (XEXP (copy, 0), new);
2394 return new;
2397 static void
2398 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
2399 rtx *old_mems_p)
2401 rtx new_insns = *old_insns_p;
2402 rtx new_mems = *old_mems_p;
2404 while (copy_insns)
2406 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
2407 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
2408 copy_insns = XEXP (copy_insns, 1);
2409 copy_mems = XEXP (copy_mems, 1);
2412 *old_insns_p = new_insns;
2413 *old_mems_p = new_mems;
2416 /* After computing the dependencies for block BB, propagate the dependencies
2417 found in TMP_DEPS to the successors of the block. */
2418 static void
2419 propagate_deps (int bb, struct deps *pred_deps)
2421 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
2422 edge_iterator ei;
2423 edge e;
2425 /* bb's structures are inherited by its successors. */
2426 FOR_EACH_EDGE (e, ei, block->succs)
2428 struct deps *succ_deps;
2429 unsigned reg;
2430 reg_set_iterator rsi;
2432 /* Only bbs "below" bb, in the same region, are interesting. */
2433 if (e->dest == EXIT_BLOCK_PTR
2434 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
2435 || BLOCK_TO_BB (e->dest->index) <= bb)
2436 continue;
2438 succ_deps = bb_deps + BLOCK_TO_BB (e->dest->index);
2440 /* The reg_last lists are inherited by successor. */
2441 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
2443 struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
2444 struct deps_reg *succ_rl = &succ_deps->reg_last[reg];
2446 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
2447 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
2448 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
2449 succ_rl->clobbers);
2450 succ_rl->uses_length += pred_rl->uses_length;
2451 succ_rl->clobbers_length += pred_rl->clobbers_length;
2453 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);
2455 /* Mem read/write lists are inherited by successor. */
2456 concat_insn_mem_list (pred_deps->pending_read_insns,
2457 pred_deps->pending_read_mems,
2458 &succ_deps->pending_read_insns,
2459 &succ_deps->pending_read_mems);
2460 concat_insn_mem_list (pred_deps->pending_write_insns,
2461 pred_deps->pending_write_mems,
2462 &succ_deps->pending_write_insns,
2463 &succ_deps->pending_write_mems);
2465 succ_deps->last_pending_memory_flush
2466 = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
2467 succ_deps->last_pending_memory_flush);
2469 succ_deps->pending_read_list_length
2470 += pred_deps->pending_read_list_length;
2471 succ_deps->pending_write_list_length
2472 += pred_deps->pending_write_list_length;
2473 succ_deps->pending_flush_length += pred_deps->pending_flush_length;
2475 /* last_function_call is inherited by successor. */
2476 succ_deps->last_function_call
2477 = concat_INSN_LIST (pred_deps->last_function_call,
2478 succ_deps->last_function_call);
2480 /* sched_before_next_call is inherited by successor. */
2481 succ_deps->sched_before_next_call
2482 = concat_INSN_LIST (pred_deps->sched_before_next_call,
2483 succ_deps->sched_before_next_call);
2486 /* These lists should point to the right place, for correct
2487 freeing later. */
2488 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
2489 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
2490 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
2491 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;
2493 /* Can't allow these to be freed twice. */
2494 pred_deps->pending_read_insns = 0;
2495 pred_deps->pending_read_mems = 0;
2496 pred_deps->pending_write_insns = 0;
2497 pred_deps->pending_write_mems = 0;
2500 /* Compute dependences inside bb. In a multiple blocks region:
2501 (1) a bb is analyzed after its predecessors, and (2) the lists in
2502 effect at the end of bb (after analyzing for bb) are inherited by
2503 bb's successors.
2505 Specifically for reg-reg data dependences, the block insns are
2506 scanned by sched_analyze () top-to-bottom. Two lists are
2507 maintained by sched_analyze (): reg_last[].sets for register DEFs,
2508 and reg_last[].uses for register USEs.
2510 When analysis is completed for bb, we update for its successors:
2511 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
2512 ; - USES[succ] = Union (USES [succ], DEFS [bb])
2514 The mechanism for computing mem-mem data dependence is very
2515 similar, and the result is interblock dependences in the region. */
2517 static void
2518 compute_block_dependences (int bb)
2520 rtx head, tail;
2521 struct deps tmp_deps;
2523 tmp_deps = bb_deps[bb];
2525 /* Do the analysis for this block. */
2526 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2527 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2529 sched_analyze (&tmp_deps, head, tail);
2530 add_branch_dependences (head, tail);
2532 if (current_nr_blocks > 1)
2533 propagate_deps (bb, &tmp_deps);
2535 /* Free up the INSN_LISTs. */
2536 free_deps (&tmp_deps);
2538 if (targetm.sched.dependencies_evaluation_hook)
2539 targetm.sched.dependencies_evaluation_hook (head, tail);
2542 /* Free dependencies of instructions inside BB. */
2543 static void
2544 free_block_dependencies (int bb)
2546 rtx head;
2547 rtx tail;
2549 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2551 sched_free_deps (head, tail, true);
2554 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
2555 them to the unused_*_list variables, so that they can be reused. */
2557 static void
2558 free_pending_lists (void)
2560 int bb;
2562 for (bb = 0; bb < current_nr_blocks; bb++)
2564 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
2565 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
2566 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
2567 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
2571 /* Print dependences for debugging starting from FROM_BB.
2572 Callable from debugger. */
2573 /* Print dependences for debugging starting from FROM_BB.
2574 Callable from debugger. */
2575 void
2576 debug_rgn_dependencies (int from_bb)
2578 int bb;
2580 fprintf (sched_dump,
2581 ";; --------------- forward dependences: ------------ \n");
2583 for (bb = from_bb; bb < current_nr_blocks; bb++)
2585 rtx head, tail;
2587 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2588 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2589 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n",
2590 BB_TO_BLOCK (bb), bb);
2592 debug_dependencies (head, tail);
2596 /* Print dependencies information for instructions between HEAD and TAIL.
2597 ??? This function would probably fit best in haifa-sched.c. */
2598 void debug_dependencies (rtx head, rtx tail)
2600 rtx insn;
2601 rtx next_tail = NEXT_INSN (tail);
2603 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2604 "insn", "code", "bb", "dep", "prio", "cost",
2605 "reservation");
2606 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n",
2607 "----", "----", "--", "---", "----", "----",
2608 "-----------");
2610 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
2612 if (! INSN_P (insn))
2614 int n;
2615 fprintf (sched_dump, ";; %6d ", INSN_UID (insn));
2616 if (NOTE_P (insn))
2618 n = NOTE_KIND (insn);
2619 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
2621 else
2622 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
2623 continue;
2626 fprintf (sched_dump,
2627 ";; %s%5d%6d%6d%6d%6d%6d ",
2628 (SCHED_GROUP_P (insn) ? "+" : " "),
2629 INSN_UID (insn),
2630 INSN_CODE (insn),
2631 BLOCK_NUM (insn),
2632 sd_lists_size (insn, SD_LIST_BACK),
2633 INSN_PRIORITY (insn),
2634 insn_cost (insn));
2636 if (recog_memoized (insn) < 0)
2637 fprintf (sched_dump, "nothing");
2638 else
2639 print_reservation (sched_dump, insn);
2641 fprintf (sched_dump, "\t: ");
2643 sd_iterator_def sd_it;
2644 dep_t dep;
2646 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
2647 fprintf (sched_dump, "%d ", INSN_UID (DEP_CON (dep)));
2649 fprintf (sched_dump, "\n");
2652 fprintf (sched_dump, "\n");
2655 /* Returns true if all the basic blocks of the current region have
2656 NOTE_DISABLE_SCHED_OF_BLOCK which means not to schedule that region. */
2657 static bool
2658 sched_is_disabled_for_current_region_p (void)
2660 int bb;
2662 for (bb = 0; bb < current_nr_blocks; bb++)
2663 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
2664 return false;
2666 return true;
2669 /* Schedule a region. A region is either an inner loop, a loop-free
2670 subroutine, or a single basic block. Each bb in the region is
2671 scheduled after its flow predecessors. */
2673 static void
2674 schedule_region (int rgn)
2676 basic_block block;
2677 edge_iterator ei;
2678 edge e;
2679 int bb;
2680 int sched_rgn_n_insns = 0;
2682 rgn_n_insns = 0;
2683 /* Set variables for the current region. */
2684 current_nr_blocks = RGN_NR_BLOCKS (rgn);
2685 current_blocks = RGN_BLOCKS (rgn);
2687 /* See comments in add_block1, for what reasons we allocate +1 element. */
2688 ebb_head = xrealloc (ebb_head, (current_nr_blocks + 1) * sizeof (*ebb_head));
2689 for (bb = 0; bb <= current_nr_blocks; bb++)
2690 ebb_head[bb] = current_blocks + bb;
2692 /* Don't schedule region that is marked by
2693 NOTE_DISABLE_SCHED_OF_BLOCK. */
2694 if (sched_is_disabled_for_current_region_p ())
2695 return;
2697 if (!RGN_DONT_CALC_DEPS (rgn))
2699 init_deps_global ();
2701 /* Initializations for region data dependence analysis. */
2702 bb_deps = XNEWVEC (struct deps, current_nr_blocks);
2703 for (bb = 0; bb < current_nr_blocks; bb++)
2704 init_deps (bb_deps + bb);
2706 /* Compute dependencies. */
2707 for (bb = 0; bb < current_nr_blocks; bb++)
2708 compute_block_dependences (bb);
2710 free_pending_lists ();
2712 finish_deps_global ();
2714 free (bb_deps);
2716 else
2717 /* This is a recovery block. It is always a single block region. */
2718 gcc_assert (current_nr_blocks == 1);
2720 /* Set priorities. */
2721 current_sched_info->sched_max_insns_priority = 0;
2722 for (bb = 0; bb < current_nr_blocks; bb++)
2724 rtx head, tail;
2726 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
2727 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
2729 rgn_n_insns += set_priorities (head, tail);
2731 current_sched_info->sched_max_insns_priority++;
2733 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
2734 if (current_nr_blocks > 1)
2736 prob = XNEWVEC (int, current_nr_blocks);
2738 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
2739 sbitmap_vector_zero (dom, current_nr_blocks);
2741 /* Use ->aux to implement EDGE_TO_BIT mapping. */
2742 rgn_nr_edges = 0;
2743 FOR_EACH_BB (block)
2745 if (CONTAINING_RGN (block->index) != rgn)
2746 continue;
2747 FOR_EACH_EDGE (e, ei, block->succs)
2748 SET_EDGE_TO_BIT (e, rgn_nr_edges++);
2751 rgn_edges = XNEWVEC (edge, rgn_nr_edges);
2752 rgn_nr_edges = 0;
2753 FOR_EACH_BB (block)
2755 if (CONTAINING_RGN (block->index) != rgn)
2756 continue;
2757 FOR_EACH_EDGE (e, ei, block->succs)
2758 rgn_edges[rgn_nr_edges++] = e;
2761 /* Split edges. */
2762 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2763 sbitmap_vector_zero (pot_split, current_nr_blocks);
2764 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
2765 sbitmap_vector_zero (ancestor_edges, current_nr_blocks);
2767 /* Compute probabilities, dominators, split_edges. */
2768 for (bb = 0; bb < current_nr_blocks; bb++)
2769 compute_dom_prob_ps (bb);
2771 /* Cleanup ->aux used for EDGE_TO_BIT mapping. */
2772 /* We don't need them anymore. But we want to avoid duplication of
2773 aux fields in the newly created edges. */
2774 FOR_EACH_BB (block)
2776 if (CONTAINING_RGN (block->index) != rgn)
2777 continue;
2778 FOR_EACH_EDGE (e, ei, block->succs)
2779 e->aux = NULL;
2783 /* Now we can schedule all blocks. */
2784 for (bb = 0; bb < current_nr_blocks; bb++)
2786 basic_block first_bb, last_bb, curr_bb;
2787 rtx head, tail;
2789 first_bb = EBB_FIRST_BB (bb);
2790 last_bb = EBB_LAST_BB (bb);
2792 get_ebb_head_tail (first_bb, last_bb, &head, &tail);
2794 if (no_real_insns_p (head, tail))
2796 gcc_assert (first_bb == last_bb);
2797 continue;
2800 current_sched_info->prev_head = PREV_INSN (head);
2801 current_sched_info->next_tail = NEXT_INSN (tail);
2804 /* rm_other_notes only removes notes which are _inside_ the
2805 block---that is, it won't remove notes before the first real insn
2806 or after the last real insn of the block. So if the first insn
2807 has a REG_SAVE_NOTE which would otherwise be emitted before the
2808 insn, it is redundant with the note before the start of the
2809 block, and so we have to take it out. */
2810 if (INSN_P (head))
2812 rtx note;
2814 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
2815 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
2816 remove_note (head, note);
2818 else
2819 /* This means that first block in ebb is empty.
2820 It looks to me as an impossible thing. There at least should be
2821 a recovery check, that caused the splitting. */
2822 gcc_unreachable ();
2824 /* Remove remaining note insns from the block, save them in
2825 note_list. These notes are restored at the end of
2826 schedule_block (). */
2827 rm_other_notes (head, tail);
2829 unlink_bb_notes (first_bb, last_bb);
2831 target_bb = bb;
2833 gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
2834 current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;
2836 curr_bb = first_bb;
2837 if (dbg_cnt (sched_block))
2839 schedule_block (&curr_bb, rgn_n_insns);
2840 gcc_assert (EBB_FIRST_BB (bb) == first_bb);
2841 sched_rgn_n_insns += sched_n_insns;
2843 else
2845 sched_rgn_n_insns += rgn_n_insns;
2848 /* Clean up. */
2849 if (current_nr_blocks > 1)
2851 free (candidate_table);
2852 free (bblst_table);
2853 free (edgelst_table);
2857 /* Sanity check: verify that all region insns were scheduled. */
2858 gcc_assert (sched_rgn_n_insns == rgn_n_insns);
2860 /* Done with this region. */
2862 if (current_nr_blocks > 1)
2864 free (prob);
2865 sbitmap_vector_free (dom);
2866 sbitmap_vector_free (pot_split);
2867 sbitmap_vector_free (ancestor_edges);
2868 free (rgn_edges);
2871 /* Free dependencies. */
2872 for (bb = 0; bb < current_nr_blocks; ++bb)
2873 free_block_dependencies (bb);
2875 gcc_assert (haifa_recovery_bb_ever_added_p
2876 || deps_pools_are_empty_p ());
2879 /* Initialize data structures for region scheduling. */
2881 static void
2882 init_regions (void)
2884 nr_regions = 0;
2885 rgn_table = 0;
2886 rgn_bb_table = 0;
2887 block_to_bb = 0;
2888 containing_rgn = 0;
2889 extend_regions ();
2891 /* Compute regions for scheduling. */
2892 if (reload_completed
2893 || n_basic_blocks == NUM_FIXED_BLOCKS + 1
2894 || !flag_schedule_interblock
2895 || is_cfg_nonregular ())
2897 find_single_block_region ();
2899 else
2901 /* Compute the dominators and post dominators. */
2902 calculate_dominance_info (CDI_DOMINATORS);
2904 /* Find regions. */
2905 find_rgns ();
2907 if (sched_verbose >= 3)
2908 debug_regions ();
2910 /* For now. This will move as more and more of haifa is converted
2911 to using the cfg code. */
2912 free_dominance_info (CDI_DOMINATORS);
2914 RGN_BLOCKS (nr_regions) = RGN_BLOCKS (nr_regions - 1) +
2915 RGN_NR_BLOCKS (nr_regions - 1);
2918 /* The one entry point in this file. */
2920 void
2921 schedule_insns (void)
2923 int rgn;
2925 /* Taking care of this degenerate case makes the rest of
2926 this code simpler. */
2927 if (n_basic_blocks == NUM_FIXED_BLOCKS)
2928 return;
2930 nr_inter = 0;
2931 nr_spec = 0;
2933 /* We need current_sched_info in init_dependency_caches, which is
2934 invoked via sched_init. */
2935 current_sched_info = &region_sched_info;
2937 df_set_flags (DF_LR_RUN_DCE);
2938 df_note_add_problem ();
2939 df_analyze ();
2940 regstat_compute_calls_crossed ();
2942 sched_init ();
2944 bitmap_initialize (&not_in_df, 0);
2945 bitmap_clear (&not_in_df);
2947 min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
2948 / 100);
2950 init_regions ();
2952 /* EBB_HEAD is a region-scope structure. But we realloc it for
2953 each region to save time/memory/something else. */
2954 ebb_head = 0;
2956 /* Schedule every region in the subroutine. */
2957 for (rgn = 0; rgn < nr_regions; rgn++)
2958 if (dbg_cnt (sched_region))
2959 schedule_region (rgn);
2961 free(ebb_head);
2962 /* Reposition the prologue and epilogue notes in case we moved the
2963 prologue/epilogue insns. */
2964 if (reload_completed)
2965 reposition_prologue_and_epilogue_notes ();
2967 if (sched_verbose)
2969 if (reload_completed == 0 && flag_schedule_interblock)
2971 fprintf (sched_dump,
2972 "\n;; Procedure interblock/speculative motions == %d/%d \n",
2973 nr_inter, nr_spec);
2975 else
2976 gcc_assert (nr_inter <= 0);
2977 fprintf (sched_dump, "\n\n");
2980 /* Clean up. */
2981 free (rgn_table);
2982 free (rgn_bb_table);
2983 free (block_to_bb);
2984 free (containing_rgn);
2986 regstat_free_calls_crossed ();
2988 bitmap_clear (&not_in_df);
2990 sched_finish ();
2993 /* INSN has been added to/removed from current region. */
2994 static void
2995 add_remove_insn (rtx insn, int remove_p)
2997 if (!remove_p)
2998 rgn_n_insns++;
2999 else
3000 rgn_n_insns--;
3002 if (INSN_BB (insn) == target_bb)
3004 if (!remove_p)
3005 target_n_insns++;
3006 else
3007 target_n_insns--;
3011 /* Extend internal data structures. */
3012 static void
3013 extend_regions (void)
3015 rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks);
3016 rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks);
3017 block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block);
3018 containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block);
3021 /* BB was added to ebb after AFTER. */
3022 static void
3023 add_block1 (basic_block bb, basic_block after)
3025 extend_regions ();
3027 bitmap_set_bit (&not_in_df, bb->index);
3029 if (after == 0 || after == EXIT_BLOCK_PTR)
3031 int i;
3033 i = RGN_BLOCKS (nr_regions);
3034 /* I - first free position in rgn_bb_table. */
3036 rgn_bb_table[i] = bb->index;
3037 RGN_NR_BLOCKS (nr_regions) = 1;
3038 RGN_DONT_CALC_DEPS (nr_regions) = after == EXIT_BLOCK_PTR;
3039 RGN_HAS_REAL_EBB (nr_regions) = 0;
3040 CONTAINING_RGN (bb->index) = nr_regions;
3041 BLOCK_TO_BB (bb->index) = 0;
3043 nr_regions++;
3045 RGN_BLOCKS (nr_regions) = i + 1;
3047 else
3049 int i, pos;
3051 /* We need to fix rgn_table, block_to_bb, containing_rgn
3052 and ebb_head. */
3054 BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);
3056 /* We extend ebb_head to one more position to
3057 easily find the last position of the last ebb in
3058 the current region. Thus, ebb_head[BLOCK_TO_BB (after) + 1]
3059 is _always_ valid for access. */
3061 i = BLOCK_TO_BB (after->index) + 1;
3062 pos = ebb_head[i] - 1;
3063 /* Now POS is the index of the last block in the region. */
3065 /* Find index of basic block AFTER. */
3066 for (; rgn_bb_table[pos] != after->index; pos--);
3068 pos++;
3069 gcc_assert (pos > ebb_head[i - 1]);
3071 /* i - ebb right after "AFTER". */
3072 /* ebb_head[i] - VALID. */
3074 /* Source position: ebb_head[i]
3075 Destination position: ebb_head[i] + 1
3076 Last position:
3077 RGN_BLOCKS (nr_regions) - 1
3078 Number of elements to copy: (last_position) - (source_position) + 1
3081 memmove (rgn_bb_table + pos + 1,
3082 rgn_bb_table + pos,
3083 ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
3084 * sizeof (*rgn_bb_table));
3086 rgn_bb_table[pos] = bb->index;
3088 for (; i <= current_nr_blocks; i++)
3089 ebb_head [i]++;
3091 i = CONTAINING_RGN (after->index);
3092 CONTAINING_RGN (bb->index) = i;
3094 RGN_HAS_REAL_EBB (i) = 1;
3096 for (++i; i <= nr_regions; i++)
3097 RGN_BLOCKS (i)++;
3101 /* Fix internal data after interblock movement of jump instruction.
3102 For parameter meaning please refer to
3103 sched-int.h: struct sched_info: fix_recovery_cfg. */
3104 static void
3105 fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
3107 int old_pos, new_pos, i;
3109 BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);
3111 for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
3112 rgn_bb_table[old_pos] != check_bb_nexti;
3113 old_pos--);
3114 gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);
3116 for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
3117 rgn_bb_table[new_pos] != bbi;
3118 new_pos--);
3119 new_pos++;
3120 gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);
3122 gcc_assert (new_pos < old_pos);
3124 memmove (rgn_bb_table + new_pos + 1,
3125 rgn_bb_table + new_pos,
3126 (old_pos - new_pos) * sizeof (*rgn_bb_table));
3128 rgn_bb_table[new_pos] = check_bb_nexti;
3130 for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
3131 ebb_head[i]++;
3134 /* Return next block in ebb chain. For parameter meaning please refer to
3135 sched-int.h: struct sched_info: advance_target_bb. */
3136 static basic_block
3137 advance_target_bb (basic_block bb, rtx insn)
3139 if (insn)
3140 return 0;
3142 gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
3143 && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
3144 return bb->next_bb;
3147 #endif
/* Gate for the first scheduling pass: run it only when the target
   supports insn scheduling, flag_schedule_insns is set and the
   sched_func debug counter allows it.  */
static bool
gate_handle_sched (void)
{
  bool run = false;

#ifdef INSN_SCHEDULING
  run = flag_schedule_insns && dbg_cnt (sched_func);
#endif

  return run;
}
/* Run instruction scheduler.  Without INSN_SCHEDULING this does
   nothing.  Always returns 0.  */
static unsigned int
rest_of_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  schedule_insns ();
#endif

  return 0;
}
/* Gate for the second scheduling pass: run it only when the target
   supports insn scheduling, optimization is on,
   flag_schedule_insns_after_reload is set and the sched2_func debug
   counter allows it.  */
static bool
gate_handle_sched2 (void)
{
  bool run = false;

#ifdef INSN_SCHEDULING
  run = (optimize > 0
         && flag_schedule_insns_after_reload
         && dbg_cnt (sched2_func));
#endif

  return run;
}
/* Run second scheduling pass after reload.  Uses schedule_ebbs when
   superblock or trace scheduling was requested, and schedule_insns
   otherwise.  Always returns 0.  */
static unsigned int
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  /* Do control and data sched analysis again,
     and write some more of the results to dump file.  */
  if (!(flag_sched2_use_superblocks || flag_sched2_use_traces))
    schedule_insns ();
  else
    schedule_ebbs ();
#endif

  return 0;
}
3195 struct tree_opt_pass pass_sched =
3197 "sched1", /* name */
3198 gate_handle_sched, /* gate */
3199 rest_of_handle_sched, /* execute */
3200 NULL, /* sub */
3201 NULL, /* next */
3202 0, /* static_pass_number */
3203 TV_SCHED, /* tv_id */
3204 0, /* properties_required */
3205 0, /* properties_provided */
3206 0, /* properties_destroyed */
3207 0, /* todo_flags_start */
3208 TODO_df_finish | TODO_verify_rtl_sharing |
3209 TODO_dump_func |
3210 TODO_verify_flow |
3211 TODO_ggc_collect, /* todo_flags_finish */
3212 'S' /* letter */
3215 struct tree_opt_pass pass_sched2 =
3217 "sched2", /* name */
3218 gate_handle_sched2, /* gate */
3219 rest_of_handle_sched2, /* execute */
3220 NULL, /* sub */
3221 NULL, /* next */
3222 0, /* static_pass_number */
3223 TV_SCHED2, /* tv_id */
3224 0, /* properties_required */
3225 0, /* properties_provided */
3226 0, /* properties_destroyed */
3227 0, /* todo_flags_start */
3228 TODO_df_finish | TODO_verify_rtl_sharing |
3229 TODO_dump_func |
3230 TODO_verify_flow |
3231 TODO_ggc_collect, /* todo_flags_finish */
3232 'R' /* letter */