* arm.c (adjacent_mem_locations): Reject volatile memory refs.
[official-gcc.git] / gcc / modulo-sched.c
blob950313eae46076a713dc40d69c2c788b60dd6385
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tm_p.h"
31 #include "hard-reg-set.h"
32 #include "regs.h"
33 #include "function.h"
34 #include "flags.h"
35 #include "insn-config.h"
36 #include "insn-attr.h"
37 #include "except.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "sched-int.h"
41 #include "target.h"
42 #include "cfglayout.h"
43 #include "cfgloop.h"
44 #include "cfghooks.h"
45 #include "expr.h"
46 #include "params.h"
47 #include "gcov-io.h"
48 #include "df.h"
49 #include "ddg.h"
51 #ifdef INSN_SCHEDULING
53 /* This file contains the implementation of the Swing Modulo Scheduler,
54 described in the following references:
55 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
56 Lifetime--sensitive modulo scheduling in a production environment.
57 IEEE Trans. on Comps., 50(3), March 2001
58 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62 The basic structure is:
63 1. Build a data-dependence graph (DDG) for each loop.
64 2. Use the DDG to order the insns of a loop (not in topological order
65 necessarily, but rather) trying to place each insn after all its
66 predecessors _or_ after all its successors.
67 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68 4. Use the ordering to perform list-scheduling of the loop:
69 1. Set II = MII. We will try to schedule the loop within II cycles.
70 2. Try to schedule the insns one by one according to the ordering.
71 For each insn compute an interval of cycles by considering already-
72 scheduled preds and succs (and associated latencies); try to place
73 the insn in the cycles of this window checking for potential
74 resource conflicts (using the DFA interface).
75 Note: this is different from the cycle-scheduling of schedule_insns;
76 here the insns are not scheduled monotonically top-down (nor bottom-
77 up).
78 3. If failed in scheduling all insns - bump II++ and try again, unless
79 II reaches an upper bound MaxII, in which case report failure.
80 5. If we succeeded in scheduling the loop within II cycles, we now
81 generate prolog and epilog, decrease the counter of the loop, and
82 perform modulo variable expansion for live ranges that span more than
83 II cycles (i.e. use register copies to prevent a def from overwriting
84 itself before reaching the use).
88 /* This page defines partial-schedule structures and functions for
89 modulo scheduling. */
91 typedef struct partial_schedule *partial_schedule_ptr;
92 typedef struct ps_insn *ps_insn_ptr;
94 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
95 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
97 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
98 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
100 /* Perform signed modulo, always returning a non-negative value. */
101 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
103 /* The number of different iterations the nodes in ps span, assuming
104 the stage boundaries are placed efficiently. */
105 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
106 + 1 + (ps)->ii - 1) / (ps)->ii)
108 #define CFG_HOOKS cfg_layout_rtl_cfg_hooks
110 /* A single instruction in the partial schedule. */
111 struct ps_insn
113 /* The corresponding DDG_NODE. */
114 ddg_node_ptr node;
116 /* The (absolute) cycle in which the PS instruction is scheduled.
117 Same as SCHED_TIME (node). */
118 int cycle;
120 /* The next/prev PS_INSN in the same row. */
121 ps_insn_ptr next_in_row,
122 prev_in_row;
124 /* The number of nodes in the same row that come after this node. */
125 int row_rest_count;
128 /* Holds the partial schedule as an array of II rows. Each entry of the
129 array points to a linked list of PS_INSNs, which represents the
130 instructions that are scheduled for that row. */
131 struct partial_schedule
133 int ii; /* Number of rows in the partial schedule. */
134 int history; /* Threshold for conflict checking using DFA. */
136 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
137 ps_insn_ptr *rows;
139 /* The earliest absolute cycle of an insn in the partial schedule. */
140 int min_cycle;
142 /* The latest absolute cycle of an insn in the partial schedule. */
143 int max_cycle;
145 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
149 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
150 static void free_partial_schedule (partial_schedule_ptr);
151 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
152 void print_partial_schedule (partial_schedule_ptr, FILE *);
153 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
154 ddg_node_ptr node, int cycle,
155 sbitmap must_precede,
156 sbitmap must_follow);
157 static void rotate_partial_schedule (partial_schedule_ptr, int);
159 /* This page defines constants and structures for the modulo scheduling
160 driver. */
162 /* As in haifa-sched.c: */
163 /* issue_rate is the number of insns that can be scheduled in the same
164 machine cycle. It can be defined in the config/mach/mach.h file,
165 otherwise we set it to 1. */
167 static int issue_rate;
169 /* For printing statistics. */
170 static FILE *stats_file;
172 static int sms_order_nodes (ddg_ptr, int, int * result);
173 static void set_node_sched_params (ddg_ptr);
174 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int,
175 int *, FILE*);
176 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
177 static void generate_prolog_epilog (partial_schedule_ptr, rtx, rtx, int);
178 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
179 int from_stage, int to_stage,
180 int is_prolog);
183 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
184 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
185 #define SCHED_FIRST_REG_MOVE(x) \
186 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
187 #define SCHED_NREG_MOVES(x) \
188 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
189 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
190 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
191 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
193 /* The scheduling parameters held for each node. */
194 typedef struct node_sched_params
196 int asap; /* A lower-bound on the absolute scheduling cycle. */
197 int time; /* The absolute scheduling cycle (time >= asap). */
199 /* The following field (first_reg_move) is a pointer to the first
200 register-move instruction added to handle the modulo-variable-expansion
201 of the register defined by this node. This register-move copies the
202 original register defined by the node. */
203 rtx first_reg_move;
205 /* The number of register-move instructions added, immediately preceding
206 first_reg_move. */
207 int nreg_moves;
209 int row; /* Holds time % ii. */
210 int stage; /* Holds time / ii. */
212 /* The column of a node inside the ps. If nodes u, v are on the same row,
213 u will precede v if column (u) < column (v). */
214 int column;
215 } *node_sched_params_ptr;
218 /* The following three functions are copied from the current scheduler
219 code in order to use sched_analyze() for computing the dependencies.
220 They are used when initializing the sched_info structure. */
221 static const char *
222 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
224 static char tmp[80];
226 sprintf (tmp, "i%4d", INSN_UID (insn));
227 return tmp;
230 static int
231 contributes_to_priority (rtx next, rtx insn)
233 return BLOCK_NUM (next) == BLOCK_NUM (insn);
236 static void
237 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
238 regset cond_exec ATTRIBUTE_UNUSED,
239 regset used ATTRIBUTE_UNUSED,
240 regset set ATTRIBUTE_UNUSED)
244 static struct sched_info sms_sched_info =
246 NULL,
247 NULL,
248 NULL,
249 NULL,
250 NULL,
251 sms_print_insn,
252 contributes_to_priority,
253 compute_jump_reg_dependencies,
254 NULL, NULL,
255 NULL, NULL,
256 0, 0, 0
260 /* Return the register decremented and tested or zero if it is not a decrement
261 and branch jump insn (similar to doloop_condition_get). */
262 static rtx
263 doloop_register_get (rtx insn, rtx *comp)
265 rtx pattern, cmp, inc, reg, condition;
267 if (!JUMP_P (insn))
268 return NULL_RTX;
269 pattern = PATTERN (insn);
271 /* The canonical doloop pattern we expect is:
273 (parallel [(set (pc) (if_then_else (condition)
274 (label_ref (label))
275 (pc)))
276 (set (reg) (plus (reg) (const_int -1)))
277 (additional clobbers and uses)])
279 where condition is further restricted to be
280 (ne (reg) (const_int 1)). */
282 if (GET_CODE (pattern) != PARALLEL)
283 return NULL_RTX;
285 cmp = XVECEXP (pattern, 0, 0);
286 inc = XVECEXP (pattern, 0, 1);
287 /* Return the compare rtx. */
288 *comp = cmp;
290 /* Check for (set (reg) (something)). */
291 if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
292 return NULL_RTX;
294 /* Extract loop counter register. */
295 reg = SET_DEST (inc);
297 /* Check if something = (plus (reg) (const_int -1)). */
298 if (GET_CODE (SET_SRC (inc)) != PLUS
299 || XEXP (SET_SRC (inc), 0) != reg
300 || XEXP (SET_SRC (inc), 1) != constm1_rtx)
301 return NULL_RTX;
303 /* Check for (set (pc) (if_then_else (condition)
304 (label_ref (label))
305 (pc))). */
306 if (GET_CODE (cmp) != SET
307 || SET_DEST (cmp) != pc_rtx
308 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
309 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
310 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
311 return NULL_RTX;
313 /* Extract loop termination condition. */
314 condition = XEXP (SET_SRC (cmp), 0);
316 /* Check if condition = (ne (reg) (const_int 1)), which is more
317 restrictive than the check in doloop_condition_get:
318 if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
319 || GET_CODE (XEXP (condition, 1)) != CONST_INT). */
320 if (GET_CODE (condition) != NE
321 || XEXP (condition, 1) != const1_rtx)
322 return NULL_RTX;
324 if (XEXP (condition, 0) == reg)
325 return reg;
327 return NULL_RTX;
330 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
331 that the number of iterations is a compile-time constant. If so,
332 return the rtx that sets COUNT_REG to a constant, and set COUNT to
333 this constant. Otherwise return 0. */
334 static rtx
335 const_iteration_count (rtx count_reg, basic_block pre_header,
336 HOST_WIDEST_INT * count)
338 rtx insn;
339 rtx head, tail;
341 if (! pre_header)
342 return NULL_RTX;
344 get_block_head_tail (pre_header->index, &head, &tail);
346 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
347 if (INSN_P (insn) && single_set (insn) &&
348 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
350 rtx pat = single_set (insn);
352 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
354 *count = INTVAL (SET_SRC (pat));
355 return insn;
358 return NULL_RTX;
361 return NULL_RTX;
364 /* A very simple resource-based lower bound on the initiation interval.
365 ??? Improve the accuracy of this bound by considering the
366 utilization of various units. */
367 static int
368 res_MII (ddg_ptr g)
370 return (g->num_nodes / issue_rate);
374 /* Points to the array that contains the sched data for each node. */
375 static node_sched_params_ptr node_sched_params;
377 /* Allocate sched_params for each node and initialize it. Assumes that
378 the aux field of each node contain the asap bound (computed earlier),
379 and copies it into the sched_params field. */
380 static void
381 set_node_sched_params (ddg_ptr g)
383 int i;
385 /* Allocate for each node in the DDG a place to hold the "sched_data". */
386 /* Initialize ASAP/ALAP/HIGHT to zero. */
387 node_sched_params = (node_sched_params_ptr)
388 xcalloc (g->num_nodes,
389 sizeof (struct node_sched_params));
391 /* Set the pointer of the general data of the node to point to the
392 appropriate sched_params structure. */
393 for (i = 0; i < g->num_nodes; i++)
395 /* Watch out for aliasing problems? */
396 node_sched_params[i].asap = g->nodes[i].aux.count;
397 g->nodes[i].aux.info = &node_sched_params[i];
401 static void
402 print_node_sched_params (FILE * dump_file, int num_nodes)
404 int i;
406 if (! dump_file)
407 return;
408 for (i = 0; i < num_nodes; i++)
410 node_sched_params_ptr nsp = &node_sched_params[i];
411 rtx reg_move = nsp->first_reg_move;
412 int j;
414 fprintf (dump_file, "Node %d:\n", i);
415 fprintf (dump_file, " asap = %d:\n", nsp->asap);
416 fprintf (dump_file, " time = %d:\n", nsp->time);
417 fprintf (dump_file, " nreg_moves = %d:\n", nsp->nreg_moves);
418 for (j = 0; j < nsp->nreg_moves; j++)
420 fprintf (dump_file, " reg_move = ");
421 print_rtl_single (dump_file, reg_move);
422 reg_move = PREV_INSN (reg_move);
427 /* Calculate an upper bound for II. SMS should not schedule the loop if it
428 requires more cycles than this bound. Currently set to the sum of the
429 longest latency edge for each node. Reset based on experiments. */
430 static int
431 calculate_maxii (ddg_ptr g)
433 int i;
434 int maxii = 0;
436 for (i = 0; i < g->num_nodes; i++)
438 ddg_node_ptr u = &g->nodes[i];
439 ddg_edge_ptr e;
440 int max_edge_latency = 0;
442 for (e = u->out; e; e = e->next_out)
443 max_edge_latency = MAX (max_edge_latency, e->latency);
445 maxii += max_edge_latency;
447 return maxii;
451 Breaking intra-loop register anti-dependences:
452 Each intra-loop register anti-dependence implies a cross-iteration true
453 dependence of distance 1. Therefore, we can remove such false dependencies
454 and figure out if the partial schedule broke them by checking if (for a
455 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
456 if so generate a register move. The number of such moves is equal to:
457 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken
458 nreg_moves = ----------------------------------- + 1 - { dependence.
459 ii { 1 if not.
461 static void
462 generate_reg_moves (partial_schedule_ptr ps)
464 ddg_ptr g = ps->g;
465 int ii = ps->ii;
466 int i;
468 for (i = 0; i < g->num_nodes; i++)
470 ddg_node_ptr u = &g->nodes[i];
471 ddg_edge_ptr e;
472 int nreg_moves = 0, i_reg_move;
473 sbitmap *uses_of_defs;
474 rtx last_reg_move;
475 rtx prev_reg, old_reg;
477 /* Compute the number of reg_moves needed for u, by looking at life
478 ranges started at u (excluding self-loops). */
479 for (e = u->out; e; e = e->next_out)
480 if (e->type == TRUE_DEP && e->dest != e->src)
482 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
484 if (e->distance == 1)
485 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
487 /* If dest precedes src in the schedule of the kernel, then dest
488 will read before src writes and we can save one reg_copy. */
489 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
490 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
491 nreg_moves4e--;
493 nreg_moves = MAX (nreg_moves, nreg_moves4e);
496 if (nreg_moves == 0)
497 continue;
499 /* Every use of the register defined by node may require a different
500 copy of this register, depending on the time the use is scheduled.
501 Set a bitmap vector, telling which nodes use each copy of this
502 register. */
503 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
504 sbitmap_vector_zero (uses_of_defs, nreg_moves);
505 for (e = u->out; e; e = e->next_out)
506 if (e->type == TRUE_DEP && e->dest != e->src)
508 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
510 if (e->distance == 1)
511 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;
513 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
514 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
515 dest_copy--;
517 if (dest_copy)
518 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
521 /* Now generate the reg_moves, attaching relevant uses to them. */
522 SCHED_NREG_MOVES (u) = nreg_moves;
523 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
524 last_reg_move = u->insn;
526 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
528 int i_use;
529 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
530 rtx reg_move = gen_move_insn (new_reg, prev_reg);
532 add_insn_before (reg_move, last_reg_move);
533 last_reg_move = reg_move;
535 if (!SCHED_FIRST_REG_MOVE (u))
536 SCHED_FIRST_REG_MOVE (u) = reg_move;
538 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use,
539 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg));
541 prev_reg = new_reg;
546 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
547 of SCHED_ROW and SCHED_STAGE. */
548 static void
549 normalize_sched_times (partial_schedule_ptr ps)
551 int i;
552 ddg_ptr g = ps->g;
553 int amount = PS_MIN_CYCLE (ps);
554 int ii = ps->ii;
556 /* Don't include the closing branch assuming that it is the last node. */
557 for (i = 0; i < g->num_nodes - 1; i++)
559 ddg_node_ptr u = &g->nodes[i];
560 int normalized_time = SCHED_TIME (u) - amount;
562 if (normalized_time < 0)
563 abort ();
565 SCHED_TIME (u) = normalized_time;
566 SCHED_ROW (u) = normalized_time % ii;
567 SCHED_STAGE (u) = normalized_time / ii;
571 /* Set SCHED_COLUMN of each node according to its position in PS. */
572 static void
573 set_columns_for_ps (partial_schedule_ptr ps)
575 int row;
577 for (row = 0; row < ps->ii; row++)
579 ps_insn_ptr cur_insn = ps->rows[row];
580 int column = 0;
582 for (; cur_insn; cur_insn = cur_insn->next_in_row)
583 SCHED_COLUMN (cur_insn->node) = column++;
587 /* Permute the insns according to their order in PS, from row 0 to
588 row ii-1, and position them right before LAST. This schedules
589 the insns of the loop kernel. */
590 static void
591 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
593 int ii = ps->ii;
594 int row;
595 ps_insn_ptr ps_ij;
597 for (row = 0; row < ii ; row++)
598 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
599 if (PREV_INSN (last) != ps_ij->node->insn)
600 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
601 PREV_INSN (last));
604 /* Used to generate the prologue & epilogue. Duplicate the subset of
605 nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
606 of both), together with a prefix/suffix of their reg_moves. */
607 static void
608 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
609 int to_stage, int for_prolog)
611 int row;
612 ps_insn_ptr ps_ij;
614 for (row = 0; row < ps->ii; row++)
615 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
617 ddg_node_ptr u_node = ps_ij->node;
618 int j, i_reg_moves;
619 rtx reg_move = NULL_RTX;
621 if (for_prolog)
623 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
624 number of reg_moves starting with the second occurrence of
625 u_node, which is generated if its SCHED_STAGE <= to_stage. */
626 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
627 i_reg_moves = MAX (i_reg_moves, 0);
628 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
630 /* The reg_moves start from the *first* reg_move backwards. */
631 if (i_reg_moves)
633 reg_move = SCHED_FIRST_REG_MOVE (u_node);
634 for (j = 1; j < i_reg_moves; j++)
635 reg_move = PREV_INSN (reg_move);
638 else /* It's for the epilog. */
640 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
641 starting to decrease one stage after u_node no longer occurs;
642 that is, generate all reg_moves until
643 SCHED_STAGE (u_node) == from_stage - 1. */
644 i_reg_moves = SCHED_NREG_MOVES (u_node)
645 - (from_stage - SCHED_STAGE (u_node) - 1);
646 i_reg_moves = MAX (i_reg_moves, 0);
647 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
649 /* The reg_moves start from the *last* reg_move forwards. */
650 if (i_reg_moves)
652 reg_move = SCHED_FIRST_REG_MOVE (u_node);
653 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
654 reg_move = PREV_INSN (reg_move);
658 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
659 emit_insn (copy_rtx (PATTERN (reg_move)));
661 if (SCHED_STAGE (u_node) >= from_stage
662 && SCHED_STAGE (u_node) <= to_stage)
663 duplicate_insn_chain (u_node->first_note, u_node->insn);
668 /* Generate the instructions (including reg_moves) for prolog & epilog. */
669 static void
670 generate_prolog_epilog (partial_schedule_ptr ps, rtx orig_loop_beg,
671 rtx orig_loop_end, int unknown_count)
673 int i;
674 int last_stage = PS_STAGE_COUNT (ps) - 1;
675 edge e;
676 rtx c_reg = NULL_RTX;
677 rtx cmp = NULL_RTX;
678 rtx precond_jump = NULL_RTX;
679 rtx precond_exit_label = NULL_RTX;
680 rtx precond_exit_label_insn = NULL_RTX;
681 rtx last_epilog_insn = NULL_RTX;
682 rtx loop_exit_label = NULL_RTX;
683 rtx loop_exit_label_insn = NULL_RTX;
684 rtx orig_loop_bct = NULL_RTX;
686 /* Loop header edge. */
687 e = EDGE_PRED (ps->g->bb, 0);
688 if (e->src == ps->g->bb)
689 e = EDGE_PRED (ps->g->bb, 1);
691 /* Generate the prolog, inserting its insns on the loop-entry edge. */
692 start_sequence ();
694 /* This is the place where we want to insert the precondition. */
695 if (unknown_count)
696 precond_jump = emit_note (NOTE_INSN_DELETED);
698 for (i = 0; i < last_stage; i++)
699 duplicate_insns_of_cycles (ps, 0, i, 1);
701 /* No need to call insert_insn_on_edge; we prepared the sequence. */
702 e->insns.r = get_insns ();
703 end_sequence ();
705 /* Generate the epilog, inserting its insns on the loop-exit edge. */
706 start_sequence ();
708 for (i = 0; i < last_stage; i++)
709 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
711 last_epilog_insn = emit_note (NOTE_INSN_DELETED);
713 /* Emit the label where to put the original loop code. */
714 if (unknown_count)
716 rtx label, cond;
718 precond_exit_label = gen_label_rtx ();
719 precond_exit_label_insn = emit_label (precond_exit_label);
721 /* Put the original loop code. */
722 reorder_insns_nobb (orig_loop_beg, orig_loop_end, precond_exit_label_insn);
724 /* Change the label of the BCT to be the PRECOND_EXIT_LABEL. */
725 orig_loop_bct = get_last_insn ();
726 c_reg = doloop_register_get (orig_loop_bct, &cmp);
727 label = XEXP (SET_SRC (cmp), 1);
728 cond = XEXP (SET_SRC (cmp), 0);
730 if (! c_reg || GET_CODE (cond) != NE)
731 abort ();
733 XEXP (label, 0) = precond_exit_label;
734 JUMP_LABEL (orig_loop_bct) = precond_exit_label_insn;
735 LABEL_NUSES (precond_exit_label_insn)++;
737 /* Generate the loop exit label. */
738 loop_exit_label = gen_label_rtx ();
739 loop_exit_label_insn = emit_label (loop_exit_label);
742 e = EDGE_SUCC (ps->g->bb, 0);
743 if (e->dest == ps->g->bb)
744 e = EDGE_SUCC (ps->g->bb, 1);
746 e->insns.r = get_insns ();
747 end_sequence ();
749 commit_edge_insertions ();
751 if (unknown_count)
753 rtx precond_insns, epilog_jump, insert_after_insn;
754 basic_block loop_exit_bb = BLOCK_FOR_INSN (loop_exit_label_insn);
755 basic_block epilog_bb = BLOCK_FOR_INSN (last_epilog_insn);
756 basic_block precond_bb = BLOCK_FOR_INSN (precond_jump);
757 basic_block orig_loop_bb = BLOCK_FOR_INSN (precond_exit_label_insn);
758 edge epilog_exit_edge = single_succ_edge (epilog_bb);
760 /* Do loop preconditioning to take care of cases were the loop count is
761 less than the stage count. Update the CFG properly. */
762 insert_after_insn = precond_jump;
763 start_sequence ();
764 c_reg = doloop_register_get (ps->g->closing_branch->insn, &cmp);
765 emit_cmp_and_jump_insns (c_reg, GEN_INT (PS_STAGE_COUNT (ps)), LT, NULL,
766 GET_MODE (c_reg), 1, precond_exit_label);
767 precond_insns = get_insns ();
768 precond_jump = get_last_insn ();
769 end_sequence ();
770 reorder_insns (precond_insns, precond_jump, insert_after_insn);
772 /* Generate a subtract instruction at the beginning of the prolog to
773 adjust the loop count by STAGE_COUNT. */
774 emit_insn_after (gen_sub2_insn (c_reg, GEN_INT (PS_STAGE_COUNT (ps) - 1)),
775 precond_jump);
776 update_bb_for_insn (precond_bb);
777 delete_insn (insert_after_insn);
779 /* Update label info for the precondition jump. */
780 JUMP_LABEL (precond_jump) = precond_exit_label_insn;
781 LABEL_NUSES (precond_exit_label_insn)++;
783 /* Update the CFG. */
784 split_block (precond_bb, precond_jump);
785 make_edge (precond_bb, orig_loop_bb, 0);
787 /* Add a jump at end of the epilog to the LOOP_EXIT_LABEL to jump over the
788 original loop copy and update the CFG. */
789 epilog_jump = emit_jump_insn_after (gen_jump (loop_exit_label),
790 last_epilog_insn);
791 delete_insn (last_epilog_insn);
792 JUMP_LABEL (epilog_jump) = loop_exit_label_insn;
793 LABEL_NUSES (loop_exit_label_insn)++;
795 redirect_edge_succ (epilog_exit_edge, loop_exit_bb);
796 epilog_exit_edge->flags &= ~EDGE_FALLTHRU;
797 emit_barrier_after (BB_END (epilog_bb));
801 /* Return the line note insn preceding INSN, for debugging. Taken from
802 emit-rtl.c. */
803 static rtx
804 find_line_note (rtx insn)
806 for (; insn; insn = PREV_INSN (insn))
807 if (NOTE_P (insn)
808 && NOTE_LINE_NUMBER (insn) >= 0)
809 break;
811 return insn;
814 /* Main entry point, perform SMS scheduling on the loops of the function
815 that consist of single basic blocks. */
816 void
817 sms_schedule (FILE *dump_file)
819 static int passes = 0;
820 rtx insn;
821 ddg_ptr *g_arr, g;
822 basic_block bb, pre_header = NULL;
823 int * node_order;
824 int maxii;
825 int i;
826 partial_schedule_ptr ps;
827 int max_bb_index = last_basic_block;
828 struct df *df;
830 stats_file = dump_file;
832 /* Initialize issue_rate. */
833 if (targetm.sched.issue_rate)
835 int temp = reload_completed;
837 reload_completed = 1;
838 issue_rate = (*targetm.sched.issue_rate) ();
839 reload_completed = temp;
841 else
842 issue_rate = 1;
844 /* Initialize the scheduler. */
845 current_sched_info = &sms_sched_info;
846 sched_init (NULL);
848 /* Init Data Flow analysis, to be used in interloop dep calculation. */
849 df = df_init ();
850 df_analyze (df, 0, DF_ALL);
852 /* Allocate memory to hold the DDG array. */
853 g_arr = xcalloc (max_bb_index, sizeof (ddg_ptr));
855 /* Build DDGs for all the relevant loops and hold them in G_ARR
856 indexed by the loop BB index. */
857 FOR_EACH_BB (bb)
859 rtx head, tail;
860 rtx count_reg, comp;
861 edge e, pre_header_edge;
863 if (bb->index < 0)
864 continue;
866 /* Check if bb has two successors, one being itself. */
867 if (EDGE_COUNT (bb->succs) != 2)
868 continue;
870 if (EDGE_SUCC (bb, 0)->dest != bb && EDGE_SUCC (bb, 1)->dest != bb)
871 continue;
873 if ((EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
874 || (EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
875 continue;
877 /* Check if bb has two predecessors, one being itself. */
878 if (EDGE_COUNT (bb->preds) != 2)
879 continue;
881 if (EDGE_PRED (bb, 0)->src != bb && EDGE_PRED (bb, 1)->src != bb)
882 continue;
884 if ((EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
885 || (EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX))
886 continue;
888 /* For debugging. */
889 if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
891 if (dump_file)
892 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
893 break;
896 get_block_head_tail (bb->index, &head, &tail);
897 pre_header_edge = EDGE_PRED (bb, 0);
898 if (EDGE_PRED (bb, 0)->src != bb)
899 pre_header_edge = EDGE_PRED (bb, 1);
901 /* Perfrom SMS only on loops that their average count is above threshold. */
902 if (bb->count < pre_header_edge->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD)
904 if (stats_file)
906 rtx line_note = find_line_note (tail);
908 if (line_note)
910 expanded_location xloc;
911 NOTE_EXPANDED_LOCATION (xloc, line_note);
912 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
913 xloc.file, xloc.line);
915 fprintf (stats_file, "SMS single-bb-loop\n");
916 if (profile_info && flag_branch_probabilities)
918 fprintf (stats_file, "SMS loop-count ");
919 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
920 (HOST_WIDEST_INT) bb->count);
921 fprintf (stats_file, "\n");
922 fprintf (stats_file, "SMS preheader-count ");
923 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
924 (HOST_WIDEST_INT) pre_header_edge->count);
925 fprintf (stats_file, "\n");
926 fprintf (stats_file, "SMS profile-sum-max ");
927 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
928 (HOST_WIDEST_INT) profile_info->sum_max);
929 fprintf (stats_file, "\n");
932 continue;
935 /* Make sure this is a doloop. */
936 if ( !(count_reg = doloop_register_get (tail, &comp)))
937 continue;
939 e = EDGE_PRED (bb, 0);
940 if (e->src == bb)
941 pre_header = EDGE_PRED (bb, 1)->src;
942 else
943 pre_header = e->src;
945 /* Don't handle BBs with calls or barriers, or !single_set insns. */
946 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
947 if (CALL_P (insn)
948 || BARRIER_P (insn)
949 || (INSN_P (insn) && !JUMP_P (insn)
950 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
951 break;
953 if (insn != NEXT_INSN (tail))
955 if (stats_file)
957 if (CALL_P (insn))
958 fprintf (stats_file, "SMS loop-with-call\n");
959 else if (BARRIER_P (insn))
960 fprintf (stats_file, "SMS loop-with-barrier\n");
961 else
962 fprintf (stats_file, "SMS loop-with-not-single-set\n");
963 print_rtl_single (stats_file, insn);
966 continue;
969 if (! (g = create_ddg (bb, df, 0)))
971 if (stats_file)
972 fprintf (stats_file, "SMS doloop\n");
973 continue;
976 g_arr[bb->index] = g;
979 /* Release Data Flow analysis data structures. */
980 df_finish (df);
982 /* Go over the built DDGs and perfrom SMS for each one of them. */
983 for (i = 0; i < max_bb_index; i++)
985 rtx head, tail;
986 rtx count_reg, count_init, comp;
987 edge pre_header_edge;
988 int mii, rec_mii;
989 int stage_count = 0;
990 HOST_WIDEST_INT loop_count = 0;
992 if (! (g = g_arr[i]))
993 continue;
995 if (dump_file)
996 print_ddg (dump_file, g);
998 get_block_head_tail (g->bb->index, &head, &tail);
1000 pre_header_edge = EDGE_PRED (g->bb, 0);
1001 if (EDGE_PRED (g->bb, 0)->src != g->bb)
1002 pre_header_edge = EDGE_PRED (g->bb, 1);
1004 if (stats_file)
1006 rtx line_note = find_line_note (tail);
1008 if (line_note)
1010 expanded_location xloc;
1011 NOTE_EXPANDED_LOCATION (xloc, line_note);
1012 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
1013 xloc.file, xloc.line);
1015 fprintf (stats_file, "SMS single-bb-loop\n");
1016 if (profile_info && flag_branch_probabilities)
1018 fprintf (stats_file, "SMS loop-count ");
1019 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1020 (HOST_WIDEST_INT) bb->count);
1021 fprintf (stats_file, "\n");
1022 fprintf (stats_file, "SMS preheader-count ");
1023 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1024 (HOST_WIDEST_INT) pre_header_edge->count);
1025 fprintf (stats_file, "\n");
1026 fprintf (stats_file, "SMS profile-sum-max ");
1027 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1028 (HOST_WIDEST_INT) profile_info->sum_max);
1029 fprintf (stats_file, "\n");
1031 fprintf (stats_file, "SMS doloop\n");
1032 fprintf (stats_file, "SMS built-ddg %d\n", g->num_nodes);
1033 fprintf (stats_file, "SMS num-loads %d\n", g->num_loads);
1034 fprintf (stats_file, "SMS num-stores %d\n", g->num_stores);
1037 /* Make sure this is a doloop. */
1038 if ( !(count_reg = doloop_register_get (tail, &comp)))
1039 abort ();
1041 /* This should be NULL_RTX if the count is unknown at compile time. */
1042 count_init = const_iteration_count (count_reg, pre_header, &loop_count);
1044 if (stats_file && count_init)
1046 fprintf (stats_file, "SMS const-doloop ");
1047 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1048 fprintf (stats_file, "\n");
1051 node_order = (int *) xmalloc (sizeof (int) * g->num_nodes);
1053 mii = 1; /* Need to pass some estimate of mii. */
1054 rec_mii = sms_order_nodes (g, mii, node_order);
1055 mii = MAX (res_MII (g), rec_mii);
1056 maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1058 if (stats_file)
1059 fprintf (stats_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1060 rec_mii, mii, maxii);
1062 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1063 ASAP. */
1064 set_node_sched_params (g);
1066 ps = sms_schedule_by_order (g, mii, maxii, node_order, dump_file);
1068 if (ps)
1069 stage_count = PS_STAGE_COUNT (ps);
1071 if (stage_count == 0 || (count_init && (stage_count > loop_count)))
1073 if (dump_file)
1074 fprintf (dump_file, "SMS failed... \n");
1075 if (stats_file)
1076 fprintf (stats_file, "SMS sched-failed %d\n", stage_count);
1078 else
1080 rtx orig_loop_beg = NULL_RTX;
1081 rtx orig_loop_end = NULL_RTX;
1083 if (stats_file)
1085 fprintf (stats_file,
1086 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1087 stage_count);
1088 print_partial_schedule (ps, dump_file);
1089 fprintf (dump_file,
1090 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1091 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1094 /* Save the original loop if we want to do loop preconditioning in
1095 case the BCT count is not known. */
1096 if (! count_init)
1098 int i;
1100 start_sequence ();
1101 /* Copy the original loop code before modifying it -
1102 so we can use it later. */
1103 for (i = 0; i < ps->g->num_nodes; i++)
1104 duplicate_insn_chain (ps->g->nodes[i].first_note,
1105 ps->g->nodes[i].insn);
1107 orig_loop_beg = get_insns ();
1108 orig_loop_end = get_last_insn ();
1109 end_sequence ();
1111 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1112 the closing_branch was scheduled and should appear in the last (ii-1)
1113 row. Otherwise, we are free to schedule the branch, and we let nodes
1114 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1115 row; this should reduce stage_count to minimum. */
1116 normalize_sched_times (ps);
1117 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1118 set_columns_for_ps (ps);
1120 permute_partial_schedule (ps, g->closing_branch->first_note);
1122 /* Mark this loop as software pipelined so the later
1123 scheduling passes doesn't touch it. */
1124 if (! flag_resched_modulo_sched)
1125 g->bb->flags |= BB_DISABLE_SCHEDULE;
1126 /* The life-info is not valid any more. */
1127 g->bb->flags |= BB_DIRTY;
1129 generate_reg_moves (ps);
1130 if (dump_file)
1131 print_node_sched_params (dump_file, g->num_nodes);
1133 /* Set new iteration count of loop kernel. */
1134 if (count_init)
1135 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1136 - stage_count + 1);
1138 /* Generate prolog and epilog. */
1139 generate_prolog_epilog (ps, orig_loop_beg, orig_loop_end,
1140 count_init ? 0 : 1);
1142 free_partial_schedule (ps);
1143 free (node_sched_params);
1144 free (node_order);
1145 free_ddg (g);
1148 /* Release scheduler data, needed until now because of DFA. */
1149 sched_finish ();
1152 /* The SMS scheduling algorithm itself
1153 -----------------------------------
1154 Input: 'O' an ordered list of insns of a loop.
1155 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1157 'Q' is the empty Set
1158 'PS' is the partial schedule; it holds the currently scheduled nodes with
1159 their cycle/slot.
1160 'PSP' previously scheduled predecessors.
1161 'PSS' previously scheduled successors.
1162 't(u)' the cycle where u is scheduled.
1163 'l(u)' is the latency of u.
1164 'd(v,u)' is the dependence distance from v to u.
1165 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1166 the node ordering phase.
1167 'check_hardware_resources_conflicts(u, PS, c)'
1168 run a trace around cycle/slot through DFA model
1169 to check resource conflicts involving instruction u
1170 at cycle c given the partial schedule PS.
1171 'add_to_partial_schedule_at_time(u, PS, c)'
1172 Add the node/instruction u to the partial schedule
1173 PS at time c.
1174 'calculate_register_pressure(PS)'
1175 Given a schedule of instructions, calculate the register
1176 pressure it implies. One implementation could be the
1177 maximum number of overlapping live ranges.
1178 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1179 registers available in the hardware.
1181 1. II = MII.
1182 2. PS = empty list
1183 3. for each node u in O in pre-computed order
1184 4. if (PSP(u) != Q && PSS(u) == Q) then
1185 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1186 6. start = Early_start; end = Early_start + II - 1; step = 1
1187 11. else if (PSP(u) == Q && PSS(u) != Q) then
1188 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1189 13. start = Late_start; end = Late_start - II + 1; step = -1
1190 14. else if (PSP(u) != Q && PSS(u) != Q) then
1191 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u).
1192 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u).
1193 17. start = Early_start;
1194 18. end = min(Early_start + II - 1 , Late_start);
1195 19. step = 1
1196 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1197 21. start = ASAP(u); end = start + II - 1; step = 1
1198 22. endif
1200 23. success = false
1201 24. for (c = start ; c != end ; c += step)
1202 25. if check_hardware_resources_conflicts(u, PS, c) then
1203 26. add_to_partial_schedule_at_time(u, PS, c)
1204 27. success = true
1205 28. break
1206 29. endif
1207 30. endfor
1208 31. if (success == false) then
1209 32. II = II + 1
1210 33. if (II > maxII) then
1211 34. finish - failed to schedule
1212 35. endif
1213 36. goto 2.
1214 37. endif
1215 38. endfor
1216 39. if (calculate_register_pressure(PS) > maxRP) then
1217 40. goto 32.
1218 41. endif
1219 42. compute epilogue & prologue
1220 43. finish - succeeded to schedule
1223 /* A limit on the number of cycles that resource conflicts can span. ??? Should
1224 be provided by DFA, and be dependent on the type of insn scheduled. Currently
1225 set to 0 to save compile time. */
1226 #define DFA_HISTORY SMS_DFA_HISTORY
1228 static partial_schedule_ptr
1229 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file)
1231 int ii = mii;
1232 int i, c, success;
1233 int try_again_with_larger_ii = true;
1234 int num_nodes = g->num_nodes;
1235 ddg_edge_ptr e;
1236 int start, end, step; /* Place together into one struct? */
1237 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1238 sbitmap must_precede = sbitmap_alloc (num_nodes);
1239 sbitmap must_follow = sbitmap_alloc (num_nodes);
1241 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1243 while (try_again_with_larger_ii && ii < maxii)
1245 if (dump_file)
1246 fprintf(dump_file, "Starting with ii=%d\n", ii);
1247 try_again_with_larger_ii = false;
1248 sbitmap_zero (sched_nodes);
1250 for (i = 0; i < num_nodes; i++)
1252 int u = nodes_order[i];
1253 ddg_node_ptr u_node = &g->nodes[u];
1254 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1255 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1256 int psp_not_empty;
1257 int pss_not_empty;
1258 rtx insn = u_node->insn;
1260 if (!INSN_P (insn))
1261 continue;
1263 if (JUMP_P (insn)) /* Closing branch handled later. */
1264 continue;
1266 /* 1. compute sched window for u (start, end, step). */
1267 psp_not_empty = sbitmap_any_common_bits (u_node_preds, sched_nodes);
1268 pss_not_empty = sbitmap_any_common_bits (u_node_succs, sched_nodes);
1270 if (psp_not_empty && !pss_not_empty)
1272 int early_start = 0;
1274 end = INT_MAX;
1275 for (e = u_node->in; e != 0; e = e->next_in)
1277 ddg_node_ptr v_node = e->src;
1278 if (TEST_BIT (sched_nodes, v_node->cuid))
1280 int node_st = SCHED_TIME (v_node)
1281 + e->latency - (e->distance * ii);
1283 early_start = MAX (early_start, node_st);
1285 if (e->data_type == MEM_DEP)
1286 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1289 start = early_start;
1290 end = MIN (end, early_start + ii);
1291 step = 1;
1294 else if (!psp_not_empty && pss_not_empty)
1296 int late_start = INT_MAX;
1298 end = INT_MIN;
1299 for (e = u_node->out; e != 0; e = e->next_out)
1301 ddg_node_ptr v_node = e->dest;
1302 if (TEST_BIT (sched_nodes, v_node->cuid))
1304 late_start = MIN (late_start,
1305 SCHED_TIME (v_node) - e->latency
1306 + (e->distance * ii));
1307 if (e->data_type == MEM_DEP)
1308 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1311 start = late_start;
1312 end = MAX (end, late_start - ii);
1313 step = -1;
1316 else if (psp_not_empty && pss_not_empty)
1318 int early_start = 0;
1319 int late_start = INT_MAX;
1321 start = INT_MIN;
1322 end = INT_MAX;
1323 for (e = u_node->in; e != 0; e = e->next_in)
1325 ddg_node_ptr v_node = e->src;
1327 if (TEST_BIT (sched_nodes, v_node->cuid))
1329 early_start = MAX (early_start,
1330 SCHED_TIME (v_node) + e->latency
1331 - (e->distance * ii));
1332 if (e->data_type == MEM_DEP)
1333 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1336 for (e = u_node->out; e != 0; e = e->next_out)
1338 ddg_node_ptr v_node = e->dest;
1340 if (TEST_BIT (sched_nodes, v_node->cuid))
1342 late_start = MIN (late_start,
1343 SCHED_TIME (v_node) - e->latency
1344 + (e->distance * ii));
1345 if (e->data_type == MEM_DEP)
1346 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1349 start = MAX (start, early_start);
1350 end = MIN (end, MIN (early_start + ii, late_start + 1));
1351 step = 1;
1353 else /* psp is empty && pss is empty. */
1355 start = SCHED_ASAP (u_node);
1356 end = start + ii;
1357 step = 1;
1360 /* 2. Try scheduling u in window. */
1361 if (dump_file)
1362 fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1363 u, start, end, step);
1365 /* use must_follow & must_precede bitmaps to determine order
1366 of nodes within the cycle. */
1367 sbitmap_zero (must_precede);
1368 sbitmap_zero (must_follow);
1369 for (e = u_node->in; e != 0; e = e->next_in)
1370 if (TEST_BIT (sched_nodes, e->src->cuid)
1371 && e->latency == (ii * e->distance)
1372 && start == SCHED_TIME (e->src))
1373 SET_BIT (must_precede, e->src->cuid);
1375 for (e = u_node->out; e != 0; e = e->next_out)
1376 if (TEST_BIT (sched_nodes, e->dest->cuid)
1377 && e->latency == (ii * e->distance)
1378 && end == SCHED_TIME (e->dest))
1379 SET_BIT (must_follow, e->dest->cuid);
1381 success = 0;
1382 if ((step > 0 && start < end) || (step < 0 && start > end))
1383 for (c = start; c != end; c += step)
1385 ps_insn_ptr psi;
1387 psi = ps_add_node_check_conflicts (ps, u_node, c,
1388 must_precede,
1389 must_follow);
1391 if (psi)
1393 SCHED_TIME (u_node) = c;
1394 SET_BIT (sched_nodes, u);
1395 success = 1;
1396 if (dump_file)
1397 fprintf(dump_file, "Schedule in %d\n", c);
1398 break;
1401 if (!success)
1403 /* ??? Try backtracking instead of immediately ii++? */
1404 ii++;
1405 try_again_with_larger_ii = true;
1406 reset_partial_schedule (ps, ii);
1407 break;
1409 /* ??? If (success), check register pressure estimates. */
1410 } /* Continue with next node. */
1411 } /* While try_again_with_larger_ii. */
1413 sbitmap_free (sched_nodes);
1415 if (ii >= maxii)
1417 free_partial_schedule (ps);
1418 ps = NULL;
1420 return ps;
1424 /* This page implements the algorithm for ordering the nodes of a DDG
1425 for modulo scheduling, activated through the
1426 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
1428 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
1429 #define ASAP(x) (ORDER_PARAMS ((x))->asap)
1430 #define ALAP(x) (ORDER_PARAMS ((x))->alap)
1431 #define HEIGHT(x) (ORDER_PARAMS ((x))->height)
1432 #define MOB(x) (ALAP ((x)) - ASAP ((x)))
1433 #define DEPTH(x) (ASAP ((x)))
1435 typedef struct node_order_params * nopa;
1437 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
1438 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
1439 static nopa calculate_order_params (ddg_ptr, int mii);
1440 static int find_max_asap (ddg_ptr, sbitmap);
1441 static int find_max_hv_min_mob (ddg_ptr, sbitmap);
1442 static int find_max_dv_min_mob (ddg_ptr, sbitmap);
1444 enum sms_direction {BOTTOMUP, TOPDOWN};
1446 struct node_order_params
1448 int asap;
1449 int alap;
1450 int height;
1453 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1454 static void
1455 check_nodes_order (int *node_order, int num_nodes)
1457 int i;
1458 sbitmap tmp = sbitmap_alloc (num_nodes);
1460 sbitmap_zero (tmp);
1462 for (i = 0; i < num_nodes; i++)
1464 int u = node_order[i];
1466 if (u >= num_nodes || u < 0 || TEST_BIT (tmp, u))
1467 abort ();
1469 SET_BIT (tmp, u);
1472 sbitmap_free (tmp);
1475 /* Order the nodes of G for scheduling and pass the result in
1476 NODE_ORDER. Also set aux.count of each node to ASAP.
1477 Return the recMII for the given DDG. */
1478 static int
1479 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1481 int i;
1482 int rec_mii = 0;
1483 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1485 nopa nops = calculate_order_params (g, mii);
1487 order_nodes_of_sccs (sccs, node_order);
1489 if (sccs->num_sccs > 0)
1490 /* First SCC has the largest recurrence_length. */
1491 rec_mii = sccs->sccs[0]->recurrence_length;
1493 /* Save ASAP before destroying node_order_params. */
1494 for (i = 0; i < g->num_nodes; i++)
1496 ddg_node_ptr v = &g->nodes[i];
1497 v->aux.count = ASAP (v);
1500 free (nops);
1501 free_ddg_all_sccs (sccs);
1502 check_nodes_order (node_order, g->num_nodes);
1504 return rec_mii;
1507 static void
1508 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
1510 int i, pos = 0;
1511 ddg_ptr g = all_sccs->ddg;
1512 int num_nodes = g->num_nodes;
1513 sbitmap prev_sccs = sbitmap_alloc (num_nodes);
1514 sbitmap on_path = sbitmap_alloc (num_nodes);
1515 sbitmap tmp = sbitmap_alloc (num_nodes);
1516 sbitmap ones = sbitmap_alloc (num_nodes);
1518 sbitmap_zero (prev_sccs);
1519 sbitmap_ones (ones);
1521 /* Perfrom the node ordering starting from the SCC with the highest recMII.
1522 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */
1523 for (i = 0; i < all_sccs->num_sccs; i++)
1525 ddg_scc_ptr scc = all_sccs->sccs[i];
1527 /* Add nodes on paths from previous SCCs to the current SCC. */
1528 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
1529 sbitmap_a_or_b (tmp, scc->nodes, on_path);
1531 /* Add nodes on paths from the current SCC to previous SCCs. */
1532 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
1533 sbitmap_a_or_b (tmp, tmp, on_path);
1535 /* Remove nodes of previous SCCs from current extended SCC. */
1536 sbitmap_difference (tmp, tmp, prev_sccs);
1538 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1539 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */
1542 /* Handle the remaining nodes that do not belong to any scc. Each call
1543 to order_nodes_in_scc handles a single connected component. */
1544 while (pos < g->num_nodes)
1546 sbitmap_difference (tmp, ones, prev_sccs);
1547 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
1549 sbitmap_free (prev_sccs);
1550 sbitmap_free (on_path);
1551 sbitmap_free (tmp);
1552 sbitmap_free (ones);
1555 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
1556 static struct node_order_params *
1557 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
1559 int u;
1560 int max_asap;
1561 int num_nodes = g->num_nodes;
1562 ddg_edge_ptr e;
1563 /* Allocate a place to hold ordering params for each node in the DDG. */
1564 nopa node_order_params_arr;
1566 /* Initialize of ASAP/ALAP/HEIGHT to zero. */
1567 node_order_params_arr = (nopa) xcalloc (num_nodes,
1568 sizeof (struct node_order_params));
1570 /* Set the aux pointer of each node to point to its order_params structure. */
1571 for (u = 0; u < num_nodes; u++)
1572 g->nodes[u].aux.info = &node_order_params_arr[u];
1574 /* Disregarding a backarc from each recursive cycle to obtain a DAG,
1575 calculate ASAP, ALAP, mobility, distance, and height for each node
1576 in the dependence (direct acyclic) graph. */
1578 /* We assume that the nodes in the array are in topological order. */
1580 max_asap = 0;
1581 for (u = 0; u < num_nodes; u++)
1583 ddg_node_ptr u_node = &g->nodes[u];
1585 ASAP (u_node) = 0;
1586 for (e = u_node->in; e; e = e->next_in)
1587 if (e->distance == 0)
1588 ASAP (u_node) = MAX (ASAP (u_node),
1589 ASAP (e->src) + e->latency);
1590 max_asap = MAX (max_asap, ASAP (u_node));
1593 for (u = num_nodes - 1; u > -1; u--)
1595 ddg_node_ptr u_node = &g->nodes[u];
1597 ALAP (u_node) = max_asap;
1598 HEIGHT (u_node) = 0;
1599 for (e = u_node->out; e; e = e->next_out)
1600 if (e->distance == 0)
1602 ALAP (u_node) = MIN (ALAP (u_node),
1603 ALAP (e->dest) - e->latency);
1604 HEIGHT (u_node) = MAX (HEIGHT (u_node),
1605 HEIGHT (e->dest) + e->latency);
1609 return node_order_params_arr;
1612 static int
1613 find_max_asap (ddg_ptr g, sbitmap nodes)
1615 int u;
1616 int max_asap = -1;
1617 int result = -1;
1619 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1621 ddg_node_ptr u_node = &g->nodes[u];
1623 if (max_asap < ASAP (u_node))
1625 max_asap = ASAP (u_node);
1626 result = u;
1629 return result;
1632 static int
1633 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1635 int u;
1636 int max_hv = -1;
1637 int min_mob = INT_MAX;
1638 int result = -1;
1640 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1642 ddg_node_ptr u_node = &g->nodes[u];
1644 if (max_hv < HEIGHT (u_node))
1646 max_hv = HEIGHT (u_node);
1647 min_mob = MOB (u_node);
1648 result = u;
1650 else if ((max_hv == HEIGHT (u_node))
1651 && (min_mob > MOB (u_node)))
1653 min_mob = MOB (u_node);
1654 result = u;
1657 return result;
1660 static int
1661 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1663 int u;
1664 int max_dv = -1;
1665 int min_mob = INT_MAX;
1666 int result = -1;
1668 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1670 ddg_node_ptr u_node = &g->nodes[u];
1672 if (max_dv < DEPTH (u_node))
1674 max_dv = DEPTH (u_node);
1675 min_mob = MOB (u_node);
1676 result = u;
1678 else if ((max_dv == DEPTH (u_node))
1679 && (min_mob > MOB (u_node)))
1681 min_mob = MOB (u_node);
1682 result = u;
1685 return result;
1688 /* Places the nodes of SCC into the NODE_ORDER array starting
1689 at position POS, according to the SMS ordering algorithm.
1690 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1691 the NODE_ORDER array, starting from position zero. */
1692 static int
1693 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
1694 int * node_order, int pos)
1696 enum sms_direction dir;
1697 int num_nodes = g->num_nodes;
1698 sbitmap workset = sbitmap_alloc (num_nodes);
1699 sbitmap tmp = sbitmap_alloc (num_nodes);
1700 sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
1701 sbitmap predecessors = sbitmap_alloc (num_nodes);
1702 sbitmap successors = sbitmap_alloc (num_nodes);
1704 sbitmap_zero (predecessors);
1705 find_predecessors (predecessors, g, nodes_ordered);
1707 sbitmap_zero (successors);
1708 find_successors (successors, g, nodes_ordered);
1710 sbitmap_zero (tmp);
1711 if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
1713 sbitmap_copy (workset, tmp);
1714 dir = BOTTOMUP;
1716 else if (sbitmap_a_and_b_cg (tmp, successors, scc))
1718 sbitmap_copy (workset, tmp);
1719 dir = TOPDOWN;
1721 else
1723 int u;
1725 sbitmap_zero (workset);
1726 if ((u = find_max_asap (g, scc)) >= 0)
1727 SET_BIT (workset, u);
1728 dir = BOTTOMUP;
1731 sbitmap_zero (zero_bitmap);
1732 while (!sbitmap_equal (workset, zero_bitmap))
1734 int v;
1735 ddg_node_ptr v_node;
1736 sbitmap v_node_preds;
1737 sbitmap v_node_succs;
1739 if (dir == TOPDOWN)
1741 while (!sbitmap_equal (workset, zero_bitmap))
1743 v = find_max_hv_min_mob (g, workset);
1744 v_node = &g->nodes[v];
1745 node_order[pos++] = v;
1746 v_node_succs = NODE_SUCCESSORS (v_node);
1747 sbitmap_a_and_b (tmp, v_node_succs, scc);
1749 /* Don't consider the already ordered successors again. */
1750 sbitmap_difference (tmp, tmp, nodes_ordered);
1751 sbitmap_a_or_b (workset, workset, tmp);
1752 RESET_BIT (workset, v);
1753 SET_BIT (nodes_ordered, v);
1755 dir = BOTTOMUP;
1756 sbitmap_zero (predecessors);
1757 find_predecessors (predecessors, g, nodes_ordered);
1758 sbitmap_a_and_b (workset, predecessors, scc);
1760 else
1762 while (!sbitmap_equal (workset, zero_bitmap))
1764 v = find_max_dv_min_mob (g, workset);
1765 v_node = &g->nodes[v];
1766 node_order[pos++] = v;
1767 v_node_preds = NODE_PREDECESSORS (v_node);
1768 sbitmap_a_and_b (tmp, v_node_preds, scc);
1770 /* Don't consider the already ordered predecessors again. */
1771 sbitmap_difference (tmp, tmp, nodes_ordered);
1772 sbitmap_a_or_b (workset, workset, tmp);
1773 RESET_BIT (workset, v);
1774 SET_BIT (nodes_ordered, v);
1776 dir = TOPDOWN;
1777 sbitmap_zero (successors);
1778 find_successors (successors, g, nodes_ordered);
1779 sbitmap_a_and_b (workset, successors, scc);
1782 sbitmap_free (tmp);
1783 sbitmap_free (workset);
1784 sbitmap_free (zero_bitmap);
1785 sbitmap_free (predecessors);
1786 sbitmap_free (successors);
1787 return pos;
1791 /* This page contains functions for manipulating partial-schedules during
1792 modulo scheduling. */
1794 /* Create a partial schedule and allocate a memory to hold II rows. */
1795 static partial_schedule_ptr
1796 create_partial_schedule (int ii, ddg_ptr g, int history)
1798 partial_schedule_ptr ps = (partial_schedule_ptr)
1799 xmalloc (sizeof (struct partial_schedule));
1800 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1801 ps->ii = ii;
1802 ps->history = history;
1803 ps->min_cycle = INT_MAX;
1804 ps->max_cycle = INT_MIN;
1805 ps->g = g;
1807 return ps;
1810 /* Free the PS_INSNs in rows array of the given partial schedule.
1811 ??? Consider caching the PS_INSN's. */
1812 static void
1813 free_ps_insns (partial_schedule_ptr ps)
1815 int i;
1817 for (i = 0; i < ps->ii; i++)
1819 while (ps->rows[i])
1821 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
1823 free (ps->rows[i]);
1824 ps->rows[i] = ps_insn;
1826 ps->rows[i] = NULL;
1830 /* Free all the memory allocated to the partial schedule. */
1831 static void
1832 free_partial_schedule (partial_schedule_ptr ps)
1834 if (!ps)
1835 return;
1836 free_ps_insns (ps);
1837 free (ps->rows);
1838 free (ps);
1841 /* Clear the rows array with its PS_INSNs, and create a new one with
1842 NEW_II rows. */
1843 static void
1844 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
1846 if (!ps)
1847 return;
1848 free_ps_insns (ps);
1849 if (new_ii == ps->ii)
1850 return;
1851 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
1852 * sizeof (ps_insn_ptr));
1853 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
1854 ps->ii = new_ii;
1855 ps->min_cycle = INT_MAX;
1856 ps->max_cycle = INT_MIN;
1859 /* Prints the partial schedule as an ii rows array, for each rows
1860 print the ids of the insns in it. */
1861 void
1862 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
1864 int i;
1866 for (i = 0; i < ps->ii; i++)
1868 ps_insn_ptr ps_i = ps->rows[i];
1870 fprintf (dump, "\n[CYCLE %d ]: ", i);
1871 while (ps_i)
1873 fprintf (dump, "%d, ",
1874 INSN_UID (ps_i->node->insn));
1875 ps_i = ps_i->next_in_row;
1880 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1881 static ps_insn_ptr
1882 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
1884 ps_insn_ptr ps_i = xmalloc (sizeof (struct ps_insn));
1886 ps_i->node = node;
1887 ps_i->next_in_row = NULL;
1888 ps_i->prev_in_row = NULL;
1889 ps_i->row_rest_count = rest_count;
1890 ps_i->cycle = cycle;
1892 return ps_i;
1896 /* Removes the given PS_INSN from the partial schedule. Returns false if the
1897 node is not found in the partial schedule, else returns true. */
1898 static int
1899 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
1901 int row;
1903 if (!ps || !ps_i)
1904 return false;
1906 row = SMODULO (ps_i->cycle, ps->ii);
1907 if (! ps_i->prev_in_row)
1909 if (ps_i != ps->rows[row])
1910 return false;
1912 ps->rows[row] = ps_i->next_in_row;
1913 if (ps->rows[row])
1914 ps->rows[row]->prev_in_row = NULL;
1916 else
1918 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
1919 if (ps_i->next_in_row)
1920 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
1922 free (ps_i);
1923 return true;
1926 /* Unlike what literature describes for modulo scheduling (which focuses
1927 on VLIW machines) the order of the instructions inside a cycle is
1928 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
1929 where the current instruction should go relative to the already
1930 scheduled instructions in the given cycle. Go over these
1931 instructions and find the first possible column to put it in. */
1932 static bool
1933 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
1934 sbitmap must_precede, sbitmap must_follow)
1936 ps_insn_ptr next_ps_i;
1937 ps_insn_ptr first_must_follow = NULL;
1938 ps_insn_ptr last_must_precede = NULL;
1939 int row;
1941 if (! ps_i)
1942 return false;
1944 row = SMODULO (ps_i->cycle, ps->ii);
1946 /* Find the first must follow and the last must precede
1947 and insert the node immediately after the must precede
1948 but make sure that it there is no must follow after it. */
1949 for (next_ps_i = ps->rows[row];
1950 next_ps_i;
1951 next_ps_i = next_ps_i->next_in_row)
1953 if (TEST_BIT (must_follow, next_ps_i->node->cuid)
1954 && ! first_must_follow)
1955 first_must_follow = next_ps_i;
1956 if (TEST_BIT (must_precede, next_ps_i->node->cuid))
1958 /* If we have already met a node that must follow, then
1959 there is no possible column. */
1960 if (first_must_follow)
1961 return false;
1962 else
1963 last_must_precede = next_ps_i;
1967 /* Now insert the node after INSERT_AFTER_PSI. */
1969 if (! last_must_precede)
1971 ps_i->next_in_row = ps->rows[row];
1972 ps_i->prev_in_row = NULL;
1973 if (ps_i->next_in_row)
1974 ps_i->next_in_row->prev_in_row = ps_i;
1975 ps->rows[row] = ps_i;
1977 else
1979 ps_i->next_in_row = last_must_precede->next_in_row;
1980 last_must_precede->next_in_row = ps_i;
1981 ps_i->prev_in_row = last_must_precede;
1982 if (ps_i->next_in_row)
1983 ps_i->next_in_row->prev_in_row = ps_i;
1986 return true;
1989 /* Advances the PS_INSN one column in its current row; returns false
1990 in failure and true in success. Bit N is set in MUST_FOLLOW if
1991 the node with cuid N must be come after the node pointed to by
1992 PS_I when scheduled in the same cycle. */
1993 static int
1994 ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
1995 sbitmap must_follow)
1997 ps_insn_ptr prev, next;
1998 int row;
1999 ddg_node_ptr next_node;
2001 if (!ps || !ps_i)
2002 return false;
2004 row = SMODULO (ps_i->cycle, ps->ii);
2006 if (! ps_i->next_in_row)
2007 return false;
2009 next_node = ps_i->next_in_row->node;
2011 /* Check if next_in_row is dependent on ps_i, both having same sched
2012 times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
2013 if (TEST_BIT (must_follow, next_node->cuid))
2014 return false;
2016 /* Advance PS_I over its next_in_row in the doubly linked list. */
2017 prev = ps_i->prev_in_row;
2018 next = ps_i->next_in_row;
2020 if (ps_i == ps->rows[row])
2021 ps->rows[row] = next;
2023 ps_i->next_in_row = next->next_in_row;
2025 if (next->next_in_row)
2026 next->next_in_row->prev_in_row = ps_i;
2028 next->next_in_row = ps_i;
2029 ps_i->prev_in_row = next;
2031 next->prev_in_row = prev;
2032 if (prev)
2033 prev->next_in_row = next;
2035 return true;
2038 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2039 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2040 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2041 before/after (respectively) the node pointed to by PS_I when scheduled
2042 in the same cycle. */
2043 static ps_insn_ptr
2044 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2045 sbitmap must_precede, sbitmap must_follow)
2047 ps_insn_ptr ps_i;
2048 int rest_count = 1;
2049 int row = SMODULO (cycle, ps->ii);
2051 if (ps->rows[row]
2052 && ps->rows[row]->row_rest_count >= issue_rate)
2053 return NULL;
2055 if (ps->rows[row])
2056 rest_count += ps->rows[row]->row_rest_count;
2058 ps_i = create_ps_insn (node, rest_count, cycle);
2060 /* Finds and inserts PS_I according to MUST_FOLLOW and
2061 MUST_PRECEDE. */
2062 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2064 free (ps_i);
2065 return NULL;
2068 return ps_i;
2071 /* Advance time one cycle. Assumes DFA is being used. */
2072 static void
2073 advance_one_cycle (void)
2075 if (targetm.sched.dfa_pre_cycle_insn)
2076 state_transition (curr_state,
2077 (*targetm.sched.dfa_pre_cycle_insn) ());
2079 state_transition (curr_state, NULL);
2081 if (targetm.sched.dfa_post_cycle_insn)
2082 state_transition (curr_state,
2083 (*targetm.sched.dfa_post_cycle_insn) ());
2086 /* Checks if PS has resource conflicts according to DFA, starting from
2087 FROM cycle to TO cycle; returns true if there are conflicts and false
2088 if there are no conflicts. Assumes DFA is being used. */
2089 static int
2090 ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
2092 int cycle;
2094 state_reset (curr_state);
2096 for (cycle = from; cycle <= to; cycle++)
2098 ps_insn_ptr crr_insn;
2099 /* Holds the remaining issue slots in the current row. */
2100 int can_issue_more = issue_rate;
2102 /* Walk through the DFA for the current row. */
2103 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
2104 crr_insn;
2105 crr_insn = crr_insn->next_in_row)
2107 rtx insn = crr_insn->node->insn;
2109 if (!INSN_P (insn))
2110 continue;
2112 /* Check if there is room for the current insn. */
2113 if (!can_issue_more || state_dead_lock_p (curr_state))
2114 return true;
2116 /* Update the DFA state and return with failure if the DFA found
2117 recource conflicts. */
2118 if (state_transition (curr_state, insn) >= 0)
2119 return true;
2121 if (targetm.sched.variable_issue)
2122 can_issue_more =
2123 (*targetm.sched.variable_issue) (sched_dump, sched_verbose,
2124 insn, can_issue_more);
2125 /* A naked CLOBBER or USE generates no instruction, so don't
2126 let them consume issue slots. */
2127 else if (GET_CODE (PATTERN (insn)) != USE
2128 && GET_CODE (PATTERN (insn)) != CLOBBER)
2129 can_issue_more--;
2132 /* Advance the DFA to the next cycle. */
2133 advance_one_cycle ();
2135 return false;
2138 /* Checks if the given node causes resource conflicts when added to PS at
2139 cycle C. If not the node is added to PS and returned; otherwise zero
2140 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2141 cuid N must be come before/after (respectively) the node pointed to by
2142 PS_I when scheduled in the same cycle. */
2143 static ps_insn_ptr
2144 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2145 int c, sbitmap must_precede,
2146 sbitmap must_follow)
2148 int has_conflicts = 0;
2149 ps_insn_ptr ps_i;
2151 /* First add the node to the PS, if this succeeds check for
2152 conflicts, trying different issue slots in the same row. */
2153 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2154 return NULL; /* Failed to insert the node at the given cycle. */
2156 has_conflicts = ps_has_conflicts (ps, c, c)
2157 || (ps->history > 0
2158 && ps_has_conflicts (ps,
2159 c - ps->history,
2160 c + ps->history));
2162 /* Try different issue slots to find one that the given node can be
2163 scheduled in without conflicts. */
2164 while (has_conflicts)
2166 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2167 break;
2168 has_conflicts = ps_has_conflicts (ps, c, c)
2169 || (ps->history > 0
2170 && ps_has_conflicts (ps,
2171 c - ps->history,
2172 c + ps->history));
2175 if (has_conflicts)
2177 remove_node_from_ps (ps, ps_i);
2178 return NULL;
2181 ps->min_cycle = MIN (ps->min_cycle, c);
2182 ps->max_cycle = MAX (ps->max_cycle, c);
2183 return ps_i;
2186 /* Rotate the rows of PS such that insns scheduled at time
2187 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2188 static void
2189 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2191 int i, row, backward_rotates;
2192 int last_row = ps->ii - 1;
2194 if (start_cycle == 0)
2195 return;
2197 backward_rotates = SMODULO (start_cycle, ps->ii);
2199 /* Revisit later and optimize this into a single loop. */
2200 for (i = 0; i < backward_rotates; i++)
2202 ps_insn_ptr first_row = ps->rows[0];
2204 for (row = 0; row < last_row; row++)
2205 ps->rows[row] = ps->rows[row+1];
2207 ps->rows[last_row] = first_row;
2210 ps->max_cycle -= start_cycle;
2211 ps->min_cycle -= start_cycle;
2214 #endif /* INSN_SCHEDULING*/