/* Source: gcc/modulo-sched.c (official-gcc.git), snapshot of 2005-01-22
   (commit by Thomas Koenig <Thomas.Koenig@online.de>),
   blob 57879baf799b10e48a8ba784a671a39c8689d93f.  */
1 /* Swing Modulo Scheduling implementation.
2 Copyright (C) 2004
3 Free Software Foundation, Inc.
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tm_p.h"
31 #include "hard-reg-set.h"
32 #include "regs.h"
33 #include "function.h"
34 #include "flags.h"
35 #include "insn-config.h"
36 #include "insn-attr.h"
37 #include "except.h"
38 #include "toplev.h"
39 #include "recog.h"
40 #include "sched-int.h"
41 #include "target.h"
42 #include "cfglayout.h"
43 #include "cfgloop.h"
44 #include "cfghooks.h"
45 #include "expr.h"
46 #include "params.h"
47 #include "gcov-io.h"
48 #include "df.h"
49 #include "ddg.h"
51 #ifdef INSN_SCHEDULING
53 /* This file contains the implementation of the Swing Modulo Scheduler,
54 described in the following references:
55 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
56 Lifetime--sensitive modulo scheduling in a production environment.
57 IEEE Trans. on Comps., 50(3), March 2001
58 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
59 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
60 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
62 The basic structure is:
63 1. Build a data-dependence graph (DDG) for each loop.
64 2. Use the DDG to order the insns of a loop (not in topological order
65 necessarily, but rather) trying to place each insn after all its
66 predecessors _or_ after all its successors.
67 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
68 4. Use the ordering to perform list-scheduling of the loop:
69 1. Set II = MII. We will try to schedule the loop within II cycles.
70 2. Try to schedule the insns one by one according to the ordering.
71 For each insn compute an interval of cycles by considering already-
72 scheduled preds and succs (and associated latencies); try to place
73 the insn in the cycles of this window checking for potential
74 resource conflicts (using the DFA interface).
75 Note: this is different from the cycle-scheduling of schedule_insns;
76 here the insns are not scheduled monotonically top-down (nor bottom-
77 up).
78 3. If failed in scheduling all insns - bump II++ and try again, unless
79 II reaches an upper bound MaxII, in which case report failure.
80 5. If we succeeded in scheduling the loop within II cycles, we now
81 generate prolog and epilog, decrease the counter of the loop, and
82 perform modulo variable expansion for live ranges that span more than
83 II cycles (i.e. use register copies to prevent a def from overwriting
84 itself before reaching the use).
/* This page defines partial-schedule structures and functions for
   modulo scheduling.  */

typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   Beware: X and Y are each evaluated more than once.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)

/* The set of CFG hooks used when updating the control-flow graph.  */
#define CFG_HOOKS cfg_layout_rtl_cfg_hooks
110 /* A single instruction in the partial schedule. */
111 struct ps_insn
113 /* The corresponding DDG_NODE. */
114 ddg_node_ptr node;
116 /* The (absolute) cycle in which the PS instruction is scheduled.
117 Same as SCHED_TIME (node). */
118 int cycle;
120 /* The next/prev PS_INSN in the same row. */
121 ps_insn_ptr next_in_row,
122 prev_in_row;
124 /* The number of nodes in the same row that come after this node. */
125 int row_rest_count;
128 /* Holds the partial schedule as an array of II rows. Each entry of the
129 array points to a linked list of PS_INSNs, which represents the
130 instructions that are scheduled for that row. */
131 struct partial_schedule
133 int ii; /* Number of rows in the partial schedule. */
134 int history; /* Threshold for conflict checking using DFA. */
136 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
137 ps_insn_ptr *rows;
139 /* The earliest absolute cycle of an insn in the partial schedule. */
140 int min_cycle;
142 /* The latest absolute cycle of an insn in the partial schedule. */
143 int max_cycle;
145 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
149 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
150 static void free_partial_schedule (partial_schedule_ptr);
151 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
152 void print_partial_schedule (partial_schedule_ptr, FILE *);
153 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
154 ddg_node_ptr node, int cycle,
155 sbitmap must_precede,
156 sbitmap must_follow);
157 static void rotate_partial_schedule (partial_schedule_ptr, int);
158 void set_row_column_for_ps (partial_schedule_ptr);
161 /* This page defines constants and structures for the modulo scheduling
162 driver. */
164 /* As in haifa-sched.c: */
165 /* issue_rate is the number of insns that can be scheduled in the same
166 machine cycle. It can be defined in the config/mach/mach.h file,
167 otherwise we set it to 1. */
169 static int issue_rate;
171 /* For printing statistics. */
172 static FILE *stats_file;
174 static int sms_order_nodes (ddg_ptr, int, int * result);
175 static void set_node_sched_params (ddg_ptr);
176 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int,
177 int *, FILE*);
178 static void permute_partial_schedule (partial_schedule_ptr ps, rtx last);
179 static void generate_prolog_epilog (partial_schedule_ptr, rtx, rtx, int);
180 static void duplicate_insns_of_cycles (partial_schedule_ptr ps,
181 int from_stage, int to_stage,
182 int is_prolog);
/* Accessors for the scheduling parameters stashed in each DDG node's
   aux.info field (a node_sched_params_ptr).  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
195 /* The scheduling parameters held for each node. */
196 typedef struct node_sched_params
198 int asap; /* A lower-bound on the absolute scheduling cycle. */
199 int time; /* The absolute scheduling cycle (time >= asap). */
201 /* The following field (first_reg_move) is a pointer to the first
202 register-move instruction added to handle the modulo-variable-expansion
203 of the register defined by this node. This register-move copies the
204 original register defined by the node. */
205 rtx first_reg_move;
207 /* The number of register-move instructions added, immediately preceding
208 first_reg_move. */
209 int nreg_moves;
211 int row; /* Holds time % ii. */
212 int stage; /* Holds time / ii. */
214 /* The column of a node inside the ps. If nodes u, v are on the same row,
215 u will precede v if column (u) < column (v). */
216 int column;
217 } *node_sched_params_ptr;
220 /* The following three functions are copied from the current scheduler
221 code in order to use sched_analyze() for computing the dependencies.
222 They are used when initializing the sched_info structure. */
223 static const char *
224 sms_print_insn (rtx insn, int aligned ATTRIBUTE_UNUSED)
226 static char tmp[80];
228 sprintf (tmp, "i%4d", INSN_UID (insn));
229 return tmp;
232 static int
233 contributes_to_priority (rtx next, rtx insn)
235 return BLOCK_NUM (next) == BLOCK_NUM (insn);
238 static void
239 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
240 regset cond_exec ATTRIBUTE_UNUSED,
241 regset used ATTRIBUTE_UNUSED,
242 regset set ATTRIBUTE_UNUSED)
246 static struct sched_info sms_sched_info =
248 NULL,
249 NULL,
250 NULL,
251 NULL,
252 NULL,
253 sms_print_insn,
254 contributes_to_priority,
255 compute_jump_reg_dependencies,
256 NULL, NULL,
257 NULL, NULL,
258 0, 0, 0
262 /* Return the register decremented and tested or zero if it is not a decrement
263 and branch jump insn (similar to doloop_condition_get). */
264 static rtx
265 doloop_register_get (rtx insn, rtx *comp)
267 rtx pattern, cmp, inc, reg, condition;
269 if (!JUMP_P (insn))
270 return NULL_RTX;
271 pattern = PATTERN (insn);
273 /* The canonical doloop pattern we expect is:
275 (parallel [(set (pc) (if_then_else (condition)
276 (label_ref (label))
277 (pc)))
278 (set (reg) (plus (reg) (const_int -1)))
279 (additional clobbers and uses)])
281 where condition is further restricted to be
282 (ne (reg) (const_int 1)). */
284 if (GET_CODE (pattern) != PARALLEL)
285 return NULL_RTX;
287 cmp = XVECEXP (pattern, 0, 0);
288 inc = XVECEXP (pattern, 0, 1);
289 /* Return the compare rtx. */
290 *comp = cmp;
292 /* Check for (set (reg) (something)). */
293 if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
294 return NULL_RTX;
296 /* Extract loop counter register. */
297 reg = SET_DEST (inc);
299 /* Check if something = (plus (reg) (const_int -1)). */
300 if (GET_CODE (SET_SRC (inc)) != PLUS
301 || XEXP (SET_SRC (inc), 0) != reg
302 || XEXP (SET_SRC (inc), 1) != constm1_rtx)
303 return NULL_RTX;
305 /* Check for (set (pc) (if_then_else (condition)
306 (label_ref (label))
307 (pc))). */
308 if (GET_CODE (cmp) != SET
309 || SET_DEST (cmp) != pc_rtx
310 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
311 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
312 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
313 return NULL_RTX;
315 /* Extract loop termination condition. */
316 condition = XEXP (SET_SRC (cmp), 0);
318 /* Check if condition = (ne (reg) (const_int 1)), which is more
319 restrictive than the check in doloop_condition_get:
320 if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
321 || GET_CODE (XEXP (condition, 1)) != CONST_INT). */
322 if (GET_CODE (condition) != NE
323 || XEXP (condition, 1) != const1_rtx)
324 return NULL_RTX;
326 if (XEXP (condition, 0) == reg)
327 return reg;
329 return NULL_RTX;
332 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
333 that the number of iterations is a compile-time constant. If so,
334 return the rtx that sets COUNT_REG to a constant, and set COUNT to
335 this constant. Otherwise return 0. */
336 static rtx
337 const_iteration_count (rtx count_reg, basic_block pre_header,
338 HOST_WIDEST_INT * count)
340 rtx insn;
341 rtx head, tail;
342 get_block_head_tail (pre_header->index, &head, &tail);
344 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
345 if (INSN_P (insn) && single_set (insn) &&
346 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
348 rtx pat = single_set (insn);
350 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
352 *count = INTVAL (SET_SRC (pat));
353 return insn;
356 return NULL_RTX;
359 return NULL_RTX;
362 /* A very simple resource-based lower bound on the initiation interval.
363 ??? Improve the accuracy of this bound by considering the
364 utilization of various units. */
365 static int
366 res_MII (ddg_ptr g)
368 return (g->num_nodes / issue_rate);
372 /* Points to the array that contains the sched data for each node. */
373 static node_sched_params_ptr node_sched_params;
375 /* Allocate sched_params for each node and initialize it. Assumes that
376 the aux field of each node contain the asap bound (computed earlier),
377 and copies it into the sched_params field. */
378 static void
379 set_node_sched_params (ddg_ptr g)
381 int i;
383 /* Allocate for each node in the DDG a place to hold the "sched_data". */
384 /* Initialize ASAP/ALAP/HIGHT to zero. */
385 node_sched_params = (node_sched_params_ptr)
386 xcalloc (g->num_nodes,
387 sizeof (struct node_sched_params));
389 /* Set the pointer of the general data of the node to point to the
390 appropriate sched_params structure. */
391 for (i = 0; i < g->num_nodes; i++)
393 /* Watch out for aliasing problems? */
394 node_sched_params[i].asap = g->nodes[i].aux.count;
395 g->nodes[i].aux.info = &node_sched_params[i];
399 static void
400 print_node_sched_params (FILE * dump_file, int num_nodes)
402 int i;
404 for (i = 0; i < num_nodes; i++)
406 node_sched_params_ptr nsp = &node_sched_params[i];
407 rtx reg_move = nsp->first_reg_move;
408 int j;
410 fprintf (dump_file, "Node %d:\n", i);
411 fprintf (dump_file, " asap = %d:\n", nsp->asap);
412 fprintf (dump_file, " time = %d:\n", nsp->time);
413 fprintf (dump_file, " nreg_moves = %d:\n", nsp->nreg_moves);
414 for (j = 0; j < nsp->nreg_moves; j++)
416 fprintf (dump_file, " reg_move = ");
417 print_rtl_single (dump_file, reg_move);
418 reg_move = PREV_INSN (reg_move);
423 /* Calculate an upper bound for II. SMS should not schedule the loop if it
424 requires more cycles than this bound. Currently set to the sum of the
425 longest latency edge for each node. Reset based on experiments. */
426 static int
427 calculate_maxii (ddg_ptr g)
429 int i;
430 int maxii = 0;
432 for (i = 0; i < g->num_nodes; i++)
434 ddg_node_ptr u = &g->nodes[i];
435 ddg_edge_ptr e;
436 int max_edge_latency = 0;
438 for (e = u->out; e; e = e->next_out)
439 max_edge_latency = MAX (max_edge_latency, e->latency);
441 maxii += max_edge_latency;
443 return maxii;
447 /* Given the partial schedule, generate register moves when the length
448 of the register live range is more than ii; the number of moves is
449 determined according to the following equation:
450 SCHED_TIME (use) - SCHED_TIME (def) { 1 broken loop-carried
451 nreg_moves = ----------------------------------- - { dependence.
452 ii { 0 if not.
453 This handles the modulo-variable-expansions (mve's) needed for the ps. */
454 static void
455 generate_reg_moves (partial_schedule_ptr ps)
457 ddg_ptr g = ps->g;
458 int ii = ps->ii;
459 int i;
461 for (i = 0; i < g->num_nodes; i++)
463 ddg_node_ptr u = &g->nodes[i];
464 ddg_edge_ptr e;
465 int nreg_moves = 0, i_reg_move;
466 sbitmap *uses_of_defs;
467 rtx last_reg_move;
468 rtx prev_reg, old_reg;
470 /* Compute the number of reg_moves needed for u, by looking at life
471 ranges started at u (excluding self-loops). */
472 for (e = u->out; e; e = e->next_out)
473 if (e->type == TRUE_DEP && e->dest != e->src)
475 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
477 /* If dest precedes src in the schedule of the kernel, then dest
478 will read before src writes and we can save one reg_copy. */
479 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
480 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
481 nreg_moves4e--;
483 nreg_moves = MAX (nreg_moves, nreg_moves4e);
486 if (nreg_moves == 0)
487 continue;
489 /* Every use of the register defined by node may require a different
490 copy of this register, depending on the time the use is scheduled.
491 Set a bitmap vector, telling which nodes use each copy of this
492 register. */
493 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
494 sbitmap_vector_zero (uses_of_defs, nreg_moves);
495 for (e = u->out; e; e = e->next_out)
496 if (e->type == TRUE_DEP && e->dest != e->src)
498 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;
500 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
501 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
502 dest_copy--;
504 if (dest_copy)
505 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
508 /* Now generate the reg_moves, attaching relevant uses to them. */
509 SCHED_NREG_MOVES (u) = nreg_moves;
510 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
511 last_reg_move = u->insn;
513 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
515 int i_use;
516 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
517 rtx reg_move = gen_move_insn (new_reg, prev_reg);
519 add_insn_before (reg_move, last_reg_move);
520 last_reg_move = reg_move;
522 if (!SCHED_FIRST_REG_MOVE (u))
523 SCHED_FIRST_REG_MOVE (u) = reg_move;
525 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use,
526 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg));
528 prev_reg = new_reg;
533 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values
534 of SCHED_ROW and SCHED_STAGE. */
535 static void
536 normalize_sched_times (partial_schedule_ptr ps)
538 int i;
539 ddg_ptr g = ps->g;
540 int amount = PS_MIN_CYCLE (ps);
541 int ii = ps->ii;
543 for (i = 0; i < g->num_nodes; i++)
545 ddg_node_ptr u = &g->nodes[i];
546 int normalized_time = SCHED_TIME (u) - amount;
548 if (normalized_time < 0)
549 abort ();
551 SCHED_TIME (u) = normalized_time;
552 SCHED_ROW (u) = normalized_time % ii;
553 SCHED_STAGE (u) = normalized_time / ii;
557 /* Set SCHED_COLUMN of each node according to its position in PS. */
558 static void
559 set_columns_for_ps (partial_schedule_ptr ps)
561 int row;
563 for (row = 0; row < ps->ii; row++)
565 ps_insn_ptr cur_insn = ps->rows[row];
566 int column = 0;
568 for (; cur_insn; cur_insn = cur_insn->next_in_row)
569 SCHED_COLUMN (cur_insn->node) = column++;
573 /* Permute the insns according to their order in PS, from row 0 to
574 row ii-1, and position them right before LAST. This schedules
575 the insns of the loop kernel. */
576 static void
577 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
579 int ii = ps->ii;
580 int row;
581 ps_insn_ptr ps_ij;
583 for (row = 0; row < ii ; row++)
584 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
585 if (PREV_INSN (last) != ps_ij->node->insn)
586 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
587 PREV_INSN (last));
590 /* Used to generate the prologue & epilogue. Duplicate the subset of
591 nodes whose stages are between FROM_STAGE and TO_STAGE (inclusive
592 of both), together with a prefix/suffix of their reg_moves. */
593 static void
594 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
595 int to_stage, int for_prolog)
597 int row;
598 ps_insn_ptr ps_ij;
600 for (row = 0; row < ps->ii; row++)
601 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
603 ddg_node_ptr u_node = ps_ij->node;
604 int j, i_reg_moves;
605 rtx reg_move = NULL_RTX;
607 if (for_prolog)
609 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing
610 number of reg_moves starting with the second occurrence of
611 u_node, which is generated if its SCHED_STAGE <= to_stage. */
612 i_reg_moves = to_stage - SCHED_STAGE (u_node);
613 i_reg_moves = MAX (i_reg_moves, 0);
614 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
616 /* The reg_moves start from the *first* reg_move backwards. */
617 if (i_reg_moves)
619 reg_move = SCHED_FIRST_REG_MOVE (u_node);
620 for (j = 1; j < i_reg_moves; j++)
621 reg_move = PREV_INSN (reg_move);
624 else /* It's for the epilog. */
626 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves,
627 starting to decrease one stage after u_node no longer occurs;
628 that is, generate all reg_moves until
629 SCHED_STAGE (u_node) == from_stage - 1. */
630 i_reg_moves = SCHED_NREG_MOVES (u_node)
631 - (from_stage - SCHED_STAGE (u_node) - 1);
632 i_reg_moves = MAX (i_reg_moves, 0);
633 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));
635 /* The reg_moves start from the *last* reg_move forwards. */
636 if (i_reg_moves)
638 reg_move = SCHED_FIRST_REG_MOVE (u_node);
639 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
640 reg_move = PREV_INSN (reg_move);
644 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
645 emit_insn (copy_rtx (PATTERN (reg_move)));
647 if (SCHED_STAGE (u_node) >= from_stage
648 && SCHED_STAGE (u_node) <= to_stage)
649 duplicate_insn_chain (u_node->first_note, u_node->insn);
654 /* Generate the instructions (including reg_moves) for prolog & epilog. */
655 static void
656 generate_prolog_epilog (partial_schedule_ptr ps, rtx orig_loop_beg,
657 rtx orig_loop_end, int unknown_count)
659 int i;
660 int last_stage = PS_STAGE_COUNT (ps) - 1;
661 edge e;
662 rtx c_reg = NULL_RTX;
663 rtx cmp = NULL_RTX;
664 rtx precond_jump = NULL_RTX;
665 rtx precond_exit_label = NULL_RTX;
666 rtx precond_exit_label_insn = NULL_RTX;
667 rtx last_epilog_insn = NULL_RTX;
668 rtx loop_exit_label = NULL_RTX;
669 rtx loop_exit_label_insn = NULL_RTX;
670 rtx orig_loop_bct = NULL_RTX;
672 /* Loop header edge. */
673 e = EDGE_PRED (ps->g->bb, 0);
674 if (e->src == ps->g->bb)
675 e = EDGE_PRED (ps->g->bb, 1);
677 /* Generate the prolog, inserting its insns on the loop-entry edge. */
678 start_sequence ();
680 /* This is the place where we want to insert the precondition. */
681 if (unknown_count)
682 precond_jump = emit_note (NOTE_INSN_DELETED);
684 for (i = 0; i < last_stage; i++)
685 duplicate_insns_of_cycles (ps, 0, i, 1);
687 /* No need to call insert_insn_on_edge; we prepared the sequence. */
688 e->insns.r = get_insns ();
689 end_sequence ();
691 /* Generate the epilog, inserting its insns on the loop-exit edge. */
692 start_sequence ();
694 for (i = 0; i < last_stage; i++)
695 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0);
697 last_epilog_insn = emit_note (NOTE_INSN_DELETED);
699 /* Emit the label where to put the original loop code. */
700 if (unknown_count)
702 rtx label, cond;
704 precond_exit_label = gen_label_rtx ();
705 precond_exit_label_insn = emit_label (precond_exit_label);
707 /* Put the original loop code. */
708 reorder_insns_nobb (orig_loop_beg, orig_loop_end, precond_exit_label_insn);
710 /* Change the label of the BCT to be the PRECOND_EXIT_LABEL. */
711 orig_loop_bct = get_last_insn ();
712 c_reg = doloop_register_get (orig_loop_bct, &cmp);
713 label = XEXP (SET_SRC (cmp), 1);
714 cond = XEXP (SET_SRC (cmp), 0);
716 if (! c_reg || GET_CODE (cond) != NE)
717 abort ();
719 XEXP (label, 0) = precond_exit_label;
720 JUMP_LABEL (orig_loop_bct) = precond_exit_label_insn;
721 LABEL_NUSES (precond_exit_label_insn)++;
723 /* Generate the loop exit label. */
724 loop_exit_label = gen_label_rtx ();
725 loop_exit_label_insn = emit_label (loop_exit_label);
728 e = EDGE_SUCC (ps->g->bb, 0);
729 if (e->dest == ps->g->bb)
730 e = EDGE_SUCC (ps->g->bb, 1);
732 e->insns.r = get_insns ();
733 end_sequence ();
735 commit_edge_insertions ();
737 if (unknown_count)
739 rtx precond_insns, epilog_jump, insert_after_insn;
740 basic_block loop_exit_bb = BLOCK_FOR_INSN (loop_exit_label_insn);
741 basic_block epilog_bb = BLOCK_FOR_INSN (last_epilog_insn);
742 basic_block precond_bb = BLOCK_FOR_INSN (precond_jump);
743 basic_block orig_loop_bb = BLOCK_FOR_INSN (precond_exit_label_insn);
744 edge epilog_exit_edge = EDGE_SUCC (epilog_bb, 0);
746 /* Do loop preconditioning to take care of cases were the loop count is
747 less than the stage count. Update the CFG properly. */
748 insert_after_insn = precond_jump;
749 start_sequence ();
750 c_reg = doloop_register_get (ps->g->closing_branch->insn, &cmp);
751 emit_cmp_and_jump_insns (c_reg, GEN_INT (PS_STAGE_COUNT (ps)), LT, NULL,
752 GET_MODE (c_reg), 1, precond_exit_label);
753 precond_insns = get_insns ();
754 precond_jump = get_last_insn ();
755 end_sequence ();
756 reorder_insns (precond_insns, precond_jump, insert_after_insn);
758 /* Generate a subtract instruction at the beginning of the prolog to
759 adjust the loop count by STAGE_COUNT. */
760 emit_insn_after (gen_sub2_insn (c_reg, GEN_INT (PS_STAGE_COUNT (ps) - 1)),
761 precond_jump);
762 update_bb_for_insn (precond_bb);
763 delete_insn (insert_after_insn);
765 /* Update label info for the precondition jump. */
766 JUMP_LABEL (precond_jump) = precond_exit_label_insn;
767 LABEL_NUSES (precond_exit_label_insn)++;
769 /* Update the CFG. */
770 split_block (precond_bb, precond_jump);
771 make_edge (precond_bb, orig_loop_bb, 0);
773 /* Add a jump at end of the epilog to the LOOP_EXIT_LABEL to jump over the
774 original loop copy and update the CFG. */
775 epilog_jump = emit_jump_insn_after (gen_jump (loop_exit_label),
776 last_epilog_insn);
777 delete_insn (last_epilog_insn);
778 JUMP_LABEL (epilog_jump) = loop_exit_label_insn;
779 LABEL_NUSES (loop_exit_label_insn)++;
781 redirect_edge_succ (epilog_exit_edge, loop_exit_bb);
782 epilog_exit_edge->flags &= ~EDGE_FALLTHRU;
783 emit_barrier_after (BB_END (epilog_bb));
787 /* Return the line note insn preceding INSN, for debugging. Taken from
788 emit-rtl.c. */
789 static rtx
790 find_line_note (rtx insn)
792 for (; insn; insn = PREV_INSN (insn))
793 if (NOTE_P (insn)
794 && NOTE_LINE_NUMBER (insn) >= 0)
795 break;
797 return insn;
800 /* Main entry point, perform SMS scheduling on the loops of the function
801 that consist of single basic blocks. */
802 void
803 sms_schedule (FILE *dump_file)
805 static int passes = 0;
806 rtx insn;
807 ddg_ptr *g_arr, g;
808 basic_block bb, pre_header = NULL;
809 int * node_order;
810 int maxii;
811 int i;
812 partial_schedule_ptr ps;
813 int max_bb_index = last_basic_block;
814 struct df *df;
816 stats_file = dump_file;
818 /* Initialize issue_rate. */
819 if (targetm.sched.issue_rate)
821 int temp = reload_completed;
823 reload_completed = 1;
824 issue_rate = (*targetm.sched.issue_rate) ();
825 reload_completed = temp;
827 else
828 issue_rate = 1;
830 /* Initialize the scheduler. */
831 current_sched_info = &sms_sched_info;
832 sched_init (NULL);
834 /* Init Data Flow analysis, to be used in interloop dep calculation. */
835 df = df_init ();
836 df_analyze (df, 0, DF_ALL);
838 /* Allocate memory to hold the DDG array. */
839 g_arr = xcalloc (max_bb_index, sizeof (ddg_ptr));
841 /* Build DDGs for all the relevant loops and hold them in G_ARR
842 indexed by the loop BB index. */
843 FOR_EACH_BB (bb)
845 rtx head, tail;
846 rtx count_reg, comp;
847 edge e, pre_header_edge;
849 if (bb->index < 0)
850 continue;
852 /* Check if bb has two successors, one being itself. */
853 if (EDGE_COUNT (bb->succs) != 2)
854 continue;
856 if (EDGE_SUCC (bb, 0)->dest != bb && EDGE_SUCC (bb, 1)->dest != bb)
857 continue;
859 if ((EDGE_SUCC (bb, 0)->flags & EDGE_COMPLEX)
860 || (EDGE_SUCC (bb, 1)->flags & EDGE_COMPLEX))
861 continue;
863 /* Check if bb has two predecessors, one being itself. */
864 if (EDGE_COUNT (bb->preds) != 2)
865 continue;
867 if (EDGE_PRED (bb, 0)->src != bb && EDGE_PRED (bb, 1)->src != bb)
868 continue;
870 if ((EDGE_PRED (bb, 0)->flags & EDGE_COMPLEX)
871 || (EDGE_PRED (bb, 1)->flags & EDGE_COMPLEX))
872 continue;
874 /* For debugging. */
875 if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
877 if (dump_file)
878 fprintf (dump_file, "SMS reached MAX_PASSES... \n");
879 break;
882 get_block_head_tail (bb->index, &head, &tail);
883 pre_header_edge = EDGE_PRED (bb, 0);
884 if (EDGE_PRED (bb, 0)->src != bb)
885 pre_header_edge = EDGE_PRED (bb, 1);
887 /* Perfrom SMS only on loops that their average count is above threshold. */
888 if (bb->count < pre_header_edge->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD)
890 if (stats_file)
892 rtx line_note = find_line_note (tail);
894 if (line_note)
896 expanded_location xloc;
897 NOTE_EXPANDED_LOCATION (xloc, line_note);
898 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
899 xloc.file, xloc.line);
901 fprintf (stats_file, "SMS single-bb-loop\n");
902 if (profile_info && flag_branch_probabilities)
904 fprintf (stats_file, "SMS loop-count ");
905 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
906 (HOST_WIDEST_INT) bb->count);
907 fprintf (stats_file, "\n");
908 fprintf (stats_file, "SMS preheader-count ");
909 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
910 (HOST_WIDEST_INT) pre_header_edge->count);
911 fprintf (stats_file, "\n");
912 fprintf (stats_file, "SMS profile-sum-max ");
913 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
914 (HOST_WIDEST_INT) profile_info->sum_max);
915 fprintf (stats_file, "\n");
918 continue;
921 /* Make sure this is a doloop. */
922 if ( !(count_reg = doloop_register_get (tail, &comp)))
923 continue;
925 e = EDGE_PRED (bb, 0);
926 if (e->src == bb)
927 pre_header = EDGE_PRED (bb, 1)->src;
928 else
929 pre_header = e->src;
931 /* Don't handle BBs with calls or barriers, or !single_set insns. */
932 for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
933 if (CALL_P (insn)
934 || BARRIER_P (insn)
935 || (INSN_P (insn) && !JUMP_P (insn)
936 && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
937 break;
939 if (insn != NEXT_INSN (tail))
941 if (stats_file)
943 if (CALL_P (insn))
944 fprintf (stats_file, "SMS loop-with-call\n");
945 else if (BARRIER_P (insn))
946 fprintf (stats_file, "SMS loop-with-barrier\n");
947 else
948 fprintf (stats_file, "SMS loop-with-not-single-set\n");
949 print_rtl_single (stats_file, insn);
952 continue;
955 if (! (g = create_ddg (bb, df, 0)))
957 if (stats_file)
958 fprintf (stats_file, "SMS doloop\n");
959 continue;
962 g_arr[bb->index] = g;
965 /* Release Data Flow analysis data structures. */
966 df_finish (df);
968 /* Go over the built DDGs and perfrom SMS for each one of them. */
969 for (i = 0; i < max_bb_index; i++)
971 rtx head, tail;
972 rtx count_reg, count_init, comp;
973 edge pre_header_edge;
974 int mii, rec_mii;
975 int stage_count = 0;
976 HOST_WIDEST_INT loop_count = 0;
978 if (! (g = g_arr[i]))
979 continue;
981 if (dump_file)
982 print_ddg (dump_file, g);
984 get_block_head_tail (g->bb->index, &head, &tail);
986 pre_header_edge = EDGE_PRED (g->bb, 0);
987 if (EDGE_PRED (g->bb, 0)->src != g->bb)
988 pre_header_edge = EDGE_PRED (g->bb, 1);
990 if (stats_file)
992 rtx line_note = find_line_note (tail);
994 if (line_note)
996 expanded_location xloc;
997 NOTE_EXPANDED_LOCATION (xloc, line_note);
998 fprintf (stats_file, "SMS bb %s %d (file, line)\n",
999 xloc.file, xloc.line);
1001 fprintf (stats_file, "SMS single-bb-loop\n");
1002 if (profile_info && flag_branch_probabilities)
1004 fprintf (stats_file, "SMS loop-count ");
1005 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1006 (HOST_WIDEST_INT) bb->count);
1007 fprintf (stats_file, "\n");
1008 fprintf (stats_file, "SMS preheader-count ");
1009 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1010 (HOST_WIDEST_INT) pre_header_edge->count);
1011 fprintf (stats_file, "\n");
1012 fprintf (stats_file, "SMS profile-sum-max ");
1013 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC,
1014 (HOST_WIDEST_INT) profile_info->sum_max);
1015 fprintf (stats_file, "\n");
1017 fprintf (stats_file, "SMS doloop\n");
1018 fprintf (stats_file, "SMS built-ddg %d\n", g->num_nodes);
1019 fprintf (stats_file, "SMS num-loads %d\n", g->num_loads);
1020 fprintf (stats_file, "SMS num-stores %d\n", g->num_stores);
1023 /* Make sure this is a doloop. */
1024 if ( !(count_reg = doloop_register_get (tail, &comp)))
1025 abort ();
1027 /* This should be NULL_RTX if the count is unknown at compile time. */
1028 count_init = const_iteration_count (count_reg, pre_header, &loop_count);
1030 if (stats_file && count_init)
1032 fprintf (stats_file, "SMS const-doloop ");
1033 fprintf (stats_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
1034 fprintf (stats_file, "\n");
1037 node_order = (int *) xmalloc (sizeof (int) * g->num_nodes);
1039 mii = 1; /* Need to pass some estimate of mii. */
1040 rec_mii = sms_order_nodes (g, mii, node_order);
1041 mii = MAX (res_MII (g), rec_mii);
1042 maxii = (calculate_maxii (g) * SMS_MAX_II_FACTOR) / 100;
1044 if (stats_file)
1045 fprintf (stats_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
1046 rec_mii, mii, maxii);
1048 /* After sms_order_nodes and before sms_schedule_by_order, to copy over
1049 ASAP. */
1050 set_node_sched_params (g);
1052 ps = sms_schedule_by_order (g, mii, maxii, node_order, dump_file);
1054 if (ps)
1055 stage_count = PS_STAGE_COUNT (ps);
1057 if (stage_count == 0 || (count_init && (stage_count > loop_count)))
1059 if (dump_file)
1060 fprintf (dump_file, "SMS failed... \n");
1061 if (stats_file)
1062 fprintf (stats_file, "SMS sched-failed %d\n", stage_count);
1064 else
1066 rtx orig_loop_beg = NULL_RTX;
1067 rtx orig_loop_end = NULL_RTX;
1069 if (stats_file)
1071 fprintf (stats_file,
1072 "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
1073 stage_count);
1074 print_partial_schedule (ps, dump_file);
1075 fprintf (dump_file,
1076 "SMS Branch (%d) will later be scheduled at cycle %d.\n",
1077 g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
1080 /* Save the original loop if we want to do loop preconditioning in
1081 case the BCT count is not known. */
1082 if (! count_init)
1084 int i;
1086 start_sequence ();
1087 /* Copy the original loop code before modifying it -
1088 so we can use it later. */
1089 for (i = 0; i < ps->g->num_nodes; i++)
1090 duplicate_insn_chain (ps->g->nodes[i].first_note,
1091 ps->g->nodes[i].insn);
1093 orig_loop_beg = get_insns ();
1094 orig_loop_end = get_last_insn ();
1095 end_sequence ();
1097 /* Set the stage boundaries. If the DDG is built with closing_branch_deps,
1098 the closing_branch was scheduled and should appear in the last (ii-1)
1099 row. Otherwise, we are free to schedule the branch, and we let nodes
1100 that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
1101 row; this should reduce stage_count to minimum. */
1102 normalize_sched_times (ps);
1103 rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
1104 set_columns_for_ps (ps);
1106 permute_partial_schedule (ps, g->closing_branch->first_note);
1108 /* Mark this loop as software pipelined so the later
1109 scheduling passes doesn't touch it. */
1110 if (! flag_resched_modulo_sched)
1111 g->bb->flags |= BB_DISABLE_SCHEDULE;
1113 generate_reg_moves (ps);
1114 if (dump_file)
1115 print_node_sched_params (dump_file, g->num_nodes);
1117 /* Set new iteration count of loop kernel. */
1118 if (count_init)
1119 SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1120 - stage_count + 1);
1122 /* Generate prolog and epilog. */
1123 generate_prolog_epilog (ps, orig_loop_beg, orig_loop_end,
1124 count_init ? 0 : 1);
1126 free_partial_schedule (ps);
1127 free (node_sched_params);
1128 free (node_order);
1129 free_ddg (g);
1132 /* Release scheduler data, needed until now because of DFA. */
1133 sched_finish ();
1136 /* The SMS scheduling algorithm itself
1137 -----------------------------------
1138 Input: 'O' an ordered list of insns of a loop.
1139 Output: A scheduling of the loop - kernel, prolog, and epilogue.
1141 'Q' is the empty Set
1142 'PS' is the partial schedule; it holds the currently scheduled nodes with
1143 their cycle/slot.
1144 'PSP' previously scheduled predecessors.
1145 'PSS' previously scheduled successors.
1146 't(u)' the cycle where u is scheduled.
1147 'l(u)' is the latency of u.
1148 'd(v,u)' is the dependence distance from v to u.
1149 'ASAP(u)' the earliest time at which u could be scheduled as computed in
1150 the node ordering phase.
1151 'check_hardware_resources_conflicts(u, PS, c)'
1152 run a trace around cycle/slot through DFA model
1153 to check resource conflicts involving instruction u
1154 at cycle c given the partial schedule PS.
1155 'add_to_partial_schedule_at_time(u, PS, c)'
1156 Add the node/instruction u to the partial schedule
1157 PS at time c.
1158 'calculate_register_pressure(PS)'
1159 Given a schedule of instructions, calculate the register
1160 pressure it implies. One implementation could be the
1161 maximum number of overlapping live ranges.
1162 'maxRP' The maximum allowed register pressure, it is usually derived from the number
1163 registers available in the hardware.
1165 1. II = MII.
1166 2. PS = empty list
1167 3. for each node u in O in pre-computed order
1168 4. if (PSP(u) != Q && PSS(u) == Q) then
   5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   6. start = Early_start; end = Early_start + II - 1; step = 1
   11. else if (PSP(u) == Q && PSS(u) != Q) then
   12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   13. start = Late_start; end = Late_start - II + 1; step = -1
   14. else if (PSP(u) != Q && PSS(u) != Q) then
   15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1177 17. start = Early_start;
1178 18. end = min(Early_start + II - 1 , Late_start);
1179 19. step = 1
1180 20. else "if (PSP(u) == Q && PSS(u) == Q)"
1181 21. start = ASAP(u); end = start + II - 1; step = 1
1182 22. endif
1184 23. success = false
1185 24. for (c = start ; c != end ; c += step)
1186 25. if check_hardware_resources_conflicts(u, PS, c) then
1187 26. add_to_partial_schedule_at_time(u, PS, c)
1188 27. success = true
1189 28. break
1190 29. endif
1191 30. endfor
1192 31. if (success == false) then
1193 32. II = II + 1
1194 33. if (II > maxII) then
1195 34. finish - failed to schedule
1196 35. endif
1197 36. goto 2.
1198 37. endif
1199 38. endfor
1200 39. if (calculate_register_pressure(PS) > maxRP) then
1201 40. goto 32.
1202 41. endif
1203 42. compute epilogue & prologue
1204 43. finish - succeeded to schedule
/* A limit on the number of cycles that resource conflicts can span.  ??? Should
   be provided by DFA, and be dependent on the type of insn scheduled.  Currently
   set to 0 to save compile time.  */
#define DFA_HISTORY SMS_DFA_HISTORY
1212 static partial_schedule_ptr
1213 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *dump_file)
1215 int ii = mii;
1216 int i, c, success;
1217 int try_again_with_larger_ii = true;
1218 int num_nodes = g->num_nodes;
1219 ddg_edge_ptr e;
1220 int start, end, step; /* Place together into one struct? */
1221 sbitmap sched_nodes = sbitmap_alloc (num_nodes);
1222 sbitmap must_precede = sbitmap_alloc (num_nodes);
1223 sbitmap must_follow = sbitmap_alloc (num_nodes);
1225 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
1227 while (try_again_with_larger_ii && ii < maxii)
1229 if (dump_file)
1230 fprintf(dump_file, "Starting with ii=%d\n", ii);
1231 try_again_with_larger_ii = false;
1232 sbitmap_zero (sched_nodes);
1234 for (i = 0; i < num_nodes; i++)
1236 int u = nodes_order[i];
1237 ddg_node_ptr u_node = &g->nodes[u];
1238 sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
1239 sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
1240 int psp_not_empty;
1241 int pss_not_empty;
1242 rtx insn = u_node->insn;
1244 if (!INSN_P (insn))
1245 continue;
1247 if (JUMP_P (insn)) /* Closing branch handled later. */
1248 continue;
1250 /* 1. compute sched window for u (start, end, step). */
1251 psp_not_empty = sbitmap_any_common_bits (u_node_preds, sched_nodes);
1252 pss_not_empty = sbitmap_any_common_bits (u_node_succs, sched_nodes);
1254 if (psp_not_empty && !pss_not_empty)
1256 int early_start = 0;
1258 end = INT_MAX;
1259 for (e = u_node->in; e != 0; e = e->next_in)
1261 ddg_node_ptr v_node = e->src;
1262 if (TEST_BIT (sched_nodes, v_node->cuid))
1264 int node_st = SCHED_TIME (v_node)
1265 + e->latency - (e->distance * ii);
1267 early_start = MAX (early_start, node_st);
1269 if (e->data_type == MEM_DEP)
1270 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1273 start = early_start;
1274 end = MIN (end, early_start + ii);
1275 step = 1;
1278 else if (!psp_not_empty && pss_not_empty)
1280 int late_start = INT_MAX;
1282 end = INT_MIN;
1283 for (e = u_node->out; e != 0; e = e->next_out)
1285 ddg_node_ptr v_node = e->dest;
1286 if (TEST_BIT (sched_nodes, v_node->cuid))
1288 late_start = MIN (late_start,
1289 SCHED_TIME (v_node) - e->latency
1290 + (e->distance * ii));
1291 if (e->data_type == MEM_DEP)
1292 end = MAX (end, SCHED_TIME (v_node) - ii + 1);
1295 start = late_start;
1296 end = MAX (end, late_start - ii);
1297 step = -1;
1300 else if (psp_not_empty && pss_not_empty)
1302 int early_start = 0;
1303 int late_start = INT_MAX;
1305 start = INT_MIN;
1306 end = INT_MAX;
1307 for (e = u_node->in; e != 0; e = e->next_in)
1309 ddg_node_ptr v_node = e->src;
1311 if (TEST_BIT (sched_nodes, v_node->cuid))
1313 early_start = MAX (early_start,
1314 SCHED_TIME (v_node) + e->latency
1315 - (e->distance * ii));
1316 if (e->data_type == MEM_DEP)
1317 end = MIN (end, SCHED_TIME (v_node) + ii - 1);
1320 for (e = u_node->out; e != 0; e = e->next_out)
1322 ddg_node_ptr v_node = e->dest;
1324 if (TEST_BIT (sched_nodes, v_node->cuid))
1326 late_start = MIN (late_start,
1327 SCHED_TIME (v_node) - e->latency
1328 + (e->distance * ii));
1329 if (e->data_type == MEM_DEP)
1330 start = MAX (start, SCHED_TIME (v_node) - ii + 1);
1333 start = MAX (start, early_start);
1334 end = MIN (end, MIN (early_start + ii, late_start + 1));
1335 step = 1;
1337 else /* psp is empty && pss is empty. */
1339 start = SCHED_ASAP (u_node);
1340 end = start + ii;
1341 step = 1;
1344 /* 2. Try scheduling u in window. */
1345 if (dump_file)
1346 fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
1347 u, start, end, step);
1349 /* use must_follow & must_precede bitmaps to determine order
1350 of nodes within the cycle. */
1351 sbitmap_zero (must_precede);
1352 sbitmap_zero (must_follow);
1353 for (e = u_node->in; e != 0; e = e->next_in)
1354 if (TEST_BIT (sched_nodes, e->src->cuid)
1355 && e->latency == (ii * e->distance)
1356 && start == SCHED_TIME (e->src))
1357 SET_BIT (must_precede, e->src->cuid);
1359 for (e = u_node->out; e != 0; e = e->next_out)
1360 if (TEST_BIT (sched_nodes, e->dest->cuid)
1361 && e->latency == (ii * e->distance)
1362 && end == SCHED_TIME (e->dest))
1363 SET_BIT (must_follow, e->dest->cuid);
1365 success = 0;
1366 if ((step > 0 && start < end) || (step < 0 && start > end))
1367 for (c = start; c != end; c += step)
1369 ps_insn_ptr psi;
1371 psi = ps_add_node_check_conflicts (ps, u_node, c,
1372 must_precede,
1373 must_follow);
1375 if (psi)
1377 SCHED_TIME (u_node) = c;
1378 SET_BIT (sched_nodes, u);
1379 success = 1;
1380 if (dump_file)
1381 fprintf(dump_file, "Schedule in %d\n", c);
1382 break;
1385 if (!success)
1387 /* ??? Try backtracking instead of immediately ii++? */
1388 ii++;
1389 try_again_with_larger_ii = true;
1390 reset_partial_schedule (ps, ii);
1391 break;
1393 /* ??? If (success), check register pressure estimates. */
1394 } /* Continue with next node. */
1395 } /* While try_again_with_larger_ii. */
1397 sbitmap_free (sched_nodes);
1399 if (ii >= maxii)
1401 free_partial_schedule (ps);
1402 ps = NULL;
1404 return ps;
1408 /* This page implements the algorithm for ordering the nodes of a DDG
1409 for modulo scheduling, activated through the
1410 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
/* Accessors for the ordering parameters attached to each DDG node
   through its AUX info field during the node-ordering phase.  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: the slack between the earliest and latest feasible cycle.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
#define DEPTH(x) (ASAP ((x)))

typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa calculate_order_params (ddg_ptr, int mii);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction in which the nodes of an SCC are traversed while ordering.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* Per-node parameters computed by calculate_order_params and consumed
   through the ASAP/ALAP/HEIGHT/MOB/DEPTH accessors above.  */
struct node_order_params
{
  int asap;    /* Earliest cycle, ignoring back-arcs.  */
  int alap;    /* Latest cycle.  */
  int height;  /* Longest latency path down to a leaf.  */
};
1437 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
1438 static void
1439 check_nodes_order (int *node_order, int num_nodes)
1441 int i;
1442 sbitmap tmp = sbitmap_alloc (num_nodes);
1444 sbitmap_zero (tmp);
1446 for (i = 0; i < num_nodes; i++)
1448 int u = node_order[i];
1450 if (u >= num_nodes || u < 0 || TEST_BIT (tmp, u))
1451 abort ();
1453 SET_BIT (tmp, u);
1456 sbitmap_free (tmp);
1459 /* Order the nodes of G for scheduling and pass the result in
1460 NODE_ORDER. Also set aux.count of each node to ASAP.
1461 Return the recMII for the given DDG. */
1462 static int
1463 sms_order_nodes (ddg_ptr g, int mii, int * node_order)
1465 int i;
1466 int rec_mii = 0;
1467 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);
1469 nopa nops = calculate_order_params (g, mii);
1471 order_nodes_of_sccs (sccs, node_order);
1473 if (sccs->num_sccs > 0)
1474 /* First SCC has the largest recurrence_length. */
1475 rec_mii = sccs->sccs[0]->recurrence_length;
1477 /* Save ASAP before destroying node_order_params. */
1478 for (i = 0; i < g->num_nodes; i++)
1480 ddg_node_ptr v = &g->nodes[i];
1481 v->aux.count = ASAP (v);
1484 free (nops);
1485 free_ddg_all_sccs (sccs);
1486 check_nodes_order (node_order, g->num_nodes);
1488 return rec_mii;
/* Order the nodes of ALL_SCCS into NODE_ORDER, one SCC at a time in
   decreasing recMII order; each SCC is extended with the nodes lying on
   paths to/from the previously ordered SCCs before being ordered.
   Nodes outside every SCC are handled last, one connected component
   per call to order_nodes_in_scc.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
1539 /* MII is needed if we consider backarcs (that do not close recursive cycles). */
/* Compute the ASAP, ALAP and HEIGHT ordering parameters for every node
   of G, attach them to each node via aux.info, and return the freshly
   allocated parameter array (caller frees).  Back-arcs (edges with
   nonzero distance) are ignored so the graph is treated as a DAG.  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
					  sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass: ASAP of a node is the max over its intra-iteration
     predecessors of (their ASAP + edge latency).  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
	if (e->distance == 0)
	  ASAP (u_node) = MAX (ASAP (u_node),
			       ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass (reverse topological order): compute ALAP and HEIGHT
     from intra-iteration successors.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
	if (e->distance == 0)
	  {
	    ALAP (u_node) = MIN (ALAP (u_node),
				 ALAP (e->dest) - e->latency);
	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
				   HEIGHT (e->dest) + e->latency);
	  }
    }

  return node_order_params_arr;
}
1596 static int
1597 find_max_asap (ddg_ptr g, sbitmap nodes)
1599 int u;
1600 int max_asap = -1;
1601 int result = -1;
1603 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1605 ddg_node_ptr u_node = &g->nodes[u];
1607 if (max_asap < ASAP (u_node))
1609 max_asap = ASAP (u_node);
1610 result = u;
1613 return result;
1616 static int
1617 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
1619 int u;
1620 int max_hv = -1;
1621 int min_mob = INT_MAX;
1622 int result = -1;
1624 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1626 ddg_node_ptr u_node = &g->nodes[u];
1628 if (max_hv < HEIGHT (u_node))
1630 max_hv = HEIGHT (u_node);
1631 min_mob = MOB (u_node);
1632 result = u;
1634 else if ((max_hv == HEIGHT (u_node))
1635 && (min_mob > MOB (u_node)))
1637 min_mob = MOB (u_node);
1638 result = u;
1641 return result;
1644 static int
1645 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
1647 int u;
1648 int max_dv = -1;
1649 int min_mob = INT_MAX;
1650 int result = -1;
1652 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u,
1654 ddg_node_ptr u_node = &g->nodes[u];
1656 if (max_dv < DEPTH (u_node))
1658 max_dv = DEPTH (u_node);
1659 min_mob = MOB (u_node);
1660 result = u;
1662 else if ((max_dv == DEPTH (u_node))
1663 && (min_mob > MOB (u_node)))
1665 min_mob = MOB (u_node);
1666 result = u;
1669 return result;
1672 /* Places the nodes of SCC into the NODE_ORDER array starting
1673 at position POS, according to the SMS ordering algorithm.
1674 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
1675 the NODE_ORDER array, starting from position zero. */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  /* Seed the worklist with the nodes of SCC adjacent to the already
     ordered nodes; the overlap direction fixes the initial traversal
     direction.  */
  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      /* SCC nodes that precede the ordered set: work bottom-up.  */
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      /* SCC nodes that follow the ordered set: work top-down.  */
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      /* No connection to the ordered nodes: start from the node with
	 the largest ASAP.  */
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  /* Alternate between top-down and bottom-up sweeps until the worklist
     is exhausted.  Each inner sweep drains the current worklist, then
     the direction flips and the worklist is re-seeded from the
     neighbors of everything ordered so far.  */
  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick by max height, then min mobility.  */
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick by max depth, then min mobility.  */
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
1775 /* This page contains functions for manipulating partial-schedules during
1776 modulo scheduling. */
1778 /* Create a partial schedule and allocate a memory to hold II rows. */
1779 static partial_schedule_ptr
1780 create_partial_schedule (int ii, ddg_ptr g, int history)
1782 partial_schedule_ptr ps = (partial_schedule_ptr)
1783 xmalloc (sizeof (struct partial_schedule));
1784 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
1785 ps->ii = ii;
1786 ps->history = history;
1787 ps->min_cycle = INT_MAX;
1788 ps->max_cycle = INT_MIN;
1789 ps->g = g;
1791 return ps;
1794 /* Free the PS_INSNs in rows array of the given partial schedule.
1795 ??? Consider caching the PS_INSN's. */
1796 static void
1797 free_ps_insns (partial_schedule_ptr ps)
1799 int i;
1801 for (i = 0; i < ps->ii; i++)
1803 while (ps->rows[i])
1805 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
1807 free (ps->rows[i]);
1808 ps->rows[i] = ps_insn;
1810 ps->rows[i] = NULL;
1814 /* Free all the memory allocated to the partial schedule. */
1815 static void
1816 free_partial_schedule (partial_schedule_ptr ps)
1818 if (!ps)
1819 return;
1820 free_ps_insns (ps);
1821 free (ps->rows);
1822 free (ps);
1825 /* Clear the rows array with its PS_INSNs, and create a new one with
1826 NEW_II rows. */
1827 static void
1828 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
1830 if (!ps)
1831 return;
1832 free_ps_insns (ps);
1833 if (new_ii == ps->ii)
1834 return;
1835 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
1836 * sizeof (ps_insn_ptr));
1837 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
1838 ps->ii = new_ii;
1839 ps->min_cycle = INT_MAX;
1840 ps->max_cycle = INT_MIN;
1843 /* Prints the partial schedule as an ii rows array, for each rows
1844 print the ids of the insns in it. */
1845 void
1846 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
1848 int i;
1850 for (i = 0; i < ps->ii; i++)
1852 ps_insn_ptr ps_i = ps->rows[i];
1854 fprintf (dump, "\n[CYCLE %d ]: ", i);
1855 while (ps_i)
1857 fprintf (dump, "%d, ",
1858 INSN_UID (ps_i->node->insn));
1859 ps_i = ps_i->next_in_row;
1864 /* Creates an object of PS_INSN and initializes it to the given parameters. */
1865 static ps_insn_ptr
1866 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
1868 ps_insn_ptr ps_i = xmalloc (sizeof (struct ps_insn));
1870 ps_i->node = node;
1871 ps_i->next_in_row = NULL;
1872 ps_i->prev_in_row = NULL;
1873 ps_i->row_rest_count = rest_count;
1874 ps_i->cycle = cycle;
1876 return ps_i;
1880 /* Removes the given PS_INSN from the partial schedule. Returns false if the
1881 node is not found in the partial schedule, else returns true. */
1882 static int
1883 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
1885 int row;
1887 if (!ps || !ps_i)
1888 return false;
1890 row = SMODULO (ps_i->cycle, ps->ii);
1891 if (! ps_i->prev_in_row)
1893 if (ps_i != ps->rows[row])
1894 return false;
1896 ps->rows[row] = ps_i->next_in_row;
1897 if (ps->rows[row])
1898 ps->rows[row]->prev_in_row = NULL;
1900 else
1902 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
1903 if (ps_i->next_in_row)
1904 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
1906 free (ps_i);
1907 return true;
1910 /* Unlike what literature describes for modulo scheduling (which focuses
1911 on VLIW machines) the order of the instructions inside a cycle is
1912 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
1913 where the current instruction should go relative to the already
1914 scheduled instructions in the given cycle. Go over these
1915 instructions and find the first possible column to put it in. */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* Nothing must precede PS_I: link it in at the head of the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Link PS_I immediately after the last node that must precede it.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
1973 /* Advances the PS_INSN one column in its current row; returns false
1974 in failure and true in success. Bit N is set in MUST_FOLLOW if
1975 the node with cuid N must be come after the node pointed to by
1976 PS_I when scheduled in the same cycle. */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already at the end of the row: nowhere to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.
     After this, the row order is PREV -> NEXT -> PS_I.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* PS_I was the row head, so NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2022 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
2023 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
2024 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
2025 before/after (respectively) the node pointed to by PS_I when scheduled
2026 in the same cycle. */
2027 static ps_insn_ptr
2028 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
2029 sbitmap must_precede, sbitmap must_follow)
2031 ps_insn_ptr ps_i;
2032 int rest_count = 1;
2033 int row = SMODULO (cycle, ps->ii);
2035 if (ps->rows[row]
2036 && ps->rows[row]->row_rest_count >= issue_rate)
2037 return NULL;
2039 if (ps->rows[row])
2040 rest_count += ps->rows[row]->row_rest_count;
2042 ps_i = create_ps_insn (node, rest_count, cycle);
2044 /* Finds and inserts PS_I according to MUST_FOLLOW and
2045 MUST_PRECEDE. */
2046 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
2048 free (ps_i);
2049 return NULL;
2052 return ps_i;
2055 /* Advance time one cycle. Assumes DFA is being used. */
2056 static void
2057 advance_one_cycle (void)
2059 if (targetm.sched.dfa_pre_cycle_insn)
2060 state_transition (curr_state,
2061 (*targetm.sched.dfa_pre_cycle_insn) ());
2063 state_transition (curr_state, NULL);
2065 if (targetm.sched.dfa_post_cycle_insn)
2066 state_transition (curr_state,
2067 (*targetm.sched.dfa_post_cycle_insn) ());
2070 /* Checks if PS has resource conflicts according to DFA, starting from
2071 FROM cycle to TO cycle; returns true if there are conflicts and false
2072 if there are no conflicts. Assumes DFA is being used. */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  /* Replay the schedule through a fresh DFA state.  */
  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  if (!INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     recource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      (*targetm.sched.variable_issue) (sched_dump, sched_verbose,
					       insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2122 /* Checks if the given node causes resource conflicts when added to PS at
2123 cycle C. If not the node is added to PS and returned; otherwise zero
2124 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
2125 cuid N must be come before/after (respectively) the node pointed to by
2126 PS_I when scheduled in the same cycle. */
2127 static ps_insn_ptr
2128 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
2129 int c, sbitmap must_precede,
2130 sbitmap must_follow)
2132 int has_conflicts = 0;
2133 ps_insn_ptr ps_i;
2135 /* First add the node to the PS, if this succeeds check for
2136 conflicts, trying different issue slots in the same row. */
2137 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
2138 return NULL; /* Failed to insert the node at the given cycle. */
2140 has_conflicts = ps_has_conflicts (ps, c, c)
2141 || (ps->history > 0
2142 && ps_has_conflicts (ps,
2143 c - ps->history,
2144 c + ps->history));
2146 /* Try different issue slots to find one that the given node can be
2147 scheduled in without conflicts. */
2148 while (has_conflicts)
2150 if (! ps_insn_advance_column (ps, ps_i, must_follow))
2151 break;
2152 has_conflicts = ps_has_conflicts (ps, c, c)
2153 || (ps->history > 0
2154 && ps_has_conflicts (ps,
2155 c - ps->history,
2156 c + ps->history));
2159 if (has_conflicts)
2161 remove_node_from_ps (ps, ps_i);
2162 return NULL;
2165 ps->min_cycle = MIN (ps->min_cycle, c);
2166 ps->max_cycle = MAX (ps->max_cycle, c);
2167 return ps_i;
2170 /* Rotate the rows of PS such that insns scheduled at time
2171 START_CYCLE will appear in row 0. Updates max/min_cycles. */
2172 static void
2173 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
2175 int i, row, backward_rotates;
2176 int last_row = ps->ii - 1;
2178 if (start_cycle == 0)
2179 return;
2181 backward_rotates = SMODULO (start_cycle, ps->ii);
2183 /* Revisit later and optimize this into a single loop. */
2184 for (i = 0; i < backward_rotates; i++)
2186 ps_insn_ptr first_row = ps->rows[0];
2188 for (row = 0; row < last_row; row++)
2189 ps->rows[row] = ps->rows[row+1];
2191 ps->rows[last_row] = first_row;
2194 ps->max_cycle -= start_cycle;
2195 ps->min_cycle -= start_cycle;
2198 #endif /* INSN_SCHEDULING*/