1 /* Instruction scheduling pass.
2 Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
4 and currently maintained by, Jim Wilson (wilson@cygnus.com)
6 This file is part of GNU CC.
8 GNU CC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GNU CC is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU CC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
24 /* Instruction scheduling pass.
26 This pass implements list scheduling within basic blocks. It is
27 run twice: (1) after flow analysis, but before register allocation,
28 and (2) after register allocation.
30 The first run performs interblock scheduling, moving insns between
31 different blocks in the same "region", and the second runs only
32 basic block scheduling.
34 Interblock motions performed are useful motions and speculative
35 motions, including speculative loads. Motions requiring code
36 duplication are not supported. The identification of motion type
37 and the check for validity of speculative motions requires
38 construction and analysis of the function's control flow graph.
39 The scheduler works as follows:
41 We compute insn priorities based on data dependencies. Flow
42 analysis only creates a fraction of the data-dependencies we must
43 observe: namely, only those dependencies which the combiner can be
44 expected to use. For this pass, we must therefore create the
45 remaining dependencies we need to observe: register dependencies,
46 memory dependencies, dependencies to keep function calls in order,
47 and the dependence between a conditional branch and the setting of
48 condition codes; all of these are dealt with here.
50 The scheduler first traverses the data flow graph, starting with
51 the last instruction, and proceeding to the first, assigning values
52 to insn_priority as it goes. This sorts the instructions
53 topologically by data dependence.
55 Once priorities have been established, we order the insns using
56 list scheduling. This works as follows: starting with a list of
57 all the ready insns, and sorted according to priority number, we
58 schedule the insn from the end of the list by placing its
59 predecessors in the list according to their priority order. We
60 consider this insn scheduled by setting the pointer to the "end" of
61 the list to point to the previous insn. When an insn has no
62 predecessors, we either queue it until sufficient time has elapsed
63 or add it to the ready list. As the instructions are scheduled or
64 when stalls are introduced, the queue advances and dumps insns into
65 the ready list. When all insns down to the lowest priority have
66 been scheduled, the critical path of the basic block has been made
67 as short as possible. The remaining insns are then scheduled in
68 remaining slots.
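
   In outline (this is only a sketch of the flow described above; the
   real loop lives in schedule_block (), later in this file):

	while (some insns remain unscheduled)
	  {
	    clock++;
	    move insns whose stall time has elapsed from the queue (Q)
	      to the ready list (R);
	    pick the best insn on the ready list, using the tie-breaking
	      rules below, and commit it to the schedule (S);
	    for every dependent of the committed insn whose dependence
	      count drops to zero, add it to the ready list or the queue;
	  }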
70 Function unit conflicts are resolved during forward list scheduling
71 by tracking the time when each insn is committed to the schedule
72 and from that, the time the function units it uses must be free.
73 As insns on the ready list are considered for scheduling, those
74 that would result in a blockage of the already committed insns are
75 queued until no blockage will result.
77 The following list shows the order in which we want to break ties
78 among insns in the ready list:
80 1. choose insn with the longest path to end of bb, ties
81 broken by
82 2. choose insn with least contribution to register pressure,
83 ties broken by
84 3. prefer in-block over interblock motion, ties broken by
85 4. prefer useful over speculative motion, ties broken by
86 5. choose insn with largest control flow probability, ties
87 broken by
88 6. choose insn with the least dependences upon the previously
89 scheduled insn, or finally
90 7. choose the insn which has the most insns dependent on it.
91 8. choose insn with lowest UID.
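
   As a sketch only (the real comparison is rank_for_schedule (),
   defined later in this file; criteria 3 through 7 are left out), the
   cascade above behaves like a comparison that prefers insn1 to insn2
   when it yields a negative value:

	if (INSN_PRIORITY (insn1) != INSN_PRIORITY (insn2))
	  return INSN_PRIORITY (insn2) - INSN_PRIORITY (insn1);
	if (INSN_REG_WEIGHT (insn1) != INSN_REG_WEIGHT (insn2))
	  return INSN_REG_WEIGHT (insn1) - INSN_REG_WEIGHT (insn2);
	... criteria 3 through 7 ...
	return INSN_UID (insn1) - INSN_UID (insn2);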
93 Memory references complicate matters. Only if we can be certain
94 that memory references are not part of the data dependency graph
95 (via true, anti, or output dependence), can we move operations past
96 memory references. To first approximation, reads can be done
97 independently, while writes introduce dependencies. Better
98 approximations will yield fewer dependencies.
100 Before reload, an extended analysis of interblock data dependences
101 is required for interblock scheduling. This is performed in
102 compute_block_backward_dependences ().
104 Dependencies set up by memory references are treated in exactly the
105 same way as other dependencies, by using LOG_LINKS backward
106 dependences. LOG_LINKS are translated into INSN_DEPEND forward
107 dependences for the purpose of forward list scheduling.
109 Having optimized the critical path, we may have also unduly
110 extended the lifetimes of some registers. If an operation requires
111 that constants be loaded into registers, it is certainly desirable
112 to load those constants as early as necessary, but no earlier.
113 I.e., it will not do to load up a bunch of registers at the
114 beginning of a basic block only to use them at the end, if they
115 could be loaded later, since this may result in excessive register
116 utilization.
118 Note that since branches are never in basic blocks, but only end
119 basic blocks, this pass will not move branches. But that is ok,
120 since we can use GNU's delayed branch scheduling pass to take care
121 of this case.
123 Also note that no further optimizations based on algebraic
124 identities are performed, so this pass would be a good one to
125 perform instruction splitting, such as breaking up a multiply
126 instruction into shifts and adds where that is profitable.
128 Given the memory aliasing analysis that this pass should perform,
129 it should be possible to remove redundant stores to memory, and to
130 load values from registers instead of hitting memory.
132 Before reload, speculative insns are moved only if a 'proof' exists
133 that no exception will be caused by this, and if no live registers
134 exist that inhibit the motion (live registers constraints are not
135 represented by data dependence edges).
137 This pass must update information that subsequent passes expect to
138 be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
139 reg_n_calls_crossed, and reg_live_length. Also, BLOCK_HEAD,
140 BLOCK_END.
142 The information in the line number notes is carefully retained by
143 this pass. Notes that refer to the starting and ending of
144 exception regions are also carefully retained by this pass. All
145 other NOTE insns are grouped in their same relative order at the
146 beginning of basic blocks and regions that have been scheduled.
148 The main entry point for this pass is schedule_insns(), called for
149 each function. The work of the scheduler is organized in three
150 levels: (1) function level: insns are subject to splitting,
151 control-flow-graph is constructed, regions are computed (after
152 reload, each region is of one block), (2) region level: control
153 flow graph attributes required for interblock scheduling are
154 computed (dominators, reachability, etc.), data dependences and
155 priorities are computed, and (3) block level: insns in the block
156 are actually scheduled. */
158 #include "config.h"
159 #include "system.h"
160 #include "toplev.h"
161 #include "rtl.h"
162 #include "tm_p.h"
163 #include "basic-block.h"
164 #include "regs.h"
165 #include "function.h"
166 #include "hard-reg-set.h"
167 #include "flags.h"
168 #include "insn-config.h"
169 #include "insn-attr.h"
170 #include "except.h"
171 #include "toplev.h"
172 #include "recog.h"
174 extern char *reg_known_equiv_p;
175 extern rtx *reg_known_value;
177 #ifdef INSN_SCHEDULING
179 /* target_units bitmask has 1 for each unit in the cpu. It should be
180 possible to compute this variable from the machine description.
181 But currently it is computed by examining the insn list. Since
182 this is only needed for visualization, it seems an acceptable
183 solution. (For understanding the mapping of bits to units, see
184 definition of function_units[] in "insn-attrtab.c".) */
186 static int target_units = 0;
188 /* issue_rate is the number of insns that can be scheduled in the same
189 machine cycle. It can be defined in the config/mach/mach.h file,
190 otherwise we set it to 1. */
192 static int issue_rate;
194 #ifndef ISSUE_RATE
195 #define ISSUE_RATE 1
196 #endif
198 /* sched-verbose controls the amount of debugging output the
199 scheduler prints. It is controlled by -fsched-verbose-N:
200 N>0 and no -dSR : the output is directed to stderr.
201 N>=10 will direct the printouts to stderr (regardless of -dSR).
202 N=1: same as -dSR.
203 N=2: bb's probabilities, detailed ready list info, unit/insn info.
204 N=3: rtl at abort point, control-flow, regions info.
205 N=5: dependences info. */
207 #define MAX_RGN_BLOCKS 10
208 #define MAX_RGN_INSNS 100
210 static int sched_verbose_param = 0;
211 static int sched_verbose = 0;
213 /* nr_inter/spec counts interblock/speculative motion for the function. */
214 static int nr_inter, nr_spec;
217 /* Debugging file. All printouts are sent to dump, which is always set,
218 either to stderr, or to the dump listing file (-dRS). */
219 static FILE *dump = 0;
221 /* fix_sched_param() is called from toplev.c upon detection
222 of the -fsched-***-N options. */
224 void
225 fix_sched_param (param, val)
226 const char *param, *val;
228 if (!strcmp (param, "verbose"))
229 sched_verbose_param = atoi (val);
230 else
231 warning ("fix_sched_param: unknown param: %s", param);
234 /* Describe state of dependencies used during sched_analyze phase. */
235 struct deps
237 /* The *_insns and *_mems are paired lists. Each pending memory operation
238 will have a pointer to the MEM rtx on one list and a pointer to the
239 containing insn on the other list in the same place in the list. */
241 /* We can't use add_dependence like the old code did, because a single insn
242 may have multiple memory accesses, and hence needs to be on the list
243 once for each memory access. Add_dependence won't let you add an insn
244 to a list more than once. */
246 /* An INSN_LIST containing all insns with pending read operations. */
247 rtx pending_read_insns;
249 /* An EXPR_LIST containing all MEM rtx's which are pending reads. */
250 rtx pending_read_mems;
252 /* An INSN_LIST containing all insns with pending write operations. */
253 rtx pending_write_insns;
255 /* An EXPR_LIST containing all MEM rtx's which are pending writes. */
256 rtx pending_write_mems;
258 /* Indicates the combined length of the two pending lists. We must prevent
259 these lists from ever growing too large since the number of dependencies
260 produced is at least O(N*N), and execution time is at least O(4*N*N), as
261 a function of the length of these pending lists. */
262 int pending_lists_length;
264 /* The last insn upon which all memory references must depend.
265 This is an insn which flushed the pending lists, creating a dependency
266 between it and all previously pending memory references. This creates
267 a barrier (or a checkpoint) which no memory reference is allowed to cross.
269 This includes all non constant CALL_INSNs. When we do interprocedural
270 alias analysis, this restriction can be relaxed.
271 This may also be an INSN that writes memory if the pending lists grow
272 too large. */
273 rtx last_pending_memory_flush;
275 /* The last function call we have seen. All hard regs, and, of course,
276 the last function call, must depend on this. */
277 rtx last_function_call;
279 /* The LOG_LINKS field of this is a list of insns which use a pseudo register
280 that does not already cross a call. We create dependencies between each
281 of those insns and the next call insn, to ensure that they won't cross a call
282 after scheduling is done. */
283 rtx sched_before_next_call;
285 /* Element N is the next insn that sets (hard or pseudo) register
286 N within the current basic block; or zero, if there is no
287 such insn. Needed for new registers which may be introduced
288 by splitting insns. */
289 rtx *reg_last_uses;
290 rtx *reg_last_sets;
291 rtx *reg_last_clobbers;
294 static regset reg_pending_sets;
295 static regset reg_pending_clobbers;
296 static int reg_pending_sets_all;
298 /* To speed up the test for duplicate dependency links we keep a record
299 of true dependencies created by add_dependence when the average number
300 of instructions in a basic block is very large.
302 Studies have shown that there are typically around 5 instructions between
303 branches for typical C code. So we can make a guess that the average
304 basic block is approximately 5 instructions long; we will choose 100X
305 the average size as a very large basic block.
307 Each insn has an associated bitmap for its dependencies. Each bitmap
308 has enough entries to represent a dependency on any other insn in the
309 insn chain. */
310 static sbitmap *true_dependency_cache;
312 /* Indexed by INSN_UID, the collection of all data associated with
313 a single instruction. */
315 struct haifa_insn_data
317 /* A list of insns which depend on the instruction. Unlike LOG_LINKS,
318 it represents forward dependencies. */
319 rtx depend;
321 /* The line number note in effect for each insn. For line number
322 notes, this indicates whether the note may be reused. */
323 rtx line_note;
325 /* Logical uid gives the original ordering of the insns. */
326 int luid;
328 /* A priority for each insn. */
329 int priority;
331 /* The number of incoming edges in the forward dependency graph.
332 As scheduling proceeds, counts are decreased. An insn moves to
333 the ready queue when its counter reaches zero. */
334 int dep_count;
336 /* An encoding of the blockage range function. Both unit and range
337 are coded. */
338 unsigned int blockage;
340 /* Number of instructions referring to this insn. */
341 int ref_count;
343 /* The minimum clock tick at which the insn becomes ready. This is
344 used to note timing constraints for the insns in the pending list. */
345 int tick;
347 short cost;
349 /* An encoding of the function units used. */
350 short units;
352 /* This weight is an estimation of the insn's contribution to
353 register pressure. */
354 short reg_weight;
356 /* Some insns (e.g. call) are not allowed to move across blocks. */
357 unsigned int cant_move : 1;
359 /* Set if there's DEF-USE dependence between some speculatively
360 moved load insn and this one. */
361 unsigned int fed_by_spec_load : 1;
362 unsigned int is_load_insn : 1;
365 static struct haifa_insn_data *h_i_d;
367 #define INSN_DEPEND(INSN) (h_i_d[INSN_UID (INSN)].depend)
368 #define INSN_LUID(INSN) (h_i_d[INSN_UID (INSN)].luid)
369 #define INSN_PRIORITY(INSN) (h_i_d[INSN_UID (INSN)].priority)
370 #define INSN_DEP_COUNT(INSN) (h_i_d[INSN_UID (INSN)].dep_count)
371 #define INSN_COST(INSN) (h_i_d[INSN_UID (INSN)].cost)
372 #define INSN_UNIT(INSN) (h_i_d[INSN_UID (INSN)].units)
373 #define INSN_REG_WEIGHT(INSN) (h_i_d[INSN_UID (INSN)].reg_weight)
375 #define INSN_BLOCKAGE(INSN) (h_i_d[INSN_UID (INSN)].blockage)
376 #define UNIT_BITS 5
377 #define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1)
378 #define ENCODE_BLOCKAGE(U, R) \
379 (((U) << BLOCKAGE_BITS \
380 | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS \
381 | MAX_BLOCKAGE_COST (R))
382 #define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS))
383 #define BLOCKAGE_RANGE(B) \
384 (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
385 | ((B) & BLOCKAGE_MASK))
387 /* Encodings of the `<name>_unit_blockage_range' function. */
388 #define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
389 #define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
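/* Worked example (illustrative values only; BLOCKAGE_BITS comes from
   the generated insn-attr.h): with BLOCKAGE_BITS == 5, a unit number
   U == 3, and a range R whose minimum cost is 2 and maximum cost is 4,

     ENCODE_BLOCKAGE (3, R) == ((3 << 5 | 2) << 5) | 4 == 3140,
     UNIT_BLOCKED (3140)    == 3140 >> 10 == 3,

   and BLOCKAGE_RANGE (3140) re-packs the (2, 4) pair so that
   MIN_BLOCKAGE_COST and MAX_BLOCKAGE_COST recover 2 and 4 again.  */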
391 #define DONE_PRIORITY -1
392 #define MAX_PRIORITY 0x7fffffff
393 #define TAIL_PRIORITY 0x7ffffffe
394 #define LAUNCH_PRIORITY 0x7f000001
395 #define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0)
396 #define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0)
398 #define INSN_REF_COUNT(INSN) (h_i_d[INSN_UID (INSN)].ref_count)
399 #define LINE_NOTE(INSN) (h_i_d[INSN_UID (INSN)].line_note)
400 #define INSN_TICK(INSN) (h_i_d[INSN_UID (INSN)].tick)
401 #define CANT_MOVE(insn) (h_i_d[INSN_UID (insn)].cant_move)
402 #define FED_BY_SPEC_LOAD(insn) (h_i_d[INSN_UID (insn)].fed_by_spec_load)
403 #define IS_LOAD_INSN(insn) (h_i_d[INSN_UID (insn)].is_load_insn)
405 /* Vector indexed by basic block number giving the starting line-number
406 for each basic block. */
407 static rtx *line_note_head;
409 /* List of important notes we must keep around. This is a pointer to the
410 last element in the list. */
411 static rtx note_list;
413 /* Queues, etc. */
415 /* An instruction is ready to be scheduled when all insns preceding it
416 have already been scheduled. It is important to ensure that all
417 insns which use its result will not be executed until its result
418 has been computed. An insn is maintained in one of four structures:
420 (P) the "Pending" set of insns which cannot be scheduled until
421 their dependencies have been satisfied.
422 (Q) the "Queued" set of insns that can be scheduled when sufficient
423 time has passed.
424 (R) the "Ready" list of unscheduled, uncommitted insns.
425 (S) the "Scheduled" list of insns.
427 Initially, all insns are either "Pending" or "Ready" depending on
428 whether their dependencies are satisfied.
430 Insns move from the "Ready" list to the "Scheduled" list as they
431 are committed to the schedule. As this occurs, the insns in the
432 "Pending" list have their dependencies satisfied and move to either
433 the "Ready" list or the "Queued" set depending on whether
434 sufficient time has passed to make them ready. As time passes,
435 insns move from the "Queued" set to the "Ready" list. Insns may
436 move from the "Ready" list to the "Queued" set if they are blocked
437 due to a function unit conflict.
439 The "Pending" list (P) consists of the insns in the INSN_DEPEND of the unscheduled
440 insns, i.e., those that are ready, queued, and pending.
441 The "Queued" set (Q) is implemented by the variable `insn_queue'.
442 The "Ready" list (R) is implemented by the variables `ready' and
443 `n_ready'.
444 The "Scheduled" list (S) is the new insn chain built by this pass.
446 The transition (R->S) is implemented in the scheduling loop in
447 `schedule_block' when the best insn to schedule is chosen.
448 The transition (R->Q) is implemented in `queue_insn' when an
449 insn is found to have a function unit conflict with the already
450 committed insns.
451 The transitions (P->R and P->Q) are implemented in `schedule_insn' as
452 insns move from the ready list to the scheduled list.
453 The transition (Q->R) is implemented in 'queue_to_ready' as time
454 passes or stalls are introduced. */
456 /* Implement a circular buffer to delay instructions until sufficient
457 time has passed. INSN_QUEUE_SIZE is a power of two larger than
458 MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the
459 longest time an insn may be queued. */
460 static rtx insn_queue[INSN_QUEUE_SIZE];
461 static int q_ptr = 0;
462 static int q_size = 0;
463 #define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))
464 #define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1))
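/* For example, with INSN_QUEUE_SIZE == 16 (it is always a power of
   two) and q_ptr == 14, NEXT_Q (14) == 15, NEXT_Q (15) == 0, and
   NEXT_Q_AFTER (14, 5) == (14 + 5) & 15 == 3: the mask makes the
   buffer wrap around instead of overflowing.  */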
466 /* Forward declarations. */
467 static void add_dependence PROTO ((rtx, rtx, enum reg_note));
468 #ifdef HAVE_cc0
469 static void remove_dependence PROTO ((rtx, rtx));
470 #endif
471 static rtx find_insn_list PROTO ((rtx, rtx));
472 static int insn_unit PROTO ((rtx));
473 static unsigned int blockage_range PROTO ((int, rtx));
474 static void clear_units PROTO ((void));
475 static int actual_hazard_this_instance PROTO ((int, int, rtx, int, int));
476 static void schedule_unit PROTO ((int, rtx, int));
477 static int actual_hazard PROTO ((int, rtx, int, int));
478 static int potential_hazard PROTO ((int, rtx, int));
479 static int insn_cost PROTO ((rtx, rtx, rtx));
480 static int priority PROTO ((rtx));
481 static void free_pending_lists PROTO ((void));
482 static void add_insn_mem_dependence PROTO ((struct deps *, rtx *, rtx *, rtx,
483 rtx));
484 static void flush_pending_lists PROTO ((struct deps *, rtx, int));
485 static void sched_analyze_1 PROTO ((struct deps *, rtx, rtx));
486 static void sched_analyze_2 PROTO ((struct deps *, rtx, rtx));
487 static void sched_analyze_insn PROTO ((struct deps *, rtx, rtx, rtx));
488 static void sched_analyze PROTO ((struct deps *, rtx, rtx));
489 static int rank_for_schedule PROTO ((const PTR, const PTR));
490 static void swap_sort PROTO ((rtx *, int));
491 static void queue_insn PROTO ((rtx, int));
492 static int schedule_insn PROTO ((rtx, rtx *, int, int));
493 static void find_insn_reg_weight PROTO ((int));
494 static int schedule_block PROTO ((int, int));
495 static char *safe_concat PROTO ((char *, char *, const char *));
496 static int insn_issue_delay PROTO ((rtx));
497 static void adjust_priority PROTO ((rtx));
499 /* Control flow graph edges are kept in circular lists. */
500 typedef struct
502 int from_block;
503 int to_block;
504 int next_in;
505 int next_out;
507 haifa_edge;
508 static haifa_edge *edge_table;
510 #define NEXT_IN(edge) (edge_table[edge].next_in)
511 #define NEXT_OUT(edge) (edge_table[edge].next_out)
512 #define FROM_BLOCK(edge) (edge_table[edge].from_block)
513 #define TO_BLOCK(edge) (edge_table[edge].to_block)
515 /* Number of edges in the control flow graph. (In fact, larger than
516 that by 1, since edge 0 is unused.) */
517 static int nr_edges;
519 /* Circular list of incoming/outgoing edges of a block. */
520 static int *in_edges;
521 static int *out_edges;
523 #define IN_EDGES(block) (in_edges[block])
524 #define OUT_EDGES(block) (out_edges[block])
528 static int is_cfg_nonregular PROTO ((void));
529 static int build_control_flow PROTO ((struct edge_list *));
530 static void new_edge PROTO ((int, int));
533 /* A region is the main entity for interblock scheduling: insns
534 are allowed to move between blocks in the same region, along
535 control flow graph edges, in the 'up' direction. */
536 typedef struct
538 int rgn_nr_blocks; /* Number of blocks in region. */
539 int rgn_blocks; /* Blocks in the region (actually index in rgn_bb_table). */
541 region;
543 /* Number of regions in the procedure. */
544 static int nr_regions;
546 /* Table of region descriptions. */
547 static region *rgn_table;
549 /* Array of lists of regions' blocks. */
550 static int *rgn_bb_table;
552 /* Topological order of blocks in the region (if b2 is reachable from
553 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is
554 always referred to by either block or b, while its topological
555 order name (in the region) is referred to by bb. */
556 static int *block_to_bb;
558 /* The number of the region containing a block. */
559 static int *containing_rgn;
561 #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
562 #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
563 #define BLOCK_TO_BB(block) (block_to_bb[block])
564 #define CONTAINING_RGN(block) (containing_rgn[block])
566 void debug_regions PROTO ((void));
567 static void find_single_block_region PROTO ((void));
568 static void find_rgns PROTO ((struct edge_list *, sbitmap *));
569 static int too_large PROTO ((int, int *, int *));
571 extern void debug_live PROTO ((int, int));
573 /* Blocks of the current region being scheduled. */
574 static int current_nr_blocks;
575 static int current_blocks;
577 /* The mapping from bb to block. */
578 #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
581 /* Bit vectors and bitset operations are needed for computations on
582 the control flow graph. */
584 typedef unsigned HOST_WIDE_INT *bitset;
585 typedef struct
587 int *first_member; /* Pointer to the list start in bitlst_table. */
588 int nr_members; /* The number of members of the bit list. */
590 bitlst;
592 static int bitlst_table_last;
593 static int bitlst_table_size;
594 static int *bitlst_table;
596 static char bitset_member PROTO ((bitset, int, int));
597 static void extract_bitlst PROTO ((bitset, int, bitlst *));
599 /* Target info declarations.
601 The block currently being scheduled is referred to as the "target" block,
602 while other blocks in the region from which insns can be moved to the
603 target are called "source" blocks. The candidate structure holds info
604 about such sources: are they valid? Speculative? Etc. */
605 typedef bitlst bblst;
606 typedef struct
608 char is_valid;
609 char is_speculative;
610 int src_prob;
611 bblst split_bbs;
612 bblst update_bbs;
614 candidate;
616 static candidate *candidate_table;
618 /* A speculative motion requires checking live information on the path
619 from 'source' to 'target'. The split blocks are those to be checked.
620 After a speculative motion, live information should be modified in
621 the 'update' blocks.
623 Lists of split and update blocks for each candidate of the current
624 target are in array bblst_table. */
625 static int *bblst_table, bblst_size, bblst_last;
627 #define IS_VALID(src) ( candidate_table[src].is_valid )
628 #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
629 #define SRC_PROB(src) ( candidate_table[src].src_prob )
631 /* The bb being currently scheduled. */
632 static int target_bb;
634 /* List of edges. */
635 typedef bitlst edgelst;
637 /* Target info functions. */
638 static void split_edges PROTO ((int, int, edgelst *));
639 static void compute_trg_info PROTO ((int));
640 void debug_candidate PROTO ((int));
641 void debug_candidates PROTO ((int));
644 /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */
645 typedef bitset bbset;
647 /* Number of words of the bbset. */
648 static int bbset_size;
650 /* Dominators array: dom[i] contains the bbset of dominators of
651 bb i in the region. */
652 static bbset *dom;
654 /* bb 0 is the only region entry. */
655 #define IS_RGN_ENTRY(bb) (!bb)
657 /* Is bb_src dominated by bb_trg. */
658 #define IS_DOMINATED(bb_src, bb_trg) \
659 ( bitset_member (dom[bb_src], bb_trg, bbset_size) )
661 /* Probability: Prob[i] is a float in [0, 1] which is the probability
662 of bb i relative to the region entry. */
663 static float *prob;
665 /* The probability of bb_src, relative to bb_trg. Note, that while the
666 'prob[bb]' is a float in [0, 1], this macro returns an integer
667 in [0, 100]. */
668 #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
669 prob[bb_trg])))
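/* For example, if prob[bb_src] == 0.25 and prob[bb_trg] == 0.50, then
   GET_SRC_PROB (bb_src, bb_trg) == (int) (100.0 * 0.5) == 50, i.e.
   bb_src is reached on about half of the paths that reach bb_trg.  */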
671 /* Bit-set of edges, where bit i stands for edge i. */
672 typedef bitset edgeset;
674 /* Number of edges in the region. */
675 static int rgn_nr_edges;
677 /* Array of size rgn_nr_edges. */
678 static int *rgn_edges;
680 /* Number of words in an edgeset. */
681 static int edgeset_size;
683 /* Mapping from each edge in the graph to its number in the rgn. */
684 static int *edge_to_bit;
685 #define EDGE_TO_BIT(edge) (edge_to_bit[edge])
687 /* The split edges of a source bb are different for each target
688 bb. In order to compute this efficiently, the 'potential-split edges'
689 are computed for each bb prior to scheduling a region. This is actually
690 the split edges of each bb relative to the region entry.
692 pot_split[bb] is the set of potential split edges of bb. */
693 static edgeset *pot_split;
695 /* For every bb, a set of its ancestor edges. */
696 static edgeset *ancestor_edges;
698 static void compute_dom_prob_ps PROTO ((int));
700 #define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
701 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))
702 #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
703 #define INSN_BB(INSN) (BLOCK_TO_BB (BLOCK_NUM (INSN)))
705 /* Parameters affecting the decision of rank_for_schedule(). */
706 #define MIN_DIFF_PRIORITY 2
707 #define MIN_PROBABILITY 40
708 #define MIN_PROB_DIFF 10
710 /* Speculative scheduling functions. */
711 static int check_live_1 PROTO ((int, rtx));
712 static void update_live_1 PROTO ((int, rtx));
713 static int check_live PROTO ((rtx, int));
714 static void update_live PROTO ((rtx, int));
715 static void set_spec_fed PROTO ((rtx));
716 static int is_pfree PROTO ((rtx, int, int));
717 static int find_conditional_protection PROTO ((rtx, int));
718 static int is_conditionally_protected PROTO ((rtx, int, int));
719 static int may_trap_exp PROTO ((rtx, int));
720 static int haifa_classify_insn PROTO ((rtx));
721 static int is_prisky PROTO ((rtx, int, int));
722 static int is_exception_free PROTO ((rtx, int, int));
724 static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx));
725 static void compute_block_forward_dependences PROTO ((int));
726 static void add_branch_dependences PROTO ((rtx, rtx));
727 static void compute_block_backward_dependences PROTO ((int));
728 void debug_dependencies PROTO ((void));
730 /* Notes handling mechanism:
731 =========================
732 Generally, NOTES are saved before scheduling and restored after scheduling.
733 The scheduler distinguishes between three types of notes:
735 (1) LINE_NUMBER notes, generated and used for debugging. Here,
736 before scheduling a region, a pointer to the LINE_NUMBER note is
737 added to the insn following it (in save_line_notes()), and the note
738 is removed (in rm_line_notes() and unlink_line_notes()). After
739 scheduling the region, this pointer is used for regeneration of
740 the LINE_NUMBER note (in restore_line_notes()).
742 (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
743 Before scheduling a region, a pointer to the note is added to the insn
744 that follows or precedes it. (This happens as part of the data dependence
745 computation). After scheduling an insn, the pointer contained in it is
746 used for regenerating the corresponding note (in reemit_notes).
748 (3) All other notes (e.g. INSN_DELETED): Before scheduling a block,
749 these notes are put in a list (in rm_other_notes() and
750 unlink_other_notes ()). After scheduling the block, these notes are
751 inserted at the beginning of the block (in schedule_block()). */
753 static rtx unlink_other_notes PROTO ((rtx, rtx));
754 static rtx unlink_line_notes PROTO ((rtx, rtx));
755 static void rm_line_notes PROTO ((int));
756 static void save_line_notes PROTO ((int));
757 static void restore_line_notes PROTO ((int));
758 static void rm_redundant_line_notes PROTO ((void));
759 static void rm_other_notes PROTO ((rtx, rtx));
760 static rtx reemit_notes PROTO ((rtx, rtx));
762 static void get_block_head_tail PROTO ((int, rtx *, rtx *));
763 static void get_bb_head_tail PROTO ((int, rtx *, rtx *));
765 static int queue_to_ready PROTO ((rtx [], int));
767 static void debug_ready_list PROTO ((rtx[], int));
768 static void init_target_units PROTO ((void));
769 static void insn_print_units PROTO ((rtx));
770 static int get_visual_tbl_length PROTO ((void));
771 static void init_block_visualization PROTO ((void));
772 static void print_block_visualization PROTO ((int, const char *));
773 static void visualize_scheduled_insns PROTO ((int, int));
774 static void visualize_no_unit PROTO ((rtx));
775 static void visualize_stall_cycles PROTO ((int, int));
776 static void print_exp PROTO ((char *, rtx, int));
777 static void print_value PROTO ((char *, rtx, int));
778 static void print_pattern PROTO ((char *, rtx, int));
779 static void print_insn PROTO ((char *, rtx, int));
780 void debug_reg_vector PROTO ((regset));
782 static rtx move_insn1 PROTO ((rtx, rtx));
783 static rtx move_insn PROTO ((rtx, rtx));
784 static rtx group_leader PROTO ((rtx));
785 static int set_priorities PROTO ((int));
786 static void init_deps PROTO ((struct deps *));
787 static void schedule_region PROTO ((int));
789 #endif /* INSN_SCHEDULING */
791 #define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
793 /* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the
794 LOG_LINKS of INSN, if not already there. DEP_TYPE indicates the type
795 of dependence that this link represents. */
797 static void
798 add_dependence (insn, elem, dep_type)
799 rtx insn;
800 rtx elem;
801 enum reg_note dep_type;
803 rtx link, next;
805 /* Don't depend an insn on itself. */
806 if (insn == elem)
807 return;
809 /* We can get a dependency on deleted insns due to optimizations in
810 the register allocation and reloading or due to splitting. Any
811 such dependency is useless and can be ignored. */
812 if (GET_CODE (elem) == NOTE)
813 return;
815 /* If elem is part of a sequence that must be scheduled together, then
816 make the dependence point to the last insn of the sequence.
817 When HAVE_cc0, it is possible for NOTEs to exist between users and
818 setters of the condition codes, so we must skip past notes here.
819 Otherwise, NOTEs are impossible here. */
821 next = NEXT_INSN (elem);
823 #ifdef HAVE_cc0
824 while (next && GET_CODE (next) == NOTE)
825 next = NEXT_INSN (next);
826 #endif
828 if (next && SCHED_GROUP_P (next)
829 && GET_CODE (next) != CODE_LABEL)
831 /* Notes will never intervene here though, so don't bother checking
832 for them. */
833 /* We must reject CODE_LABELs, so that we don't get confused by one
834 that has LABEL_PRESERVE_P set, which is represented by the same
835 bit in the rtl as SCHED_GROUP_P. A CODE_LABEL can never be
836 SCHED_GROUP_P. */
837 while (NEXT_INSN (next) && SCHED_GROUP_P (NEXT_INSN (next))
838 && GET_CODE (NEXT_INSN (next)) != CODE_LABEL)
839 next = NEXT_INSN (next);
841 /* Again, don't depend an insn on itself. */
842 if (insn == next)
843 return;
845 /* Make the dependence to NEXT, the last insn of the group, instead
846 of the original ELEM. */
847 elem = next;
850 #ifdef INSN_SCHEDULING
851 /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
852 No need for interblock dependences with calls, since
853 calls are not moved between blocks. Note: the edge where
854 elem is a CALL is still required. */
855 if (GET_CODE (insn) == CALL_INSN
856 && (INSN_BB (elem) != INSN_BB (insn)))
857 return;
860 /* If we already have a true dependency for ELEM, then we do not
861 need to do anything. Avoiding the list walk below can cut
862 compile times dramatically for some code. */
863 if (true_dependency_cache
864 && TEST_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem)))
865 return;
866 #endif
868 /* Check that we don't already have this dependence. */
869 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
870 if (XEXP (link, 0) == elem)
872 /* If this is a more restrictive type of dependence than the existing
873 one, then change the existing dependence to this type. */
874 if ((int) dep_type < (int) REG_NOTE_KIND (link))
875 PUT_REG_NOTE_KIND (link, dep_type);
877 #ifdef INSN_SCHEDULING
878 /* If we are adding a true dependency to INSN's LOG_LINKs, then
879 note that in the bitmap cache of true dependency information. */
880 if ((int)dep_type == 0 && true_dependency_cache)
881 SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
882 #endif
883 return;
885 /* Might want to check one level of transitivity to save conses. */
887 link = alloc_INSN_LIST (elem, LOG_LINKS (insn));
888 LOG_LINKS (insn) = link;
890 /* Insn dependency, not data dependency. */
891 PUT_REG_NOTE_KIND (link, dep_type);
893 #ifdef INSN_SCHEDULING
894 /* If we are adding a true dependency to INSN's LOG_LINKs, then
895 note that in the bitmap cache of true dependency information. */
896 if ((int)dep_type == 0 && true_dependency_cache)
897 SET_BIT (true_dependency_cache[INSN_LUID (insn)], INSN_LUID (elem));
898 #endif
901 #ifdef HAVE_cc0
902 /* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS
903 of INSN. Abort if not found. */
905 static void
906 remove_dependence (insn, elem)
907 rtx insn;
908 rtx elem;
910 rtx prev, link, next;
911 int found = 0;
913 for (prev = 0, link = LOG_LINKS (insn); link; link = next)
915 next = XEXP (link, 1);
916 if (XEXP (link, 0) == elem)
918 if (prev)
919 XEXP (prev, 1) = next;
920 else
921 LOG_LINKS (insn) = next;
923 #ifdef INSN_SCHEDULING
924 /* If we are removing a true dependency from the LOG_LINKS list,
925 make sure to remove it from the cache too. */
926 if (REG_NOTE_KIND (link) == 0 && true_dependency_cache)
927 RESET_BIT (true_dependency_cache[INSN_LUID (insn)],
928 INSN_LUID (elem));
929 #endif
931 free_INSN_LIST_node (link);
933 found = 1;
935 else
936 prev = link;
939 if (!found)
940 abort ();
941 return;
943 #endif /* HAVE_cc0 */
945 #ifndef INSN_SCHEDULING
946 void
947 schedule_insns (dump_file)
948 FILE *dump_file;
951 #else
952 #ifndef __GNUC__
953 #define __inline
954 #endif
956 #ifndef HAIFA_INLINE
957 #define HAIFA_INLINE __inline
958 #endif
960 /* Computation of memory dependencies. */
962 /* Data structures for the computation of data dependences in a region. We
963 keep one mem_deps structure for every basic block. Before analyzing the
964 data dependences for a bb, its variables are initialized as a function of
965 the variables of its predecessors. When the analysis for a bb completes,
966 we save the contents to the corresponding bb_deps[bb] variable. */
968 static struct deps *bb_deps;
970 /* Pointer to the last instruction scheduled. Used by rank_for_schedule,
971 so that insns independent of the last scheduled insn will be preferred
972 over dependent instructions. */
974 static rtx last_scheduled_insn;
976 /* Functions for construction of the control flow graph. */
978 /* Return 1 if control flow graph should not be constructed, 0 otherwise.
980 We decide not to build the control flow graph if there is possibly more
981 than one entry to the function, if computed branches exist, or if we
982 have nonlocal gotos. */
984 static int
985 is_cfg_nonregular ()
987 int b;
988 rtx insn;
989 RTX_CODE code;
991 /* If we have a label that could be the target of a nonlocal goto, then
992 the cfg is not well structured. */
993 if (nonlocal_goto_handler_labels)
994 return 1;
996 /* If we have any forced labels, then the cfg is not well structured. */
997 if (forced_labels)
998 return 1;
1000 /* If this function has a computed jump, then we consider the cfg
1001 not well structured. */
1002 if (current_function_has_computed_jump)
1003 return 1;
1005 /* If we have exception handlers, then we consider the cfg not well
1006 structured. ?!? We should be able to handle this now that flow.c
1007 computes an accurate cfg for EH. */
1008 if (exception_handler_labels)
1009 return 1;
1011 /* If we have non-jumping insns which refer to labels, then we consider
1012 the cfg not well structured. */
1013 /* Check for labels referred to by something other than jumps. */
1014 for (b = 0; b < n_basic_blocks; b++)
1015 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
1017 code = GET_CODE (insn);
1018 if (GET_RTX_CLASS (code) == 'i')
1020 rtx note;
1022 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
1023 if (REG_NOTE_KIND (note) == REG_LABEL)
1024 return 1;
1027 if (insn == BLOCK_END (b))
1028 break;
1031 /* All the tests passed. Consider the cfg well structured. */
1032 return 0;
1035 /* Build the control flow graph and set nr_edges.
1037 Instead of trying to build a cfg ourselves, we rely on flow to
1038 do it for us. Stamp out useless code (and bug) duplication.
1040 Return nonzero if an irregularity in the cfg is found which would
1041 prevent cross block scheduling. */
1043 static int
1044 build_control_flow (edge_list)
1045 struct edge_list *edge_list;
1047 int i, unreachable, num_edges;
1049 /* This already accounts for entry/exit edges. */
1050 num_edges = NUM_EDGES (edge_list);
1052 /* Unreachable loops with more than one basic block are detected
1053 during the DFS traversal in find_rgns.
1055 Unreachable loops with a single block are detected here. This
1056 test is redundant with the one in find_rgns, but it's much
1057 cheaper to go ahead and catch the trivial case here. */
1058 unreachable = 0;
1059 for (i = 0; i < n_basic_blocks; i++)
1061 basic_block b = BASIC_BLOCK (i);
1063 if (b->pred == NULL
1064 || (b->pred->src == b
1065 && b->pred->pred_next == NULL))
1066 unreachable = 1;
1069 /* ??? We can kill these soon. */
1070 in_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1071 out_edges = (int *) xcalloc (n_basic_blocks, sizeof (int));
1072 edge_table = (haifa_edge *) xcalloc (num_edges, sizeof (haifa_edge));
1074 nr_edges = 0;
1075 for (i = 0; i < num_edges; i++)
1077 edge e = INDEX_EDGE (edge_list, i);
1079 if (e->dest != EXIT_BLOCK_PTR
1080 && e->src != ENTRY_BLOCK_PTR)
1081 new_edge (e->src->index, e->dest->index);
1084 /* Increment by 1, since edge 0 is unused. */
1085 nr_edges++;
1087 return unreachable;
1091 /* Record an edge in the control flow graph from SOURCE to TARGET.
1093 In theory, this is redundant with the s_succs computed above, but
1094 we have not converted all of haifa to use information from the
1095 integer lists. */
1097 static void
1098 new_edge (source, target)
1099 int source, target;
1101 int e, next_edge;
1102 int curr_edge, fst_edge;
1104 /* Check for duplicates. */
1105 fst_edge = curr_edge = OUT_EDGES (source);
1106 while (curr_edge)
1108 if (FROM_BLOCK (curr_edge) == source
1109 && TO_BLOCK (curr_edge) == target)
1111 return;
1114 curr_edge = NEXT_OUT (curr_edge);
1116 if (fst_edge == curr_edge)
1117 break;
1120 e = ++nr_edges;
1122 FROM_BLOCK (e) = source;
1123 TO_BLOCK (e) = target;
1125 if (OUT_EDGES (source))
1127 next_edge = NEXT_OUT (OUT_EDGES (source));
1128 NEXT_OUT (OUT_EDGES (source)) = e;
1129 NEXT_OUT (e) = next_edge;
1131 else
1133 OUT_EDGES (source) = e;
1134 NEXT_OUT (e) = e;
1137 if (IN_EDGES (target))
1139 next_edge = NEXT_IN (IN_EDGES (target));
1140 NEXT_IN (IN_EDGES (target)) = e;
1141 NEXT_IN (e) = next_edge;
1143 else
1145 IN_EDGES (target) = e;
1146 NEXT_IN (e) = e;
1151 /* BITSET macros for operations on the control flow graph. */
1153 /* Compute bitwise union of two bitsets. */
1154 #define BITSET_UNION(set1, set2, len) \
1155 do { register bitset tp = set1, sp = set2; \
1156 register int i; \
1157 for (i = 0; i < len; i++) \
1158 *(tp++) |= *(sp++); } while (0)
1160 /* Compute bitwise intersection of two bitsets. */
1161 #define BITSET_INTER(set1, set2, len) \
1162 do { register bitset tp = set1, sp = set2; \
1163 register int i; \
1164 for (i = 0; i < len; i++) \
1165 *(tp++) &= *(sp++); } while (0)
1167 /* Compute bitwise difference of two bitsets. */
1168 #define BITSET_DIFFER(set1, set2, len) \
1169 do { register bitset tp = set1, sp = set2; \
1170 register int i; \
1171 for (i = 0; i < len; i++) \
1172 *(tp++) &= ~*(sp++); } while (0)
1174 /* Inverts every bit of bitset 'set'. */
1175 #define BITSET_INVERT(set, len) \
1176 do { register bitset tmpset = set; \
1177 register int i; \
1178 for (i = 0; i < len; i++, tmpset++) \
1179 *tmpset = ~*tmpset; } while (0)
1181 /* Turn on the index'th bit in bitset set. */
1182 #define BITSET_ADD(set, index, len) \
1184 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1185 abort (); \
1186 else \
1187 set[index/HOST_BITS_PER_WIDE_INT] |= \
1188 1 << (index % HOST_BITS_PER_WIDE_INT); \
1191 /* Turn off the index'th bit in set. */
1192 #define BITSET_REMOVE(set, index, len) \
1194 if (index >= HOST_BITS_PER_WIDE_INT * len) \
1195 abort (); \
1196 else \
1197 set[index/HOST_BITS_PER_WIDE_INT] &= \
1198 ~(1 << (index%HOST_BITS_PER_WIDE_INT)); \
1202 /* Check if the index'th bit in bitset set is on. */
1204 static char
1205 bitset_member (set, index, len)
1206 bitset set;
1207 int index, len;
1209 if (index >= HOST_BITS_PER_WIDE_INT * len)
1210 abort ();
1211 return (set[index / HOST_BITS_PER_WIDE_INT] &
1212 1 << (index % HOST_BITS_PER_WIDE_INT)) ? 1 : 0;
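/* A minimal usage sketch of the bitset operations above; it is not
   compiled, and it assumes HOST_BITS_PER_WIDE_INT == 32 purely for the
   arithmetic in the comments.  */
#if 0
static void
bitset_example_sketch ()
{
  /* Two words are enough for bits 0..63.  */
  bitset set = (bitset) xcalloc (2, sizeof (unsigned HOST_WIDE_INT));

  /* Bit 37 lives in word 37 / 32 == 1 at position 37 % 32 == 5.  */
  BITSET_ADD (set, 37, 2);
  if (! bitset_member (set, 37, 2))
    abort ();

  BITSET_REMOVE (set, 37, 2);
  free (set);
}
#endif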
1216 /* Translate a bit-set SET to a list BL of the bit-set members. */
1218 static void
1219 extract_bitlst (set, len, bl)
1220 bitset set;
1221 int len;
1222 bitlst *bl;
1224 int i, j, offset;
1225 unsigned HOST_WIDE_INT word;
1227 /* bblst table space is reused in each call to extract_bitlst. */
1228 bitlst_table_last = 0;
1230 bl->first_member = &bitlst_table[bitlst_table_last];
1231 bl->nr_members = 0;
1233 for (i = 0; i < len; i++)
1235 word = set[i];
1236 offset = i * HOST_BITS_PER_WIDE_INT;
1237 for (j = 0; word; j++)
1239 if (word & 1)
1241 bitlst_table[bitlst_table_last++] = offset;
1242 (bl->nr_members)++;
1244 word >>= 1;
1245 ++offset;
1252 /* Functions for the construction of regions. */
1254 /* Print the regions, for debugging purposes. Callable from debugger. */
1256 void
1257 debug_regions ()
1259 int rgn, bb;
1261 fprintf (dump, "\n;; ------------ REGIONS ----------\n\n");
1262 for (rgn = 0; rgn < nr_regions; rgn++)
1264 fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn,
1265 rgn_table[rgn].rgn_nr_blocks);
1266 fprintf (dump, ";;\tbb/block: ");
1268 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
1270 current_blocks = RGN_BLOCKS (rgn);
1272 if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
1273 abort ();
1275 fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
1278 fprintf (dump, "\n\n");
1283 /* Build a single block region for each basic block in the function.
1284 This allows for using the same code for interblock and basic block
1285 scheduling. */
1287 static void
1288 find_single_block_region ()
1290 int i;
1292 for (i = 0; i < n_basic_blocks; i++)
1294 rgn_bb_table[i] = i;
1295 RGN_NR_BLOCKS (i) = 1;
1296 RGN_BLOCKS (i) = i;
1297 CONTAINING_RGN (i) = i;
1298 BLOCK_TO_BB (i) = 0;
1300 nr_regions = n_basic_blocks;
1304 /* Update number of blocks and the estimate for number of insns
1305 in the region. Return 1 if the region is "too large" for interblock
1306 scheduling (compile time considerations), otherwise return 0. */
1308 static int
1309 too_large (block, num_bbs, num_insns)
1310 int block, *num_bbs, *num_insns;
1312 (*num_bbs)++;
1313 (*num_insns) += (INSN_LUID (BLOCK_END (block)) -
1314 INSN_LUID (BLOCK_HEAD (block)));
1315 if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS))
1316 return 1;
1317 else
1318 return 0;
1322 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
1323 is still an inner loop. Put in max_hdr[blk] the header of the most inner
1324 loop containing blk. */
1325 #define UPDATE_LOOP_RELATIONS(blk, hdr) \
1327 if (max_hdr[blk] == -1) \
1328 max_hdr[blk] = hdr; \
1329 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \
1330 RESET_BIT (inner, hdr); \
1331 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \
1333 RESET_BIT (inner,max_hdr[blk]); \
1334 max_hdr[blk] = hdr; \
1339 /* Find regions for interblock scheduling.
1341 A region for scheduling can be:
1343 * A loop-free procedure, or
1345 * A reducible inner loop, or
1347 * A basic block not contained in any other region.
1350 ?!? In theory we could build other regions based on extended basic
1351 blocks or reverse extended basic blocks. Is it worth the trouble?
1353 Loop blocks that form a region are put into the region's block list
1354 in topological order.
1356 This procedure stores its results into the following global (ick) variables
1358 * nr_regions
1359 * rgn_table
1360 * rgn_bb_table
1361 * block_to_bb
1362 * containing_rgn
1365 We use dominator relationships to avoid making regions out of non-reducible
1366 loops.
1368 This procedure needs to be converted to work on pred/succ lists instead
1369 of edge tables. That would simplify it somewhat. */
1371 static void
1372 find_rgns (edge_list, dom)
1373 struct edge_list *edge_list;
1374 sbitmap *dom;
1376 int *max_hdr, *dfs_nr, *stack, *degree;
1377 char no_loops = 1;
1378 int node, child, loop_head, i, head, tail;
1379 int count = 0, sp, idx = 0, current_edge = out_edges[0];
1380 int num_bbs, num_insns, unreachable;
1381 int too_large_failure;
1383 /* Note if an edge has been passed. */
1384 sbitmap passed;
1386 /* Note if a block is a natural loop header. */
1387 sbitmap header;
1389 /* Note if a block is a natural inner loop header. */
1390 sbitmap inner;
1392 /* Note if a block is in the block queue. */
1393 sbitmap in_queue;
1395 /* Note if a block is on the stack during the DFS traversal. */
1396 sbitmap in_stack;
1398 int num_edges = NUM_EDGES (edge_list);
1400 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops
1401 and a mapping from block to its loop header (if the block is contained
1402 in a loop, else -1).
1404 Store results in HEADER, INNER, and MAX_HDR respectively, these will
1405 be used as inputs to the second traversal.
1407 STACK, SP and DFS_NR are only used during the first traversal. */
1409 /* Allocate and initialize variables for the first traversal. */
1410 max_hdr = (int *) xmalloc (n_basic_blocks * sizeof (int));
1411 dfs_nr = (int *) xcalloc (n_basic_blocks, sizeof (int));
1412 stack = (int *) xmalloc (nr_edges * sizeof (int));
1414 inner = sbitmap_alloc (n_basic_blocks);
1415 sbitmap_ones (inner);
1417 header = sbitmap_alloc (n_basic_blocks);
1418 sbitmap_zero (header);
1420 passed = sbitmap_alloc (nr_edges);
1421 sbitmap_zero (passed);
1423 in_queue = sbitmap_alloc (n_basic_blocks);
1424 sbitmap_zero (in_queue);
1426 in_stack = sbitmap_alloc (n_basic_blocks);
1427 sbitmap_zero (in_stack);
1429 for (i = 0; i < n_basic_blocks; i++)
1430 max_hdr[i] = -1;
1432 /* DFS traversal to find inner loops in the cfg. */
1434 sp = -1;
1435 while (1)
1437 if (current_edge == 0 || TEST_BIT (passed, current_edge))
1439 /* We have reached a leaf node or a node that was already
1440 processed. Pop edges off the stack until we find
1441 an edge that has not yet been processed. */
1442 while (sp >= 0
1443 && (current_edge == 0 || TEST_BIT (passed, current_edge)))
1445 /* Pop entry off the stack. */
1446 current_edge = stack[sp--];
1447 node = FROM_BLOCK (current_edge);
1448 child = TO_BLOCK (current_edge);
1449 RESET_BIT (in_stack, child);
1450 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1451 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1452 current_edge = NEXT_OUT (current_edge);
1455 /* See if we have finished the DFS tree traversal. */
1456 if (sp < 0 && TEST_BIT (passed, current_edge))
1457 break;
1459 /* Nope, continue the traversal with the popped node. */
1460 continue;
1463 /* Process a node. */
1464 node = FROM_BLOCK (current_edge);
1465 child = TO_BLOCK (current_edge);
1466 SET_BIT (in_stack, node);
1467 dfs_nr[node] = ++count;
1469 /* If the successor is in the stack, then we've found a loop.
1470 Mark the loop, if it is not a natural loop, then it will
1471 be rejected during the second traversal. */
1472 if (TEST_BIT (in_stack, child))
1474 no_loops = 0;
1475 SET_BIT (header, child);
1476 UPDATE_LOOP_RELATIONS (node, child);
1477 SET_BIT (passed, current_edge);
1478 current_edge = NEXT_OUT (current_edge);
1479 continue;
1482 /* If the child was already visited, then there is no need to visit
1483 it again. Just update the loop relationships and restart
1484 with a new edge. */
1485 if (dfs_nr[child])
1487 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
1488 UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
1489 SET_BIT (passed, current_edge);
1490 current_edge = NEXT_OUT (current_edge);
1491 continue;
1494 /* Push an entry on the stack and continue DFS traversal. */
1495 stack[++sp] = current_edge;
1496 SET_BIT (passed, current_edge);
1497 current_edge = OUT_EDGES (child);
1499 /* This is temporary until haifa is converted to use rth's new
1500 cfg routines which have true entry/exit blocks and the
1501 appropriate edges from/to those blocks.
1503 Generally we update dfs_nr for a node when we process its
1504 out edge. However, if the node has no out edge then we will
1505 not set dfs_nr for that node. This can confuse the scheduler
1506 into thinking that we have unreachable blocks, which in turn
1507 disables cross block scheduling.
1509 So, if we have a node with no out edges, go ahead and mark it
1510 as reachable now. */
1511 if (current_edge == 0)
1512 dfs_nr[child] = ++count;
1515 /* Another check for unreachable blocks. The earlier test in
1516 is_cfg_nonregular only finds unreachable blocks that do not
1517 form a loop.
1519 The DFS traversal will mark every block that is reachable from
1520 the entry node by placing a nonzero value in dfs_nr. Thus if
1521 dfs_nr is zero for any block, then it must be unreachable. */
1522 unreachable = 0;
1523 for (i = 0; i < n_basic_blocks; i++)
1524 if (dfs_nr[i] == 0)
1526 unreachable = 1;
1527 break;
1530 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array
1531 to hold degree counts. */
1532 degree = dfs_nr;
1534 for (i = 0; i < num_edges; i++)
1536 edge e = INDEX_EDGE (edge_list, i);
1538 if (e->src != ENTRY_BLOCK_PTR)
1539 degree[e->src->index]++;
1542 /* Do not perform region scheduling if there are any unreachable
1543 blocks. */
1544 if (!unreachable)
1546 int *queue;
1548 if (no_loops)
1549 SET_BIT (header, 0);
1551 /* Second traversal: find reducible inner loops and topologically sort
1552 blocks of each region. */
1554 queue = (int *) xmalloc (n_basic_blocks * sizeof (int));
1556 /* Find blocks which are inner loop headers. We still have non-reducible
1557 loops to consider at this point. */
1558 for (i = 0; i < n_basic_blocks; i++)
1560 if (TEST_BIT (header, i) && TEST_BIT (inner, i))
1562 edge e;
1563 int j;
1565 /* Now check that the loop is reducible. We do this separately
1566 from finding inner loops so that we do not find a reducible
1567 loop which contains an inner non-reducible loop.
1569 A simple way to find reducible/natural loops is to verify
1570 that each block in the loop is dominated by the loop
1571 header.
1573 If there exists a block that is not dominated by the loop
1574 header, then the block is reachable from outside the loop
1575 and thus the loop is not a natural loop. */
1576 for (j = 0; j < n_basic_blocks; j++)
1578 /* First identify blocks in the loop, except for the loop
1579 entry block. */
1580 if (i == max_hdr[j] && i != j)
1582 /* Now verify that the block is dominated by the loop
1583 header. */
1584 if (!TEST_BIT (dom[j], i))
1585 break;
1589 /* If we exited the loop early, then I is the header of
1590 a non-reducible loop and we should quit processing it
1591 now. */
1592 if (j != n_basic_blocks)
1593 continue;
1595 /* I is a header of an inner loop, or block 0 in a subroutine
1596 with no loops at all. */
1597 head = tail = -1;
1598 too_large_failure = 0;
1599 loop_head = max_hdr[i];
1601 /* Decrease degree of all I's successors for topological
1602 ordering. */
1603 for (e = BASIC_BLOCK (i)->succ; e; e = e->succ_next)
1604 if (e->dest != EXIT_BLOCK_PTR)
1605 --degree[e->dest->index];
1607 /* Estimate # insns, and count # blocks in the region. */
1608 num_bbs = 1;
1609 num_insns = (INSN_LUID (BLOCK_END (i))
1610 - INSN_LUID (BLOCK_HEAD (i)));
1613 /* Find all loop latches (blocks with back edges to the loop
1614 header), or all the leaf blocks if the cfg has no loops.
1616 Place those blocks into the queue. */
1617 if (no_loops)
1619 for (j = 0; j < n_basic_blocks; j++)
1620 /* Leaf nodes have only a single successor which must
1621 be EXIT_BLOCK. */
1622 if (BASIC_BLOCK (j)->succ
1623 && BASIC_BLOCK (j)->succ->dest == EXIT_BLOCK_PTR
1624 && BASIC_BLOCK (j)->succ->succ_next == NULL)
1626 queue[++tail] = j;
1627 SET_BIT (in_queue, j);
1629 if (too_large (j, &num_bbs, &num_insns))
1631 too_large_failure = 1;
1632 break;
1636 else
1638 edge e;
1640 for (e = BASIC_BLOCK (i)->pred; e; e = e->pred_next)
1642 if (e->src == ENTRY_BLOCK_PTR)
1643 continue;
1645 node = e->src->index;
1647 if (max_hdr[node] == loop_head && node != i)
1649 /* This is a loop latch. */
1650 queue[++tail] = node;
1651 SET_BIT (in_queue, node);
1653 if (too_large (node, &num_bbs, &num_insns))
1655 too_large_failure = 1;
1656 break;
1663 /* Now add all the blocks in the loop to the queue.
1665 We know the loop is a natural loop; however the algorithm
1666 above will not always mark certain blocks as being in the
1667 loop. Consider:
1668 node children
1669 a b,c
1671 c a,d
1675 The algorithm in the DFS traversal may not mark B & D as part
1676 of the loop (ie they will not have max_hdr set to A).
1678 We know they can not be loop latches (else they would have
1679 had max_hdr set since they'd have a backedge to a dominator
1680 block). So we don't need them on the initial queue.
1682 We know they are part of the loop because they are dominated
1683 by the loop header and can be reached by a backwards walk of
1684 the edges starting with nodes on the initial queue.
1686 It is safe and desirable to include those nodes in the
1687 loop/scheduling region. To do so we would need to decrease
1688 the degree of a node if it is the target of a backedge
1689 within the loop itself as the node is placed in the queue.
1691 We do not do this because I'm not sure that the actual
1692 scheduling code will properly handle this case. ?!? */
1694 while (head < tail && !too_large_failure)
1696 edge e;
1697 child = queue[++head];
1699 for (e = BASIC_BLOCK (child)->pred; e; e = e->pred_next)
1701 node = e->src->index;
1703 /* See discussion above about nodes not marked as in
1704 this loop during the initial DFS traversal. */
1705 if (e->src == ENTRY_BLOCK_PTR
1706 || max_hdr[node] != loop_head)
1708 tail = -1;
1709 break;
1711 else if (!TEST_BIT (in_queue, node) && node != i)
1713 queue[++tail] = node;
1714 SET_BIT (in_queue, node);
1716 if (too_large (node, &num_bbs, &num_insns))
1718 too_large_failure = 1;
1719 break;
1725 if (tail >= 0 && !too_large_failure)
1727 /* Place the loop header into list of region blocks. */
1728 degree[i] = -1;
1729 rgn_bb_table[idx] = i;
1730 RGN_NR_BLOCKS (nr_regions) = num_bbs;
1731 RGN_BLOCKS (nr_regions) = idx++;
1732 CONTAINING_RGN (i) = nr_regions;
1733 BLOCK_TO_BB (i) = count = 0;
1735 /* Remove blocks from queue[] when their in degree
1736 becomes zero. Repeat until no blocks are left on the
1737 list. This produces a topological list of blocks in
1738 the region. */
1739 while (tail >= 0)
1741 if (head < 0)
1742 head = tail;
1743 child = queue[head];
1744 if (degree[child] == 0)
1746 edge e;
1748 degree[child] = -1;
1749 rgn_bb_table[idx++] = child;
1750 BLOCK_TO_BB (child) = ++count;
1751 CONTAINING_RGN (child) = nr_regions;
1752 queue[head] = queue[tail--];
1754 for (e = BASIC_BLOCK (child)->succ;
1756 e = e->succ_next)
1757 if (e->dest != EXIT_BLOCK_PTR)
1758 --degree[e->dest->index];
1760 else
1761 --head;
1763 ++nr_regions;
1767 free (queue);
1770 /* Any block that did not end up in a region is placed into a region
1771 by itself. */
1772 for (i = 0; i < n_basic_blocks; i++)
1773 if (degree[i] >= 0)
1775 rgn_bb_table[idx] = i;
1776 RGN_NR_BLOCKS (nr_regions) = 1;
1777 RGN_BLOCKS (nr_regions) = idx++;
1778 CONTAINING_RGN (i) = nr_regions++;
1779 BLOCK_TO_BB (i) = 0;
1782 free (max_hdr);
1783 free (dfs_nr);
1784 free (stack);
1785 free (passed);
1786 free (header);
1787 free (inner);
1788 free (in_queue);
1789 free (in_stack);
1793 /* Functions for region scheduling information. */
1795 /* Compute dominators, probability, and potential-split-edges of bb.
1796 Assume that these values were already computed for bb's predecessors. */
1798 static void
1799 compute_dom_prob_ps (bb)
1800 int bb;
1802 int nxt_in_edge, fst_in_edge, pred;
1803 int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges;
1805 prob[bb] = 0.0;
1806 if (IS_RGN_ENTRY (bb))
1808 BITSET_ADD (dom[bb], 0, bbset_size);
1809 prob[bb] = 1.0;
1810 return;
1813 fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb));
1815 /* Initialize dom[bb] to '111..1'. */
1816 BITSET_INVERT (dom[bb], bbset_size);
1820 pred = FROM_BLOCK (nxt_in_edge);
1821 BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
1823 BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
1824 edgeset_size);
1826 BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size);
1828 nr_out_edges = 1;
1829 nr_rgn_out_edges = 0;
1830 fst_out_edge = OUT_EDGES (pred);
1831 nxt_out_edge = NEXT_OUT (fst_out_edge);
1832 BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
1833 edgeset_size);
1835 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size);
1837 /* The successor doesn't belong in the region? */
1838 if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) !=
1839 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1840 ++nr_rgn_out_edges;
1842 while (fst_out_edge != nxt_out_edge)
1844 ++nr_out_edges;
1845 /* The successor doesn't belong in the region? */
1846 if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) !=
1847 CONTAINING_RGN (BB_TO_BLOCK (bb)))
1848 ++nr_rgn_out_edges;
1849 BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size);
1850 nxt_out_edge = NEXT_OUT (nxt_out_edge);
1854 /* Now nr_rgn_out_edges is the number of region-exit edges from
1855 pred, and nr_out_edges will be the number of pred out edges
1856 not leaving the region. */
1857 nr_out_edges -= nr_rgn_out_edges;
1858 if (nr_rgn_out_edges > 0)
1859 prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1860 else
1861 prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
1862 nxt_in_edge = NEXT_IN (nxt_in_edge);
1864 while (fst_in_edge != nxt_in_edge);
1866 BITSET_ADD (dom[bb], bb, bbset_size);
1867 BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
1869 if (sched_verbose >= 2)
1870 fprintf (dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), (int) (100.0 * prob[bb]));
1871 } /* compute_dom_prob_ps */
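/* Worked example with invented numbers, not from the original sources:
   suppose a predecessor P of bb has prob[P] == 0.8 and four out edges,
   one of which leaves the region.  Then nr_out_edges == 4 - 1 == 3 after
   subtracting nr_rgn_out_edges, and the loop above adds
   0.9 * 0.8 / 3 == 0.24 to prob[bb] for that predecessor; had no edge
   left the region, the full 0.8 / 4 == 0.2 would have been added.  */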
1873 /* Functions for target info. */
1875 /* Compute in BL the list of split-edges of bb_src relative to bb_trg.
1876 Note that bb_trg dominates bb_src. */
1878 static void
1879 split_edges (bb_src, bb_trg, bl)
1880 int bb_src;
1881 int bb_trg;
1882 edgelst *bl;
1884 int es = edgeset_size;
1885 edgeset src = (edgeset) xmalloc (es * sizeof (HOST_WIDE_INT));
1887 while (es--)
1888 src[es] = (pot_split[bb_src])[es];
1889 BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
1890 extract_bitlst (src, edgeset_size, bl);
1891 free (src);
1895 /* Find the valid candidate-source-blocks for the target block TRG, compute
1896 their probability, and check if they are speculative or not.
1897 For speculative sources, compute their update-blocks and split-blocks. */
1899 static void
1900 compute_trg_info (trg)
1901 int trg;
1903 register candidate *sp;
1904 edgelst el;
1905 int check_block, update_idx;
1906 int i, j, k, fst_edge, nxt_edge;
1908 /* Define some of the fields for the target bb as well. */
1909 sp = candidate_table + trg;
1910 sp->is_valid = 1;
1911 sp->is_speculative = 0;
1912 sp->src_prob = 100;
1914 for (i = trg + 1; i < current_nr_blocks; i++)
1916 sp = candidate_table + i;
1918 sp->is_valid = IS_DOMINATED (i, trg);
1919 if (sp->is_valid)
1921 sp->src_prob = GET_SRC_PROB (i, trg);
1922 sp->is_valid = (sp->src_prob >= MIN_PROBABILITY);
1925 if (sp->is_valid)
1927 split_edges (i, trg, &el);
1928 sp->is_speculative = (el.nr_members) ? 1 : 0;
1929 if (sp->is_speculative && !flag_schedule_speculative)
1930 sp->is_valid = 0;
1933 if (sp->is_valid)
1935 sp->split_bbs.first_member = &bblst_table[bblst_last];
1936 sp->split_bbs.nr_members = el.nr_members;
1937 for (j = 0; j < el.nr_members; bblst_last++, j++)
1938 bblst_table[bblst_last] =
1939 TO_BLOCK (rgn_edges[el.first_member[j]]);
1940 sp->update_bbs.first_member = &bblst_table[bblst_last];
1941 update_idx = 0;
1942 for (j = 0; j < el.nr_members; j++)
1944 check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
1945 fst_edge = nxt_edge = OUT_EDGES (check_block);
1948 for (k = 0; k < el.nr_members; k++)
1949 if (EDGE_TO_BIT (nxt_edge) == el.first_member[k])
1950 break;
1952 if (k >= el.nr_members)
1954 bblst_table[bblst_last++] = TO_BLOCK (nxt_edge);
1955 update_idx++;
1958 nxt_edge = NEXT_OUT (nxt_edge);
1960 while (fst_edge != nxt_edge);
1962 sp->update_bbs.nr_members = update_idx;
1965 else
1967 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;
1969 sp->is_speculative = 0;
1970 sp->src_prob = 0;
1973 } /* compute_trg_info */
1976 /* Print candidates info, for debugging purposes. Callable from debugger. */
1978 void
1979 debug_candidate (i)
1980 int i;
1982 if (!candidate_table[i].is_valid)
1983 return;
1985 if (candidate_table[i].is_speculative)
1987 int j;
1988 fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
1990 fprintf (dump, "split path: ");
1991 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
1993 int b = candidate_table[i].split_bbs.first_member[j];
1995 fprintf (dump, " %d ", b);
1997 fprintf (dump, "\n");
1999 fprintf (dump, "update path: ");
2000 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
2002 int b = candidate_table[i].update_bbs.first_member[j];
2004 fprintf (dump, " %d ", b);
2006 fprintf (dump, "\n");
2008 else
2010 fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i));
2015 /* Print candidates info, for debugging purposes. Callable from debugger. */
2017 void
2018 debug_candidates (trg)
2019 int trg;
2021 int i;
2023 fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n",
2024 BB_TO_BLOCK (trg), trg);
2025 for (i = trg + 1; i < current_nr_blocks; i++)
2026 debug_candidate (i);
2030 /* Functions for speculative scheduling. */
2032 /* Return 0 if x is a set of a register alive in the beginning of one
2033 of the split-blocks of src, otherwise return 1. */
2035 static int
2036 check_live_1 (src, x)
2037 int src;
2038 rtx x;
2040 register int i;
2041 register int regno;
2042 register rtx reg = SET_DEST (x);
2044 if (reg == 0)
2045 return 1;
2047 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2048 || GET_CODE (reg) == SIGN_EXTRACT
2049 || GET_CODE (reg) == STRICT_LOW_PART)
2050 reg = XEXP (reg, 0);
2052 if (GET_CODE (reg) == PARALLEL
2053 && GET_MODE (reg) == BLKmode)
2055 register int i;
2056 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2057 if (check_live_1 (src, XVECEXP (reg, 0, i)))
2058 return 1;
2059 return 0;
2062 if (GET_CODE (reg) != REG)
2063 return 1;
2065 regno = REGNO (reg);
2067 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
2069 /* Global registers are assumed live. */
2070 return 0;
2072 else
2074 if (regno < FIRST_PSEUDO_REGISTER)
2076 /* Check for hard registers. */
2077 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2078 while (--j >= 0)
2080 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2082 int b = candidate_table[src].split_bbs.first_member[i];
2084 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start,
2085 regno + j))
2087 return 0;
2092 else
2094 /* Check for pseudo registers. */
2095 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
2097 int b = candidate_table[src].split_bbs.first_member[i];
2099 if (REGNO_REG_SET_P (BASIC_BLOCK (b)->global_live_at_start, regno))
2101 return 0;
2107 return 1;
2111 /* If x is a set of a register R, mark that R is alive in the beginning
2112 of every update-block of src. */
2114 static void
2115 update_live_1 (src, x)
2116 int src;
2117 rtx x;
2119 register int i;
2120 register int regno;
2121 register rtx reg = SET_DEST (x);
2123 if (reg == 0)
2124 return;
2126 while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
2127 || GET_CODE (reg) == SIGN_EXTRACT
2128 || GET_CODE (reg) == STRICT_LOW_PART)
2129 reg = XEXP (reg, 0);
2131 if (GET_CODE (reg) == PARALLEL
2132 && GET_MODE (reg) == BLKmode)
2134 register int i;
2135 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
2136 update_live_1 (src, XVECEXP (reg, 0, i));
2137 return;
2140 if (GET_CODE (reg) != REG)
2141 return;
2143 /* Global registers are always live, so the code below does not apply
2144 to them. */
2146 regno = REGNO (reg);
2148 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
2150 if (regno < FIRST_PSEUDO_REGISTER)
2152 int j = HARD_REGNO_NREGS (regno, GET_MODE (reg));
2153 while (--j >= 0)
2155 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2157 int b = candidate_table[src].update_bbs.first_member[i];
2159 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start,
2160 regno + j);
2164 else
2166 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
2168 int b = candidate_table[src].update_bbs.first_member[i];
2170 SET_REGNO_REG_SET (BASIC_BLOCK (b)->global_live_at_start, regno);
2177 /* Return 1 if insn can be speculatively moved from block src to trg,
2178 otherwise return 0. Called before first insertion of insn to
2179 ready-list or before the scheduling. */
2181 static int
2182 check_live (insn, src)
2183 rtx insn;
2184 int src;
2186 /* Find the registers set by instruction. */
2187 if (GET_CODE (PATTERN (insn)) == SET
2188 || GET_CODE (PATTERN (insn)) == CLOBBER)
2189 return check_live_1 (src, PATTERN (insn));
2190 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2192 int j;
2193 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2194 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2195 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2196 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
2197 return 0;
2199 return 1;
2202 return 1;
2206 /* Update the live registers info after insn was moved speculatively from
2207 block src to trg. */
2209 static void
2210 update_live (insn, src)
2211 rtx insn;
2212 int src;
2214 /* Find the registers set by instruction. */
2215 if (GET_CODE (PATTERN (insn)) == SET
2216 || GET_CODE (PATTERN (insn)) == CLOBBER)
2217 update_live_1 (src, PATTERN (insn));
2218 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
2220 int j;
2221 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
2222 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
2223 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
2224 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
2228 /* Exception Free Loads:
2230 We define five classes of speculative loads: IFREE, IRISKY,
2231 PFREE, PRISKY, and MFREE.
2233 IFREE loads are loads that are proved to be exception-free, just
2234 by examining the load insn. Examples for such loads are loads
2235 from TOC and loads of global data.
2237 IRISKY loads are loads that are proved to be exception-risky,
2238 just by examining the load insn. Examples for such loads are
2239 volatile loads and loads from shared memory.
2241 PFREE loads are loads for which we can prove, by examining other
2242 insns, that they are exception-free. Currently, this class consists
2243 of loads for which we are able to find a "similar load", either in
2244 the target block, or, if only one split-block exists, in that split
2245 block. Load2 is similar to load1 if both have same single base
2246 register. We identify only part of the similar loads, by finding
2247 an insn upon which both load1 and load2 have a DEF-USE dependence.
2249 PRISKY loads are loads for which we can prove, by examining other
2250 insns, that they are exception-risky. Currently we have two proofs for
2251 such loads. The first proof detects loads that are probably guarded by a
2252 test on the memory address. This proof is based on the
2253 backward and forward data dependence information for the region.
2254 Let load-insn be the examined load.
2255 Load-insn is PRISKY iff ALL the following hold:
2257 - insn1 is not in the same block as load-insn
2258 - there is a DEF-USE dependence chain (insn1, ..., load-insn)
2259 - test-insn is either a compare or a branch, not in the same block
2260 as load-insn
2261 - load-insn is reachable from test-insn
2262 - there is a DEF-USE dependence chain (insn1, ..., test-insn)
2264 This proof might fail when the compare and the load are fed
2265 by an insn not in the region. To solve this, we will add to this
2266 group all loads that have no input DEF-USE dependence.
2268 The second proof detects loads that are directly or indirectly
2269 fed by a speculative load. This proof is affected by the
2270 scheduling process. We will use the flag fed_by_spec_load.
2271 Initially, all insns have this flag reset. After a speculative
2272 motion of an insn, if insn is either a load, or marked as
2273 fed_by_spec_load, we will also mark as fed_by_spec_load every
2274 insn1 for which a DEF-USE dependence (insn, insn1) exists. A
2275 load which is fed_by_spec_load is also PRISKY.
2277 MFREE (maybe-free) loads are all the remaining loads. They may be
2278 exception-free, but we cannot prove it.
2280 Now, all loads in IFREE and PFREE classes are considered
2281 exception-free, while all loads in IRISKY and PRISKY classes are
2282 considered exception-risky. As for loads in the MFREE class,
2283 these are considered either exception-free or exception-risky,
2284 depending on whether we are pessimistic or optimistic. We have
2285 to take the pessimistic approach to assure the safety of
2286 speculative scheduling, but we can take the optimistic approach
2287 by invoking the -fsched_spec_load_dangerous option. */
2289 enum INSN_TRAP_CLASS
2291 TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2,
2292 PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5
2295 #define WORST_CLASS(class1, class2) \
2296 ((class1 > class2) ? class1 : class2)
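/* Illustrative sketch, not part of the original file: WORST_CLASS relies on
   the numeric ordering of INSN_TRAP_CLASS above (TRAP_FREE < IFREE < ... <
   TRAP_RISKY), so combining two classes always keeps the riskier one.  The
   function below is a hypothetical example, kept out of the build.  */
#if 0
static int
worst_class_example (void)
{
  int c = TRAP_FREE;
  c = WORST_CLASS (c, IFREE);			/* c is now IFREE.  */
  c = WORST_CLASS (c, PFREE_CANDIDATE);	/* c is now PFREE_CANDIDATE.  */
  c = WORST_CLASS (c, TRAP_RISKY);		/* c is now TRAP_RISKY.  */
  return c;
}
#endif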
2298 /* Non-zero if block bb_to is equal to, or reachable from block bb_from. */
2299 #define IS_REACHABLE(bb_from, bb_to) \
2300 (bb_from == bb_to \
2301 || IS_RGN_ENTRY (bb_from) \
2302 || (bitset_member (ancestor_edges[bb_to], \
2303 EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \
2304 edgeset_size)))
2306 /* Non-zero iff the address is composed of at most one register. */
2307 #define CONST_BASED_ADDRESS_P(x) \
2308 (GET_CODE (x) == REG \
2309 || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \
2310 || (GET_CODE (x) == LO_SUM)) \
2311 && (GET_CODE (XEXP (x, 0)) == CONST_INT \
2312 || GET_CODE (XEXP (x, 1)) == CONST_INT)))
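/* For illustration (hypothetical addresses, not from the sources):
   CONST_BASED_ADDRESS_P accepts (reg 100) and
   (plus (reg 100) (const_int 4)), i.e. at most one base register plus a
   constant, but rejects (plus (reg 100) (reg 101)), whose value depends
   on two registers.  */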
2314 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */
2316 static void
2317 set_spec_fed (load_insn)
2318 rtx load_insn;
2320 rtx link;
2322 for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1))
2323 if (GET_MODE (link) == VOIDmode)
2324 FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
2325 } /* set_spec_fed */
2327 /* On the path from the insn to load_insn_bb, find a conditional
2328 branch depending on insn, that guards the speculative load. */
2330 static int
2331 find_conditional_protection (insn, load_insn_bb)
2332 rtx insn;
2333 int load_insn_bb;
2335 rtx link;
2337 /* Iterate through DEF-USE forward dependences. */
2338 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
2340 rtx next = XEXP (link, 0);
2341 if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
2342 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
2343 && IS_REACHABLE (INSN_BB (next), load_insn_bb)
2344 && load_insn_bb != INSN_BB (next)
2345 && GET_MODE (link) == VOIDmode
2346 && (GET_CODE (next) == JUMP_INSN
2347 || find_conditional_protection (next, load_insn_bb)))
2348 return 1;
2350 return 0;
2351 } /* find_conditional_protection */
2353 /* Returns 1 if the same insn1 that participates in the computation
2354 of load_insn's address is feeding a conditional branch that is
2355 guarding load_insn. This is true if we find the two DEF-USE
2356 chains:
2357 insn1 -> ... -> conditional-branch
2358 insn1 -> ... -> load_insn,
2359 and if a flow path exists:
2360 insn1 -> ... -> conditional-branch -> ... -> load_insn,
2361 and if insn1 is on the path
2362 region-entry -> ... -> bb_trg -> ... load_insn.
2364 Locate insn1 by climbing on LOG_LINKS from load_insn.
2365 Locate the branch by following INSN_DEPEND from insn1. */
2367 static int
2368 is_conditionally_protected (load_insn, bb_src, bb_trg)
2369 rtx load_insn;
2370 int bb_src, bb_trg;
2372 rtx link;
2374 for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
2376 rtx insn1 = XEXP (link, 0);
2378 /* Must be a DEF-USE dependence upon non-branch. */
2379 if (GET_MODE (link) != VOIDmode
2380 || GET_CODE (insn1) == JUMP_INSN)
2381 continue;
2383 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */
2384 if (INSN_BB (insn1) == bb_src
2385 || (CONTAINING_RGN (BLOCK_NUM (insn1))
2386 != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
2387 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
2388 && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
2389 continue;
2391 /* Now search for the conditional-branch. */
2392 if (find_conditional_protection (insn1, bb_src))
2393 return 1;
2395 /* Recursive step: search another insn1, "above" current insn1. */
2396 return is_conditionally_protected (insn1, bb_src, bb_trg);
2399 /* The chain does not exist. */
2400 return 0;
2401 } /* is_conditionally_protected */
2403 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
2404 load_insn can move speculatively from bb_src to bb_trg. All the
2405 following must hold:
2407 (1) both loads have 1 base register (PFREE_CANDIDATEs).
2408 (2) load_insn and load1 have a def-use dependence upon
2409 the same insn 'insn1'.
2410 (3) either load2 is in bb_trg, or:
2411 - there's only one split-block, and
2412 - load1 is on the escape path, and
2414 From all these we can conclude that the two loads access memory
2415 addresses that differ at most by a constant, and hence if moving
2416 load_insn would cause an exception, it would have been caused by
2417 load2 anyhow. */
2419 static int
2420 is_pfree (load_insn, bb_src, bb_trg)
2421 rtx load_insn;
2422 int bb_src, bb_trg;
2424 rtx back_link;
2425 register candidate *candp = candidate_table + bb_src;
2427 if (candp->split_bbs.nr_members != 1)
2428 /* Must have exactly one escape block. */
2429 return 0;
2431 for (back_link = LOG_LINKS (load_insn);
2432 back_link; back_link = XEXP (back_link, 1))
2434 rtx insn1 = XEXP (back_link, 0);
2436 if (GET_MODE (back_link) == VOIDmode)
2438 /* Found a DEF-USE dependence (insn1, load_insn). */
2439 rtx fore_link;
2441 for (fore_link = INSN_DEPEND (insn1);
2442 fore_link; fore_link = XEXP (fore_link, 1))
2444 rtx insn2 = XEXP (fore_link, 0);
2445 if (GET_MODE (fore_link) == VOIDmode)
2447 /* Found a DEF-USE dependence (insn1, insn2). */
2448 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
2449 /* insn2 not guaranteed to be a 1 base reg load. */
2450 continue;
2452 if (INSN_BB (insn2) == bb_trg)
2453 /* insn2 is the similar load, in the target block. */
2454 return 1;
2456 if (*(candp->split_bbs.first_member) == BLOCK_NUM (insn2))
2457 /* insn2 is a similar load, in a split-block. */
2458 return 1;
2464 /* Couldn't find a similar load. */
2465 return 0;
2466 } /* is_pfree */
2468 /* Returns a class that insn with GET_DEST(insn)=x may belong to,
2469 as found by analyzing insn's expression. */
2471 static int
2472 may_trap_exp (x, is_store)
2473 rtx x;
2474 int is_store;
2476 enum rtx_code code;
2478 if (x == 0)
2479 return TRAP_FREE;
2480 code = GET_CODE (x);
2481 if (is_store)
2483 if (code == MEM)
2484 return TRAP_RISKY;
2485 else
2486 return TRAP_FREE;
2488 if (code == MEM)
2490 /* The insn uses memory: a volatile load. */
2491 if (MEM_VOLATILE_P (x))
2492 return IRISKY;
2493 /* An exception-free load. */
2494 if (!may_trap_p (x))
2495 return IFREE;
2496 /* A load with 1 base register, to be further checked. */
2497 if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
2498 return PFREE_CANDIDATE;
2499 /* No info on the load, to be further checked. */
2500 return PRISKY_CANDIDATE;
2502 else
2504 const char *fmt;
2505 int i, insn_class = TRAP_FREE;
2507 /* Neither store nor load, check if it may cause a trap. */
2508 if (may_trap_p (x))
2509 return TRAP_RISKY;
2510 /* Recursive step: walk the insn... */
2511 fmt = GET_RTX_FORMAT (code);
2512 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2514 if (fmt[i] == 'e')
2516 int tmp_class = may_trap_exp (XEXP (x, i), is_store);
2517 insn_class = WORST_CLASS (insn_class, tmp_class);
2519 else if (fmt[i] == 'E')
2521 int j;
2522 for (j = 0; j < XVECLEN (x, i); j++)
2524 int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store);
2525 insn_class = WORST_CLASS (insn_class, tmp_class);
2526 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2527 break;
2530 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2531 break;
2533 return insn_class;
2535 } /* may_trap_exp */
2538 /* Classifies insn for the purpose of verifying that it can be
2539 moved speculatively, by examining its pattern, returning:
2540 TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
2541 TRAP_FREE: non-load insn.
2542 IFREE: load from a globally safe location.
2543 IRISKY: volatile load.
2544 PFREE_CANDIDATE, PRISKY_CANDIDATE: load that needs to be checked for
2545 being either PFREE or PRISKY. */
2547 static int
2548 haifa_classify_insn (insn)
2549 rtx insn;
2551 rtx pat = PATTERN (insn);
2552 int tmp_class = TRAP_FREE;
2553 int insn_class = TRAP_FREE;
2554 enum rtx_code code;
2556 if (GET_CODE (pat) == PARALLEL)
2558 int i, len = XVECLEN (pat, 0);
2560 for (i = len - 1; i >= 0; i--)
2562 code = GET_CODE (XVECEXP (pat, 0, i));
2563 switch (code)
2565 case CLOBBER:
2566 /* Test if it is a 'store'. */
2567 tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
2568 break;
2569 case SET:
2570 /* Test if it is a store. */
2571 tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
2572 if (tmp_class == TRAP_RISKY)
2573 break;
2574 /* Test if it is a load. */
2575 tmp_class =
2576 WORST_CLASS (tmp_class,
2577 may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
2578 break;
2579 case TRAP_IF:
2580 tmp_class = TRAP_RISKY;
2581 break;
2582 default:;
2584 insn_class = WORST_CLASS (insn_class, tmp_class);
2585 if (insn_class == TRAP_RISKY || insn_class == IRISKY)
2586 break;
2589 else
2591 code = GET_CODE (pat);
2592 switch (code)
2594 case CLOBBER:
2595 /* Test if it is a 'store'. */
2596 tmp_class = may_trap_exp (XEXP (pat, 0), 1);
2597 break;
2598 case SET:
2599 /* Test if it is a store. */
2600 tmp_class = may_trap_exp (SET_DEST (pat), 1);
2601 if (tmp_class == TRAP_RISKY)
2602 break;
2603 /* Test if it is a load. */
2604 tmp_class =
2605 WORST_CLASS (tmp_class,
2606 may_trap_exp (SET_SRC (pat), 0));
2607 break;
2608 case TRAP_IF:
2609 tmp_class = TRAP_RISKY;
2610 break;
2611 default:;
2613 insn_class = tmp_class;
2616 return insn_class;
2618 } /* haifa_classify_insn */
2620 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
2621 a load moved speculatively, or if load_insn is protected by
2622 a compare on load_insn's address). */
2624 static int
2625 is_prisky (load_insn, bb_src, bb_trg)
2626 rtx load_insn;
2627 int bb_src, bb_trg;
2629 if (FED_BY_SPEC_LOAD (load_insn))
2630 return 1;
2632 if (LOG_LINKS (load_insn) == NULL)
2633 /* Dependence may 'hide' out of the region. */
2634 return 1;
2636 if (is_conditionally_protected (load_insn, bb_src, bb_trg))
2637 return 1;
2639 return 0;
2640 } /* is_prisky */
2642 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
2643 Return 1 if insn is exception-free (and the motion is valid)
2644 and 0 otherwise. */
2646 static int
2647 is_exception_free (insn, bb_src, bb_trg)
2648 rtx insn;
2649 int bb_src, bb_trg;
2651 int insn_class = haifa_classify_insn (insn);
2653 /* Handle non-load insns. */
2654 switch (insn_class)
2656 case TRAP_FREE:
2657 return 1;
2658 case TRAP_RISKY:
2659 return 0;
2660 default:;
2663 /* Handle loads. */
2664 if (!flag_schedule_speculative_load)
2665 return 0;
2666 IS_LOAD_INSN (insn) = 1;
2667 switch (insn_class)
2669 case IFREE:
2670 return (1);
2671 case IRISKY:
2672 return 0;
2673 case PFREE_CANDIDATE:
2674 if (is_pfree (insn, bb_src, bb_trg))
2675 return 1;
2676 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */
2677 case PRISKY_CANDIDATE:
2678 if (!flag_schedule_speculative_load_dangerous
2679 || is_prisky (insn, bb_src, bb_trg))
2680 return 0;
2681 break;
2682 default:;
2685 return flag_schedule_speculative_load_dangerous;
2686 } /* is_exception_free */
2689 /* Process an insn's memory dependencies. There are four kinds of
2690 dependencies:
2692 (0) read dependence: read follows read
2693 (1) true dependence: read follows write
2694 (2) anti dependence: write follows read
2695 (3) output dependence: write follows write
2697 We are careful to build only dependencies which actually exist, and
2698 use transitivity to avoid building too many links. */
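/* Illustrative example, not part of the original file: the four kinds of
   memory dependencies on a hypothetical straight-line sequence.  The names
   p, a and b are invented for illustration, and the function is kept out
   of the build.  */
#if 0
static void
mem_dep_example (int *p)
{
  int a, b;

  a = p[0];	/* (1) read.  */
  b = p[0];	/* (2) read:  (1)->(2) is a read dependence.  */
  p[0] = a;	/* (3) write: (2)->(3) is an anti dependence.  */
  a = p[0];	/* (4) read:  (3)->(4) is a true dependence.  */
  p[0] = b;	/* (5) write: (3)->(5) is an output dependence.  */
}
#endif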
2700 /* Return the INSN_LIST containing INSN in LIST, or NULL
2701 if LIST does not contain INSN. */
2703 HAIFA_INLINE static rtx
2704 find_insn_list (insn, list)
2705 rtx insn;
2706 rtx list;
2708 while (list)
2710 if (XEXP (list, 0) == insn)
2711 return list;
2712 list = XEXP (list, 1);
2714 return 0;
2718 /* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0
2719 otherwise. */
2721 HAIFA_INLINE static char
2722 find_insn_mem_list (insn, x, list, list1)
2723 rtx insn, x;
2724 rtx list, list1;
2726 while (list)
2728 if (XEXP (list, 0) == insn
2729 && XEXP (list1, 0) == x)
2730 return 1;
2731 list = XEXP (list, 1);
2732 list1 = XEXP (list1, 1);
2734 return 0;
2738 /* Compute the function units used by INSN. This caches the value
2739 returned by function_units_used. A function unit is encoded as the
2740 unit number if the value is non-negative and the complement of a
2741 mask if the value is negative. A function unit index is the
2742 non-negative encoding. */
2744 HAIFA_INLINE static int
2745 insn_unit (insn)
2746 rtx insn;
2748 register int unit = INSN_UNIT (insn);
2750 if (unit == 0)
2752 recog_memoized (insn);
2754 /* A USE insn, or something else we don't need to understand.
2755 We can't pass these directly to function_units_used because it will
2756 trigger a fatal error for unrecognizable insns. */
2757 if (INSN_CODE (insn) < 0)
2758 unit = -1;
2759 else
2761 unit = function_units_used (insn);
2762 /* Increment non-negative values so we can cache zero. */
2763 if (unit >= 0)
2764 unit++;
2766 /* We only cache 16 bits of the result, so if the value is out of
2767 range, don't cache it. */
2768 if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT
2769 || unit >= 0
2770 || (unit & ~((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0)
2771 INSN_UNIT (insn) = unit;
2773 return (unit > 0 ? unit - 1 : unit);
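/* Illustrative sketch, not part of the original file: decoding the unit
   encoding described above.  A non-negative value names a single unit; a
   negative value is the complement of a mask of alternative units.  The
   helper below is hypothetical and kept out of the build.  */
#if 0
static int
count_encoded_units (int unit)
{
  if (unit >= 0)
    return 1;			/* A single function unit.  */
  else
    {
      int n = 0;
      /* Walk the complemented mask, one bit per unit.  */
      for (unit = ~unit; unit; unit >>= 1)
	if (unit & 1)
	  n++;
      return n;
    }
}
#endif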
2776 /* Compute the blockage range for executing INSN on UNIT. This caches
2777 the value returned by the blockage_range_function for the unit.
2778 These values are encoded in an int where the upper half gives the
2779 minimum value and the lower half gives the maximum value. */
2781 HAIFA_INLINE static unsigned int
2782 blockage_range (unit, insn)
2783 int unit;
2784 rtx insn;
2786 unsigned int blockage = INSN_BLOCKAGE (insn);
2787 unsigned int range;
2789 if ((int) UNIT_BLOCKED (blockage) != unit + 1)
2791 range = function_units[unit].blockage_range_function (insn);
2792 /* We only cache the blockage range for one unit and then only if
2793 the values fit. */
2794 if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS)
2795 INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range);
2797 else
2798 range = BLOCKAGE_RANGE (blockage);
2800 return range;
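/* Illustrative sketch (assuming, for exposition, a 16/16 bit split; the
   real layout comes from ENCODE_BLOCKAGE, UNIT_BITS and BLOCKAGE_BITS):
   packing the (min, max) blockage pair into a single int as described
   above, minimum in the upper half, maximum in the lower half.
   Hypothetical helper, kept out of the build.  */
#if 0
static unsigned int
pack_blockage_example (unsigned int minb, unsigned int maxb)
{
  return (minb << 16) | (maxb & 0xffff);
}
#endif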
2803 /* A vector indexed by function unit instance giving the last insn to use
2804 the unit. The value of the function unit instance index for unit U
2805 instance I is (U + I * FUNCTION_UNITS_SIZE). */
2806 static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2808 /* A vector indexed by function unit instance giving the minimum time when
2809 the unit will unblock based on the maximum blockage cost. */
2810 static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
2812 /* A vector indexed by function unit number giving the number of insns
2813 that remain to use the unit. */
2814 static int unit_n_insns[FUNCTION_UNITS_SIZE];
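/* Illustrative note with hypothetical numbers: the vectors above are
   indexed by function unit instance, U + I * FUNCTION_UNITS_SIZE.  For
   example, if FUNCTION_UNITS_SIZE were 4, instance 2 of unit 1 would live
   at index 1 + 2 * 4 == 9.  The helper is kept out of the build.  */
#if 0
static int
unit_instance_index_example (int unit, int instance)
{
  return unit + instance * FUNCTION_UNITS_SIZE;
}
#endif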
2816 /* Reset the function unit state to the null state. */
2818 static void
2819 clear_units ()
2821 bzero ((char *) unit_last_insn, sizeof (unit_last_insn));
2822 bzero ((char *) unit_tick, sizeof (unit_tick));
2823 bzero ((char *) unit_n_insns, sizeof (unit_n_insns));
2826 /* Return the issue-delay of an insn. */
2828 HAIFA_INLINE static int
2829 insn_issue_delay (insn)
2830 rtx insn;
2832 int i, delay = 0;
2833 int unit = insn_unit (insn);
2835 /* Efficiency note: in fact, we are working 'hard' to compute a
2836 value that was available in the md file, and is not available in the
2837 function_units[] structure. It would be nice to have this
2838 value there, too. */
2839 if (unit >= 0)
2841 if (function_units[unit].blockage_range_function &&
2842 function_units[unit].blockage_function)
2843 delay = function_units[unit].blockage_function (insn, insn);
2845 else
2846 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2847 if ((unit & 1) != 0 && function_units[i].blockage_range_function
2848 && function_units[i].blockage_function)
2849 delay = MAX (delay, function_units[i].blockage_function (insn, insn));
2851 return delay;
2854 /* Return the actual hazard cost of executing INSN on the unit UNIT,
2855 instance INSTANCE at time CLOCK if the previous actual hazard cost
2856 was COST. */
2858 HAIFA_INLINE static int
2859 actual_hazard_this_instance (unit, instance, insn, clock, cost)
2860 int unit, instance, clock, cost;
2861 rtx insn;
2863 int tick = unit_tick[instance]; /* Issue time of the last issued insn. */
2865 if (tick - clock > cost)
2867 /* The scheduler is operating forward, so unit's last insn is the
2868 executing insn and INSN is the candidate insn. We want a
2869 more exact measure of the blockage if we execute INSN at CLOCK
2870 given when we committed the execution of the unit's last insn.
2872 The blockage value is given by either the unit's max blockage
2873 constant, blockage range function, or blockage function. Use
2874 the most exact form for the given unit. */
2876 if (function_units[unit].blockage_range_function)
2878 if (function_units[unit].blockage_function)
2879 tick += (function_units[unit].blockage_function
2880 (unit_last_insn[instance], insn)
2881 - function_units[unit].max_blockage);
2882 else
2883 tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn))
2884 - function_units[unit].max_blockage);
2886 if (tick - clock > cost)
2887 cost = tick - clock;
2889 return cost;
2892 /* Record INSN as having begun execution on the units encoded by UNIT at
2893 time CLOCK. */
2895 HAIFA_INLINE static void
2896 schedule_unit (unit, insn, clock)
2897 int unit, clock;
2898 rtx insn;
2900 int i;
2902 if (unit >= 0)
2904 int instance = unit;
2905 #if MAX_MULTIPLICITY > 1
2906 /* Find the first free instance of the function unit and use that
2907 one. We assume that one is free. */
2908 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2910 if (!actual_hazard_this_instance (unit, instance, insn, clock, 0))
2911 break;
2912 instance += FUNCTION_UNITS_SIZE;
2914 #endif
2915 unit_last_insn[instance] = insn;
2916 unit_tick[instance] = (clock + function_units[unit].max_blockage);
2918 else
2919 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2920 if ((unit & 1) != 0)
2921 schedule_unit (i, insn, clock);
2924 /* Return the actual hazard cost of executing INSN on the units encoded by
2925 UNIT at time CLOCK if the previous actual hazard cost was COST. */
2927 HAIFA_INLINE static int
2928 actual_hazard (unit, insn, clock, cost)
2929 int unit, clock, cost;
2930 rtx insn;
2932 int i;
2934 if (unit >= 0)
2936 /* Find the instance of the function unit with the minimum hazard. */
2937 int instance = unit;
2938 int best_cost = actual_hazard_this_instance (unit, instance, insn,
2939 clock, cost);
2940 #if MAX_MULTIPLICITY > 1
2941 int this_cost;
2943 if (best_cost > cost)
2945 for (i = function_units[unit].multiplicity - 1; i > 0; i--)
2947 instance += FUNCTION_UNITS_SIZE;
2948 this_cost = actual_hazard_this_instance (unit, instance, insn,
2949 clock, cost);
2950 if (this_cost < best_cost)
2952 best_cost = this_cost;
2953 if (this_cost <= cost)
2954 break;
2958 #endif
2959 cost = MAX (cost, best_cost);
2961 else
2962 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
2963 if ((unit & 1) != 0)
2964 cost = actual_hazard (i, insn, clock, cost);
2966 return cost;
2969 /* Return the potential hazard cost of executing an instruction on the
2970 units encoded by UNIT if the previous potential hazard cost was COST.
2971 An insn with a large blockage time is chosen in preference to one
2972 with a smaller time; an insn that uses a unit that is more likely
2973 to be used is chosen in preference to one with a unit that is less
2974 used. We are trying to minimize a subsequent actual hazard. */
2976 HAIFA_INLINE static int
2977 potential_hazard (unit, insn, cost)
2978 int unit, cost;
2979 rtx insn;
2981 int i, ncost;
2982 unsigned int minb, maxb;
2984 if (unit >= 0)
2986 minb = maxb = function_units[unit].max_blockage;
2987 if (maxb > 1)
2989 if (function_units[unit].blockage_range_function)
2991 maxb = minb = blockage_range (unit, insn);
2992 maxb = MAX_BLOCKAGE_COST (maxb);
2993 minb = MIN_BLOCKAGE_COST (minb);
2996 if (maxb > 1)
2998 /* Make the number of instructions left dominate. Make the
2999 minimum delay dominate the maximum delay. If all these
3000 are the same, use the unit number to add an arbitrary
3001 ordering. Other terms can be added. */
3002 ncost = minb * 0x40 + maxb;
3003 ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit;
3004 if (ncost > cost)
3005 cost = ncost;
3009 else
3010 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
3011 if ((unit & 1) != 0)
3012 cost = potential_hazard (i, insn, cost);
3014 return cost;
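/* Worked example with invented numbers, not from the sources: with
   minb == 2, maxb == 3, unit == 1 and unit_n_insns[unit] == 5, the cost
   computed above is (2 * 0x40 + 3) * ((5 - 1) * 0x1000 + 1), i.e.
   0x83 * 0x4001 == 2146435, so insns with more work left on a unit and a
   larger minimum blockage sort as bigger potential hazards.  */
#if 0
static int
potential_hazard_example (void)
{
  int ncost = 2 * 0x40 + 3;		/* 0x83 == 131.  */
  ncost *= (5 - 1) * 0x1000 + 1;	/* 0x4001 == 16385.  */
  return ncost;				/* 2146435.  */
}
#endif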
3017 /* Compute cost of executing INSN given the dependence LINK on the insn USED.
3018 This is the number of cycles between instruction issue and
3019 instruction results. */
3021 HAIFA_INLINE static int
3022 insn_cost (insn, link, used)
3023 rtx insn, link, used;
3025 register int cost = INSN_COST (insn);
3027 if (cost == 0)
3029 recog_memoized (insn);
3031 /* A USE insn, or something else we don't need to understand.
3032 We can't pass these directly to result_ready_cost because it will
3033 trigger a fatal error for unrecognizable insns. */
3034 if (INSN_CODE (insn) < 0)
3036 INSN_COST (insn) = 1;
3037 return 1;
3039 else
3041 cost = result_ready_cost (insn);
3043 if (cost < 1)
3044 cost = 1;
3046 INSN_COST (insn) = cost;
3050 /* In this case estimate cost without caring how insn is used. */
3051 if (link == 0 && used == 0)
3052 return cost;
3054 /* A USE insn should never require the value used to be computed. This
3055 allows the computation of a function's result and parameter values to
3056 overlap the return and call. */
3057 recog_memoized (used);
3058 if (INSN_CODE (used) < 0)
3059 LINK_COST_FREE (link) = 1;
3061 /* If some dependencies vary the cost, compute the adjustment. Most
3062 commonly, the adjustment is complete: either the cost is ignored
3063 (in the case of an output- or anti-dependence), or the cost is
3064 unchanged. These values are cached in the link as LINK_COST_FREE
3065 and LINK_COST_ZERO. */
3067 if (LINK_COST_FREE (link))
3068 cost = 0;
3069 #ifdef ADJUST_COST
3070 else if (!LINK_COST_ZERO (link))
3072 int ncost = cost;
3074 ADJUST_COST (used, link, insn, ncost);
3075 if (ncost < 1)
3077 LINK_COST_FREE (link) = 1;
3078 ncost = 0;
3080 if (cost == ncost)
3081 LINK_COST_ZERO (link) = 1;
3082 cost = ncost;
3084 #endif
3085 return cost;
3088 /* Compute the priority number for INSN. */
3090 static int
3091 priority (insn)
3092 rtx insn;
3094 int this_priority;
3095 rtx link;
3097 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
3098 return 0;
3100 if ((this_priority = INSN_PRIORITY (insn)) == 0)
3102 if (INSN_DEPEND (insn) == 0)
3103 this_priority = insn_cost (insn, 0, 0);
3104 else
3105 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
3107 rtx next;
3108 int next_priority;
3110 if (RTX_INTEGRATED_P (link))
3111 continue;
3113 next = XEXP (link, 0);
3115 /* Critical path is meaningful in block boundaries only. */
3116 if (BLOCK_NUM (next) != BLOCK_NUM (insn))
3117 continue;
3119 next_priority = insn_cost (insn, link, next) + priority (next);
3120 if (next_priority > this_priority)
3121 this_priority = next_priority;
3123 INSN_PRIORITY (insn) = this_priority;
3125 return this_priority;
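/* Illustrative sketch, not part of the original file: the priority of an
   insn is the length (in cost) of the longest dependence chain from that
   insn to the end of its block, computed by the same recursion as above,
   shown here on a toy DAG.  The struct and helper are hypothetical and
   kept out of the build.  */
#if 0
struct toy_insn
{
  int cost;			/* Cost of issuing this insn.  */
  int n_succ;			/* Number of forward dependencies.  */
  struct toy_insn **succ;	/* The dependent insns.  */
};

static int
toy_priority (struct toy_insn *insn)
{
  int i, best = insn->cost;	/* Leaf case: just the insn's own cost.  */

  for (i = 0; i < insn->n_succ; i++)
    {
      int p = insn->cost + toy_priority (insn->succ[i]);
      if (p > best)
	best = p;
    }
  return best;
}
#endif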
3129 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
3130 them to the unused_*_list variables, so that they can be reused. */
3132 static void
3133 free_pending_lists ()
3135 int bb;
3137 for (bb = 0; bb < current_nr_blocks; bb++)
3139 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
3140 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
3141 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
3142 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
3146 /* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST.
3147 The MEM is a memory reference contained within INSN, which we are saving
3148 so that we can do memory aliasing on it. */
3150 static void
3151 add_insn_mem_dependence (deps, insn_list, mem_list, insn, mem)
3152 struct deps *deps;
3153 rtx *insn_list, *mem_list, insn, mem;
3155 register rtx link;
3157 link = alloc_INSN_LIST (insn, *insn_list);
3158 *insn_list = link;
3160 link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list);
3161 *mem_list = link;
3163 deps->pending_lists_length++;
3166 /* Make a dependency between every memory reference on the pending lists
3167 and INSN, thus flushing the pending lists. If ONLY_WRITE, don't flush
3168 the read list. */
3170 static void
3171 flush_pending_lists (deps, insn, only_write)
3172 struct deps *deps;
3173 rtx insn;
3174 int only_write;
3176 rtx u;
3177 rtx link;
3179 while (deps->pending_read_insns && ! only_write)
3181 add_dependence (insn, XEXP (deps->pending_read_insns, 0),
3182 REG_DEP_ANTI);
3184 link = deps->pending_read_insns;
3185 deps->pending_read_insns = XEXP (deps->pending_read_insns, 1);
3186 free_INSN_LIST_node (link);
3188 link = deps->pending_read_mems;
3189 deps->pending_read_mems = XEXP (deps->pending_read_mems, 1);
3190 free_EXPR_LIST_node (link);
3192 while (deps->pending_write_insns)
3194 add_dependence (insn, XEXP (deps->pending_write_insns, 0),
3195 REG_DEP_ANTI);
3197 link = deps->pending_write_insns;
3198 deps->pending_write_insns = XEXP (deps->pending_write_insns, 1);
3199 free_INSN_LIST_node (link);
3201 link = deps->pending_write_mems;
3202 deps->pending_write_mems = XEXP (deps->pending_write_mems, 1);
3203 free_EXPR_LIST_node (link);
3205 deps->pending_lists_length = 0;
3207 /* last_pending_memory_flush is now a list of insns. */
3208 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3209 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3211 free_INSN_LIST_list (&deps->last_pending_memory_flush);
3212 deps->last_pending_memory_flush = alloc_INSN_LIST (insn, NULL_RTX);
3215 /* Analyze a single SET, CLOBBER, PRE_DEC, POST_DEC, PRE_INC or POST_INC
3216 rtx, X, creating all dependencies generated by the write to the
3217 destination of X, and reads of everything mentioned. */
3219 static void
3220 sched_analyze_1 (deps, x, insn)
3221 struct deps *deps;
3222 rtx x;
3223 rtx insn;
3225 register int regno;
3226 register rtx dest = XEXP (x, 0);
3227 enum rtx_code code = GET_CODE (x);
3229 if (dest == 0)
3230 return;
3232 if (GET_CODE (dest) == PARALLEL
3233 && GET_MODE (dest) == BLKmode)
3235 register int i;
3236 for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
3237 sched_analyze_1 (deps, XVECEXP (dest, 0, i), insn);
3238 if (GET_CODE (x) == SET)
3239 sched_analyze_2 (deps, SET_SRC (x), insn);
3240 return;
3243 while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG
3244 || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3246 if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
3248 /* The second and third arguments are values read by this insn. */
3249 sched_analyze_2 (deps, XEXP (dest, 1), insn);
3250 sched_analyze_2 (deps, XEXP (dest, 2), insn);
3252 dest = XEXP (dest, 0);
3255 if (GET_CODE (dest) == REG)
3257 register int i;
3259 regno = REGNO (dest);
3261 /* A hard reg in a wide mode may really be multiple registers.
3262 If so, mark all of them just like the first. */
3263 if (regno < FIRST_PSEUDO_REGISTER)
3265 i = HARD_REGNO_NREGS (regno, GET_MODE (dest));
3266 while (--i >= 0)
3268 int r = regno + i;
3269 rtx u;
3271 for (u = deps->reg_last_uses[r]; u; u = XEXP (u, 1))
3272 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3274 for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
3275 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3277 /* Clobbers need not be ordered with respect to one
3278 another, but sets must be ordered with respect to a
3279 pending clobber. */
3280 if (code == SET)
3282 free_INSN_LIST_list (&deps->reg_last_uses[r]);
3283 for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
3284 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3285 SET_REGNO_REG_SET (reg_pending_sets, r);
3287 else
3288 SET_REGNO_REG_SET (reg_pending_clobbers, r);
3290 /* Function calls clobber all call_used regs. */
3291 if (global_regs[r] || (code == SET && call_used_regs[r]))
3292 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3293 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3296 else
3298 rtx u;
3300 for (u = deps->reg_last_uses[regno]; u; u = XEXP (u, 1))
3301 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3303 for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
3304 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3306 if (code == SET)
3308 free_INSN_LIST_list (&deps->reg_last_uses[regno]);
3309 for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3310 add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
3311 SET_REGNO_REG_SET (reg_pending_sets, regno);
3313 else
3314 SET_REGNO_REG_SET (reg_pending_clobbers, regno);
3316 /* Pseudos that are REG_EQUIV to something may be replaced
3317 by that during reloading. We need only add dependencies for
3318 the address in the REG_EQUIV note. */
3319 if (!reload_completed
3320 && reg_known_equiv_p[regno]
3321 && GET_CODE (reg_known_value[regno]) == MEM)
3322 sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);
3324 /* Don't let it cross a call after scheduling if it doesn't
3325 already cross one. */
3327 if (REG_N_CALLS_CROSSED (regno) == 0)
3328 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3329 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3332 else if (GET_CODE (dest) == MEM)
3334 /* Writing memory. */
3336 if (deps->pending_lists_length > 32)
3338 /* Flush all pending reads and writes to prevent the pending lists
3339 from getting any larger. Insn scheduling runs too slowly when
3340 these lists get long. The number 32 was chosen because it
3341 seems like a reasonable number. When compiling GCC with itself,
3342 this flush occurs 8 times for sparc, and 10 times for m88k using
3343 the number 32. */
3344 flush_pending_lists (deps, insn, 0);
3346 else
3348 rtx u;
3349 rtx pending, pending_mem;
3351 pending = deps->pending_read_insns;
3352 pending_mem = deps->pending_read_mems;
3353 while (pending)
3355 if (anti_dependence (XEXP (pending_mem, 0), dest))
3356 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3358 pending = XEXP (pending, 1);
3359 pending_mem = XEXP (pending_mem, 1);
3362 pending = deps->pending_write_insns;
3363 pending_mem = deps->pending_write_mems;
3364 while (pending)
3366 if (output_dependence (XEXP (pending_mem, 0), dest))
3367 add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
3369 pending = XEXP (pending, 1);
3370 pending_mem = XEXP (pending_mem, 1);
3373 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3374 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3376 add_insn_mem_dependence (deps, &deps->pending_write_insns,
3377 &deps->pending_write_mems, insn, dest);
3379 sched_analyze_2 (deps, XEXP (dest, 0), insn);
3382 /* Analyze reads. */
3383 if (GET_CODE (x) == SET)
3384 sched_analyze_2 (deps, SET_SRC (x), insn);
3387 /* Analyze the uses of memory and registers in rtx X in INSN. */
3389 static void
3390 sched_analyze_2 (deps, x, insn)
3391 struct deps *deps;
3392 rtx x;
3393 rtx insn;
3395 register int i;
3396 register int j;
3397 register enum rtx_code code;
3398 register const char *fmt;
3400 if (x == 0)
3401 return;
3403 code = GET_CODE (x);
3405 switch (code)
3407 case CONST_INT:
3408 case CONST_DOUBLE:
3409 case SYMBOL_REF:
3410 case CONST:
3411 case LABEL_REF:
3412 /* Ignore constants. Note that we must handle CONST_DOUBLE here
3413 because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
3414 this does not mean that this insn is using cc0. */
3415 return;
3417 #ifdef HAVE_cc0
3418 case CC0:
3420 rtx link, prev;
3422 /* User of CC0 depends on immediately preceding insn. */
3423 SCHED_GROUP_P (insn) = 1;
3425 /* There may be a note before this insn now, but all notes will
3426 be removed before we actually try to schedule the insns, so
3427 it won't cause a problem later. We must avoid it here though. */
3428 prev = prev_nonnote_insn (insn);
3430 /* Make a copy of all dependencies on the immediately previous insn,
3431 and add to this insn. This is so that all the dependencies will
3432 apply to the group. Remove an explicit dependence on this insn
3433 as SCHED_GROUP_P now represents it. */
3435 if (find_insn_list (prev, LOG_LINKS (insn)))
3436 remove_dependence (insn, prev);
3438 for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
3439 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3441 return;
3443 #endif
3445 case REG:
3447 rtx u;
3448 int regno = REGNO (x);
3449 if (regno < FIRST_PSEUDO_REGISTER)
3451 int i;
3453 i = HARD_REGNO_NREGS (regno, GET_MODE (x));
3454 while (--i >= 0)
3456 int r = regno + i;
3457 deps->reg_last_uses[r]
3458 = alloc_INSN_LIST (insn, deps->reg_last_uses[r]);
3460 for (u = deps->reg_last_sets[r]; u; u = XEXP (u, 1))
3461 add_dependence (insn, XEXP (u, 0), 0);
3463 /* ??? This should never happen. */
3464 for (u = deps->reg_last_clobbers[r]; u; u = XEXP (u, 1))
3465 add_dependence (insn, XEXP (u, 0), 0);
3467 if (call_used_regs[r] || global_regs[r])
3468 /* Function calls clobber all call_used regs. */
3469 for (u = deps->last_function_call; u; u = XEXP (u, 1))
3470 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3473 else
3475 deps->reg_last_uses[regno]
3476 = alloc_INSN_LIST (insn, deps->reg_last_uses[regno]);
3478 for (u = deps->reg_last_sets[regno]; u; u = XEXP (u, 1))
3479 add_dependence (insn, XEXP (u, 0), 0);
3481 /* ??? This should never happen. */
3482 for (u = deps->reg_last_clobbers[regno]; u; u = XEXP (u, 1))
3483 add_dependence (insn, XEXP (u, 0), 0);
3485 /* Pseudos that are REG_EQUIV to something may be replaced
3486 by that during reloading. We need only add dependencies for
3487 the address in the REG_EQUIV note. */
3488 if (!reload_completed
3489 && reg_known_equiv_p[regno]
3490 && GET_CODE (reg_known_value[regno]) == MEM)
3491 sched_analyze_2 (deps, XEXP (reg_known_value[regno], 0), insn);
3493 /* If the register does not already cross any calls, then add this
3494 insn to the sched_before_next_call list so that it will still
3495 not cross calls after scheduling. */
3496 if (REG_N_CALLS_CROSSED (regno) == 0)
3497 add_dependence (deps->sched_before_next_call, insn,
3498 REG_DEP_ANTI);
3500 return;
3503 case MEM:
3505 /* Reading memory. */
3506 rtx u;
3507 rtx pending, pending_mem;
3509 pending = deps->pending_read_insns;
3510 pending_mem = deps->pending_read_mems;
3511 while (pending)
3513 if (read_dependence (XEXP (pending_mem, 0), x))
3514 add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
3516 pending = XEXP (pending, 1);
3517 pending_mem = XEXP (pending_mem, 1);
3520 pending = deps->pending_write_insns;
3521 pending_mem = deps->pending_write_mems;
3522 while (pending)
3524 if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
3525 x, rtx_varies_p))
3526 add_dependence (insn, XEXP (pending, 0), 0);
3528 pending = XEXP (pending, 1);
3529 pending_mem = XEXP (pending_mem, 1);
3532 for (u = deps->last_pending_memory_flush; u; u = XEXP (u, 1))
3533 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3535 /* Always add these dependencies to pending_reads, since
3536 this insn may be followed by a write. */
3537 add_insn_mem_dependence (deps, &deps->pending_read_insns,
3538 &deps->pending_read_mems, insn, x);
3540 /* Take advantage of tail recursion here. */
3541 sched_analyze_2 (deps, XEXP (x, 0), insn);
3542 return;
3545 /* Force pending stores to memory in case a trap handler needs them. */
3546 case TRAP_IF:
3547 flush_pending_lists (deps, insn, 1);
3548 break;
3550 case ASM_OPERANDS:
3551 case ASM_INPUT:
3552 case UNSPEC_VOLATILE:
3554 rtx u;
3556 /* Traditional and volatile asm instructions must be considered to use
3557 and clobber all hard registers, all pseudo-registers and all of
3558 memory. So must TRAP_IF and UNSPEC_VOLATILE operations.
3560 Consider for instance a volatile asm that changes the fpu rounding
3561 mode. An insn should not be moved across this even if it only uses
3562 pseudo-regs because it might give an incorrectly rounded result. */
3563 if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
3565 int max_reg = max_reg_num ();
3566 for (i = 0; i < max_reg; i++)
3568 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3569 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3570 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3572 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3573 add_dependence (insn, XEXP (u, 0), 0);
3575 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3576 add_dependence (insn, XEXP (u, 0), 0);
3578 reg_pending_sets_all = 1;
3580 flush_pending_lists (deps, insn, 0);
3583 /* For all ASM_OPERANDS, we must traverse the vector of input operands.
3584 We cannot just fall through here since then we would be confused
3585 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
3586 traditional asms unlike their normal usage. */
3588 if (code == ASM_OPERANDS)
3590 for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++)
3591 sched_analyze_2 (deps, ASM_OPERANDS_INPUT (x, j), insn);
3592 return;
3594 break;
3597 case PRE_DEC:
3598 case POST_DEC:
3599 case PRE_INC:
3600 case POST_INC:
3601 /* These both read and modify the result. We must handle them as writes
3602 to get proper dependencies for following instructions. We must handle
3603 them as reads to get proper dependencies from this to previous
3604 instructions. Thus we need to pass them to both sched_analyze_1
3605 and sched_analyze_2. We must call sched_analyze_2 first in order
3606 to get the proper antecedent for the read. */
3607 sched_analyze_2 (deps, XEXP (x, 0), insn);
3608 sched_analyze_1 (deps, x, insn);
3609 return;
3611 default:
3612 break;
3615 /* Other cases: walk the insn. */
3616 fmt = GET_RTX_FORMAT (code);
3617 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3619 if (fmt[i] == 'e')
3620 sched_analyze_2 (deps, XEXP (x, i), insn);
3621 else if (fmt[i] == 'E')
3622 for (j = 0; j < XVECLEN (x, i); j++)
3623 sched_analyze_2 (deps, XVECEXP (x, i, j), insn);
3627 /* Analyze an INSN with pattern X to find all dependencies. */
3629 static void
3630 sched_analyze_insn (deps, x, insn, loop_notes)
3631 struct deps *deps;
3632 rtx x, insn;
3633 rtx loop_notes;
3635 register RTX_CODE code = GET_CODE (x);
3636 rtx link;
3637 int maxreg = max_reg_num ();
3638 int i;
3640 if (code == SET || code == CLOBBER)
3641 sched_analyze_1 (deps, x, insn);
3642 else if (code == PARALLEL)
3644 register int i;
3645 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
3647 code = GET_CODE (XVECEXP (x, 0, i));
3648 if (code == SET || code == CLOBBER)
3649 sched_analyze_1 (deps, XVECEXP (x, 0, i), insn);
3650 else
3651 sched_analyze_2 (deps, XVECEXP (x, 0, i), insn);
3654 else
3655 sched_analyze_2 (deps, x, insn);
3657 /* Mark registers CLOBBERED or used by called function. */
3658 if (GET_CODE (insn) == CALL_INSN)
3659 for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
3661 if (GET_CODE (XEXP (link, 0)) == CLOBBER)
3662 sched_analyze_1 (deps, XEXP (link, 0), insn);
3663 else
3664 sched_analyze_2 (deps, XEXP (link, 0), insn);
3667 /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic
3668 block, then we must be sure that no instructions are scheduled across it.
3669 Otherwise, the reg_n_refs info (which depends on loop_depth) would
3670 become incorrect. */
3672 if (loop_notes)
3674 int max_reg = max_reg_num ();
3675 int schedule_barrier_found = 0;
3676 rtx link;
3678 /* Update loop_notes with any notes from this insn. Also determine
3679 if any of the notes on the list correspond to instruction scheduling
3680 barriers (loop, eh & setjmp notes, but not range notes).
3681 link = loop_notes;
3682 while (XEXP (link, 1))
3684 if (INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_BEG
3685 || INTVAL (XEXP (link, 0)) == NOTE_INSN_LOOP_END
3686 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_BEG
3687 || INTVAL (XEXP (link, 0)) == NOTE_INSN_EH_REGION_END
3688 || INTVAL (XEXP (link, 0)) == NOTE_INSN_SETJMP)
3689 schedule_barrier_found = 1;
3691 link = XEXP (link, 1);
3693 XEXP (link, 1) = REG_NOTES (insn);
3694 REG_NOTES (insn) = loop_notes;
3696 /* Add dependencies if a scheduling barrier was found. */
3697 if (schedule_barrier_found)
3699 for (i = 0; i < max_reg; i++)
3701 rtx u;
3702 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3703 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3704 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3706 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3707 add_dependence (insn, XEXP (u, 0), 0);
3709 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3710 add_dependence (insn, XEXP (u, 0), 0);
3712 reg_pending_sets_all = 1;
3714 flush_pending_lists (deps, insn, 0);
3719 /* Accumulate clobbers until the next set so that it will be output dependent
3720 on all of them. At the next set we can clear the clobber list, since
3721 subsequent sets will be output dependent on it. */
3722 EXECUTE_IF_SET_IN_REG_SET
3723 (reg_pending_sets, 0, i,
3725 free_INSN_LIST_list (&deps->reg_last_sets[i]);
3726 free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
3727 deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
3729 EXECUTE_IF_SET_IN_REG_SET
3730 (reg_pending_clobbers, 0, i,
3732 deps->reg_last_clobbers[i]
3733 = alloc_INSN_LIST (insn, deps->reg_last_clobbers[i]);
3735 CLEAR_REG_SET (reg_pending_sets);
3736 CLEAR_REG_SET (reg_pending_clobbers);
3738 if (reg_pending_sets_all)
3740 for (i = 0; i < maxreg; i++)
3742 free_INSN_LIST_list (&deps->reg_last_sets[i]);
3743 free_INSN_LIST_list (&deps->reg_last_clobbers[i]);
3744 deps->reg_last_sets[i] = alloc_INSN_LIST (insn, NULL_RTX);
3747 reg_pending_sets_all = 0;
3750 /* Handle function calls and function returns created by the epilogue
3751 threading code. */
3752 if (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
3754 rtx dep_insn;
3755 rtx prev_dep_insn;
3757 /* When scheduling instructions, we make sure calls don't lose their
3758 	 accompanying USE insns by making them depend on one another in order.
3760 Also, we must do the same thing for returns created by the epilogue
3761 threading code. Note this code works only in this special case,
3762 because other passes make no guarantee that they will never emit
3763 an instruction between a USE and a RETURN. There is such a guarantee
3764 for USE instructions immediately before a call. */
3766 prev_dep_insn = insn;
3767 dep_insn = PREV_INSN (insn);
3768 while (GET_CODE (dep_insn) == INSN
3769 && GET_CODE (PATTERN (dep_insn)) == USE
3770 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == REG)
3772 SCHED_GROUP_P (prev_dep_insn) = 1;
3774 /* Make a copy of all dependencies on dep_insn, and add to insn.
3775 This is so that all of the dependencies will apply to the
3776 group. */
3778 for (link = LOG_LINKS (dep_insn); link; link = XEXP (link, 1))
3779 add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
3781 prev_dep_insn = dep_insn;
3782 dep_insn = PREV_INSN (dep_insn);
3787 /* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
3788 for every dependency. */
3790 static void
3791 sched_analyze (deps, head, tail)
3792 struct deps *deps;
3793 rtx head, tail;
3795 register rtx insn;
3796 register rtx u;
3797 rtx loop_notes = 0;
3799 for (insn = head;; insn = NEXT_INSN (insn))
3801 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
3803 /* Clear out the stale LOG_LINKS from flow. */
3804 free_INSN_LIST_list (&LOG_LINKS (insn));
3806 /* Make each JUMP_INSN a scheduling barrier for memory
3807 references. */
3808 if (GET_CODE (insn) == JUMP_INSN)
3809 deps->last_pending_memory_flush
3810 = alloc_INSN_LIST (insn, deps->last_pending_memory_flush);
3811 sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
3812 loop_notes = 0;
3814 else if (GET_CODE (insn) == CALL_INSN)
3816 rtx x;
3817 register int i;
3819 CANT_MOVE (insn) = 1;
3821 /* Clear out the stale LOG_LINKS from flow. */
3822 free_INSN_LIST_list (&LOG_LINKS (insn));
3824 /* Any instruction using a hard register which may get clobbered
3825 by a call needs to be marked as dependent on this call.
3826 This prevents a use of a hard return reg from being moved
3827 past a void call (i.e. it does not explicitly set the hard
3828 return reg). */
3830 /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
3831 all registers, not just hard registers, may be clobbered by this
3832 call. */
3834 /* Insn, being a CALL_INSN, magically depends on
3835 `last_function_call' already. */
3837 if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE
3838 && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
3840 int max_reg = max_reg_num ();
3841 for (i = 0; i < max_reg; i++)
3843 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3844 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3845 free_INSN_LIST_list (&deps->reg_last_uses[i]);
3847 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3848 add_dependence (insn, XEXP (u, 0), 0);
3850 for (u = deps->reg_last_clobbers[i]; u; u = XEXP (u, 1))
3851 add_dependence (insn, XEXP (u, 0), 0);
3853 reg_pending_sets_all = 1;
3855 /* Add a pair of REG_SAVE_NOTEs which we will later
3856 convert back into a NOTE_INSN_SETJMP note. See
3857 reemit_notes for why we use a pair of NOTEs. */
3858 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3859 GEN_INT (0),
3860 REG_NOTES (insn));
3861 REG_NOTES (insn) = alloc_EXPR_LIST (REG_SAVE_NOTE,
3862 GEN_INT (NOTE_INSN_SETJMP),
3863 REG_NOTES (insn));
3865 else
3867 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3868 if (call_used_regs[i] || global_regs[i])
3870 for (u = deps->reg_last_uses[i]; u; u = XEXP (u, 1))
3871 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3873 for (u = deps->reg_last_sets[i]; u; u = XEXP (u, 1))
3874 add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
3876 SET_REGNO_REG_SET (reg_pending_clobbers, i);
3880 /* For each insn which shouldn't cross a call, add a dependence
3881 between that insn and this call insn. */
3882 x = LOG_LINKS (deps->sched_before_next_call);
3883 while (x)
3885 add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI);
3886 x = XEXP (x, 1);
3888 free_INSN_LIST_list (&LOG_LINKS (deps->sched_before_next_call));
3890 sched_analyze_insn (deps, PATTERN (insn), insn, loop_notes);
3891 loop_notes = 0;
3893 /* In the absence of interprocedural alias analysis, we must flush
3894 	     all pending reads and writes, and start new dependencies from
3895 	     here.  But only flush writes for constant calls (which may
3896 be passed a pointer to something we haven't written yet). */
3897 flush_pending_lists (deps, insn, CONST_CALL_P (insn));
3899 /* Depend this function call (actually, the user of this
3900 function call) on all hard register clobberage. */
3902 /* last_function_call is now a list of insns. */
3903 free_INSN_LIST_list (&deps->last_function_call);
3904 deps->last_function_call = alloc_INSN_LIST (insn, NULL_RTX);
3907 /* See comments on reemit_notes as to why we do this.
3908 	 ??? Actually, the comments on reemit_notes just say what is done, not why.  */
3910 else if (GET_CODE (insn) == NOTE
3911 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_START
3912 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_RANGE_END))
3914 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE, NOTE_RANGE_INFO (insn),
3915 loop_notes);
3916 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3917 GEN_INT (NOTE_LINE_NUMBER (insn)),
3918 loop_notes);
3920 else if (GET_CODE (insn) == NOTE
3921 && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG
3922 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
3923 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3924 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
3925 || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
3926 && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
3928 rtx rtx_region;
3930 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
3931 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
3932 rtx_region = GEN_INT (NOTE_EH_HANDLER (insn));
3933 else
3934 rtx_region = GEN_INT (0);
3936 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3937 rtx_region,
3938 loop_notes);
3939 loop_notes = alloc_EXPR_LIST (REG_SAVE_NOTE,
3940 GEN_INT (NOTE_LINE_NUMBER (insn)),
3941 loop_notes);
3942 CONST_CALL_P (loop_notes) = CONST_CALL_P (insn);
3945 if (insn == tail)
3946 return;
3948 abort ();
3951 /* Macros and functions for keeping the priority queue sorted, and
3952 dealing with queueing and dequeueing of instructions. */
3954 #define SCHED_SORT(READY, N_READY) \
3955 do { if ((N_READY) == 2) \
3956 swap_sort (READY, N_READY); \
3957 else if ((N_READY) > 2) \
3958 qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); } \
3959 while (0)
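/* A minimal usage sketch (the array contents here are hypothetical, not taken
   from the code below):

       rtx ready[3] = { insn_a, insn_b, insn_c };
       int n_ready = 3;
       SCHED_SORT (ready, n_ready);

   A two-element list is handled by the cheaper swap_sort; anything longer
   goes through qsort with rank_for_schedule below as the comparator, which
   leaves the most desirable insn at the highest index of READY.  */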
3961 /* Returns a positive value if x is preferred; returns a negative value if
3962 y is preferred. Should never return 0, since that will make the sort
3963 unstable. */
3965 static int
3966 rank_for_schedule (x, y)
3967 const PTR x;
3968 const PTR y;
3970 rtx tmp = *(rtx *)y;
3971 rtx tmp2 = *(rtx *)x;
3972 rtx link;
3973 int tmp_class, tmp2_class, depend_count1, depend_count2;
3974 int val, priority_val, spec_val, prob_val, weight_val;
3977 /* Prefer insn with higher priority. */
3978 priority_val = INSN_PRIORITY (tmp2) - INSN_PRIORITY (tmp);
3979 if (priority_val)
3980 return priority_val;
3982   /* Prefer an insn with a smaller contribution to register pressure.  */
3983 if (!reload_completed &&
3984 (weight_val = INSN_REG_WEIGHT (tmp) - INSN_REG_WEIGHT (tmp2)))
3985 return (weight_val);
3987   /* Some comparisons make sense only in interblock scheduling.  */
3988 if (INSN_BB (tmp) != INSN_BB (tmp2))
3990       /* Prefer an inblock motion over an interblock motion.  */
3991 if ((INSN_BB (tmp2) == target_bb) && (INSN_BB (tmp) != target_bb))
3992 return 1;
3993 if ((INSN_BB (tmp) == target_bb) && (INSN_BB (tmp2) != target_bb))
3994 return -1;
3996       /* Prefer a useful motion over a speculative one.  */
3997 if ((spec_val = IS_SPECULATIVE_INSN (tmp) - IS_SPECULATIVE_INSN (tmp2)))
3998 return (spec_val);
4000 /* Prefer a more probable (speculative) insn. */
4001 prob_val = INSN_PROBABILITY (tmp2) - INSN_PROBABILITY (tmp);
4002 if (prob_val)
4003 return (prob_val);
4006 /* Compare insns based on their relation to the last-scheduled-insn. */
4007 if (last_scheduled_insn)
4009 /* Classify the instructions into three classes:
4010 	 1) Data dependent on last scheduled insn.
4011 2) Anti/Output dependent on last scheduled insn.
4012 3) Independent of last scheduled insn, or has latency of one.
4013 Choose the insn from the highest numbered class if different. */
4014 link = find_insn_list (tmp, INSN_DEPEND (last_scheduled_insn));
4015 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp) == 1)
4016 tmp_class = 3;
4017 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4018 tmp_class = 1;
4019 else
4020 tmp_class = 2;
4022 link = find_insn_list (tmp2, INSN_DEPEND (last_scheduled_insn));
4023 if (link == 0 || insn_cost (last_scheduled_insn, link, tmp2) == 1)
4024 tmp2_class = 3;
4025 else if (REG_NOTE_KIND (link) == 0) /* Data dependence. */
4026 tmp2_class = 1;
4027 else
4028 tmp2_class = 2;
4030 if ((val = tmp2_class - tmp_class))
4031 return val;
4034 /* Prefer the insn which has more later insns that depend on it.
4035 This gives the scheduler more freedom when scheduling later
4036 instructions at the expense of added register pressure. */
4037 depend_count1 = 0;
4038 for (link = INSN_DEPEND (tmp); link; link = XEXP (link, 1))
4039 depend_count1++;
4041 depend_count2 = 0;
4042 for (link = INSN_DEPEND (tmp2); link; link = XEXP (link, 1))
4043 depend_count2++;
4045 val = depend_count2 - depend_count1;
4046 if (val)
4047 return val;
4049 /* If insns are equally good, sort by INSN_LUID (original insn order),
4050 so that we make the sort stable. This minimizes instruction movement,
4051 thus minimizing sched's effect on debugging and cross-jumping. */
4052 return INSN_LUID (tmp) - INSN_LUID (tmp2);
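/* A worked example (the priorities are invented for illustration): if insn A
   has INSN_PRIORITY 5 and insn B has INSN_PRIORITY 3, then
   rank_for_schedule (&A, &B) returns 5 - 3 = 2 > 0, so qsort places A after
   B; schedule_block pops insns from the end of the ready array, so A is
   issued first.  Only when the priorities (and, before reload, the register
   weights) tie do the interblock, last-scheduled-insn and dependence-count
   heuristics decide, and the final INSN_LUID comparison keeps the sort
   stable.  */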
4055 /* Re-sort the array A, in which only the element at index N-1 may be out of order.  */
4057 HAIFA_INLINE static void
4058 swap_sort (a, n)
4059 rtx *a;
4060 int n;
4062 rtx insn = a[n - 1];
4063 int i = n - 2;
4065 while (i >= 0 && rank_for_schedule (a + i, &insn) >= 0)
4067 a[i + 1] = a[i];
4068 i -= 1;
4070 a[i + 1] = insn;
4073 static int max_priority;
4075 /* Add INSN to the insn queue so that it can be executed at least
4076    N_CYCLES after the currently executing insn.  Preserve the insn
4077    chain for debugging purposes.  */
4079 HAIFA_INLINE static void
4080 queue_insn (insn, n_cycles)
4081 rtx insn;
4082 int n_cycles;
4084 int next_q = NEXT_Q_AFTER (q_ptr, n_cycles);
4085 rtx link = alloc_INSN_LIST (insn, insn_queue[next_q]);
4086 insn_queue[next_q] = link;
4087 q_size += 1;
4089 if (sched_verbose >= 2)
4091 fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn));
4093 if (INSN_BB (insn) != target_bb)
4094 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4096 fprintf (dump, "queued for %d cycles.\n", n_cycles);
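/* A concrete illustration (the numbers are made up): assuming NEXT_Q_AFTER
   wraps modulo INSN_QUEUE_SIZE, as defined earlier in this file, a call with
   q_ptr == 2 and n_cycles == 3 conses INSN onto
   insn_queue[(2 + 3) % INSN_QUEUE_SIZE].  queue_to_ready advances q_ptr one
   slot per clock tick, so the insn is pulled off again once the clock reaches
   that slot, three cycles later.  */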
4101 /* PREV is an insn that is ready to execute. Adjust its priority if that
4102 will help shorten or lengthen register lifetimes as appropriate. Also
4103    provide a hook for the target to tweak itself.  */
4105 HAIFA_INLINE static void
4106 adjust_priority (prev)
4107 rtx prev ATTRIBUTE_UNUSED;
4109 /* ??? There used to be code here to try and estimate how an insn
4110 affected register lifetimes, but it did it by looking at REG_DEAD
4111 notes, which we removed in schedule_region. Nor did it try to
4112 take into account register pressure or anything useful like that.
4114 Revisit when we have a machine model to work with and not before. */
4116 #ifdef ADJUST_PRIORITY
4117 ADJUST_PRIORITY (prev);
4118 #endif
4121 /* Clock at which the previous instruction was issued. */
4122 static int last_clock_var;
4124 /* INSN is the "currently executing insn". Launch each insn which was
4125 waiting on INSN. READY is a vector of insns which are ready to fire.
4126 N_READY is the number of elements in READY. CLOCK is the current
4127 cycle. */
4129 static int
4130 schedule_insn (insn, ready, n_ready, clock)
4131 rtx insn;
4132 rtx *ready;
4133 int n_ready;
4134 int clock;
4136 rtx link;
4137 int unit;
4139 unit = insn_unit (insn);
4141 if (sched_verbose >= 2)
4143 fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ",
4144 INSN_UID (insn));
4145 insn_print_units (insn);
4146 fprintf (dump, "\n");
4149 if (sched_verbose && unit == -1)
4150 visualize_no_unit (insn);
4152 if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
4153 schedule_unit (unit, insn, clock);
4155 if (INSN_DEPEND (insn) == 0)
4156 return n_ready;
4158 /* This is used by the function adjust_priority above. */
4159 if (n_ready > 0)
4160 max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn));
4161 else
4162 max_priority = INSN_PRIORITY (insn);
4164 for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
4166 rtx next = XEXP (link, 0);
4167 int cost = insn_cost (insn, link, next);
4169 INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost);
4171 if ((INSN_DEP_COUNT (next) -= 1) == 0)
4173 int effective_cost = INSN_TICK (next) - clock;
4175 	  /* For speculative insns, before inserting into the ready list or
4176 	     queue, check liveness, exception-freedom and issue delay.  */
4177 if (INSN_BB (next) != target_bb
4178 && (!IS_VALID (INSN_BB (next))
4179 || CANT_MOVE (next)
4180 || (IS_SPECULATIVE_INSN (next)
4181 && (insn_issue_delay (next) > 3
4182 || !check_live (next, INSN_BB (next))
4183 || !is_exception_free (next, INSN_BB (next), target_bb)))))
4184 continue;
4186 if (sched_verbose >= 2)
4188 fprintf (dump, ";;\t\tdependences resolved: insn %d ",
4189 INSN_UID (next));
4191 if (current_nr_blocks > 1 && INSN_BB (next) != target_bb)
4192 fprintf (dump, "/b%d ", BLOCK_NUM (next));
4194 if (effective_cost < 1)
4195 fprintf (dump, "into ready\n");
4196 else
4197 fprintf (dump, "into queue with cost=%d\n", effective_cost);
4200 /* Adjust the priority of NEXT and either put it on the ready
4201 list or queue it. */
4202 adjust_priority (next);
4203 if (effective_cost < 1)
4204 ready[n_ready++] = next;
4205 else
4206 queue_insn (next, effective_cost);
4210 /* Annotate the instruction with issue information -- TImode
4211 indicates that the instruction is expected not to be able
4212 to issue on the same cycle as the previous insn. A machine
4213 may use this information to decide how the instruction should
4214 be aligned. */
4215 if (reload_completed && issue_rate > 1)
4217 PUT_MODE (insn, clock > last_clock_var ? TImode : VOIDmode);
4218 last_clock_var = clock;
4221 return n_ready;
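/* A small numeric example (values invented for illustration): suppose INSN
   issues at clock 4 and feeds NEXT through a link whose insn_cost is 2.
   INSN_TICK (NEXT) is raised to at least 6; when NEXT's dependence count
   reaches zero its effective cost is 6 - 4 = 2, so NEXT is queued for two
   cycles instead of being placed on the ready list immediately.  */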
4224 /* Functions for handling of notes. */
4226 /* Delete notes beginning with INSN and put them in the chain
4227 of notes ended by NOTE_LIST.
4228 Returns the insn following the notes. */
4230 static rtx
4231 unlink_other_notes (insn, tail)
4232 rtx insn, tail;
4234 rtx prev = PREV_INSN (insn);
4236 while (insn != tail && GET_CODE (insn) == NOTE)
4238 rtx next = NEXT_INSN (insn);
4239 /* Delete the note from its current position. */
4240 if (prev)
4241 NEXT_INSN (prev) = next;
4242 if (next)
4243 PREV_INSN (next) = prev;
4245 /* See sched_analyze to see how these are handled. */
4246 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_SETJMP
4247 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG
4248 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END
4249 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_START
4250 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_RANGE_END
4251 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_BEG
4252 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_EH_REGION_END)
4254 /* Insert the note at the end of the notes list. */
4255 PREV_INSN (insn) = note_list;
4256 if (note_list)
4257 NEXT_INSN (note_list) = insn;
4258 note_list = insn;
4261 insn = next;
4263 return insn;
4266 /* Delete line notes beginning with INSN. Record line-number notes so
4267 they can be reused. Returns the insn following the notes. */
4269 static rtx
4270 unlink_line_notes (insn, tail)
4271 rtx insn, tail;
4273 rtx prev = PREV_INSN (insn);
4275 while (insn != tail && GET_CODE (insn) == NOTE)
4277 rtx next = NEXT_INSN (insn);
4279 if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0)
4281 /* Delete the note from its current position. */
4282 if (prev)
4283 NEXT_INSN (prev) = next;
4284 if (next)
4285 PREV_INSN (next) = prev;
4287 /* Record line-number notes so they can be reused. */
4288 LINE_NOTE (insn) = insn;
4290 else
4291 prev = insn;
4293 insn = next;
4295 return insn;
4298 /* Return the head and tail pointers of BB. */
4300 HAIFA_INLINE static void
4301 get_block_head_tail (b, headp, tailp)
4302 int b;
4303 rtx *headp;
4304 rtx *tailp;
4307 rtx head;
4308 rtx tail;
4310 /* HEAD and TAIL delimit the basic block being scheduled. */
4311 head = BLOCK_HEAD (b);
4312 tail = BLOCK_END (b);
4314 /* Don't include any notes or labels at the beginning of the
4315 basic block, or notes at the ends of basic blocks. */
4316 while (head != tail)
4318 if (GET_CODE (head) == NOTE)
4319 head = NEXT_INSN (head);
4320 else if (GET_CODE (tail) == NOTE)
4321 tail = PREV_INSN (tail);
4322 else if (GET_CODE (head) == CODE_LABEL)
4323 head = NEXT_INSN (head);
4324 else
4325 break;
4328 *headp = head;
4329 *tailp = tail;
4332 HAIFA_INLINE static void
4333 get_bb_head_tail (bb, headp, tailp)
4334 int bb;
4335 rtx *headp;
4336 rtx *tailp;
4338 get_block_head_tail (BB_TO_BLOCK (bb), headp, tailp);
4341 /* Delete line notes from bb. Save them so they can be later restored
4342 (in restore_line_notes ()). */
4344 static void
4345 rm_line_notes (bb)
4346 int bb;
4348 rtx next_tail;
4349 rtx tail;
4350 rtx head;
4351 rtx insn;
4353 get_bb_head_tail (bb, &head, &tail);
4355 if (head == tail
4356 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4357 return;
4359 next_tail = NEXT_INSN (tail);
4360 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4362 rtx prev;
4364 /* Farm out notes, and maybe save them in NOTE_LIST.
4365 This is needed to keep the debugger from
4366 getting completely deranged. */
4367 if (GET_CODE (insn) == NOTE)
4369 prev = insn;
4370 insn = unlink_line_notes (insn, next_tail);
4372 if (prev == tail)
4373 abort ();
4374 if (prev == head)
4375 abort ();
4376 if (insn == next_tail)
4377 abort ();
4382 /* Save line number notes for each insn in bb. */
4384 static void
4385 save_line_notes (bb)
4386 int bb;
4388 rtx head, tail;
4389 rtx next_tail;
4391 /* We must use the true line number for the first insn in the block
4392 that was computed and saved at the start of this pass. We can't
4393 use the current line number, because scheduling of the previous
4394 block may have changed the current line number. */
4396 rtx line = line_note_head[BB_TO_BLOCK (bb)];
4397 rtx insn;
4399 get_bb_head_tail (bb, &head, &tail);
4400 next_tail = NEXT_INSN (tail);
4402 for (insn = BLOCK_HEAD (BB_TO_BLOCK (bb));
4403 insn != next_tail;
4404 insn = NEXT_INSN (insn))
4405 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4406 line = insn;
4407 else
4408 LINE_NOTE (insn) = line;
4412 /* After bb was scheduled, insert line notes into the insns list. */
4414 static void
4415 restore_line_notes (bb)
4416 int bb;
4418 rtx line, note, prev, new;
4419 int added_notes = 0;
4420 int b;
4421 rtx head, next_tail, insn;
4423 b = BB_TO_BLOCK (bb);
4425 head = BLOCK_HEAD (b);
4426 next_tail = NEXT_INSN (BLOCK_END (b));
4428 /* Determine the current line-number. We want to know the current
4429 line number of the first insn of the block here, in case it is
4430 different from the true line number that was saved earlier. If
4431 different, then we need a line number note before the first insn
4432 of this block. If it happens to be the same, then we don't want to
4433 emit another line number note here. */
4434 for (line = head; line; line = PREV_INSN (line))
4435 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
4436 break;
4438 /* Walk the insns keeping track of the current line-number and inserting
4439 the line-number notes as needed. */
4440 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4441 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4442 line = insn;
4443 /* This used to emit line number notes before every non-deleted note.
4444 However, this confuses a debugger, because line notes not separated
4445 by real instructions all end up at the same address. I can find no
4446 use for line number notes before other notes, so none are emitted. */
4447 else if (GET_CODE (insn) != NOTE
4448 && (note = LINE_NOTE (insn)) != 0
4449 && note != line
4450 && (line == 0
4451 || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line)
4452 || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line)))
4454 line = note;
4455 prev = PREV_INSN (insn);
4456 if (LINE_NOTE (note))
4458 /* Re-use the original line-number note. */
4459 LINE_NOTE (note) = 0;
4460 PREV_INSN (note) = prev;
4461 NEXT_INSN (prev) = note;
4462 PREV_INSN (insn) = note;
4463 NEXT_INSN (note) = insn;
4465 else
4467 added_notes++;
4468 new = emit_note_after (NOTE_LINE_NUMBER (note), prev);
4469 NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note);
4470 RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note);
4473 if (sched_verbose && added_notes)
4474 fprintf (dump, ";; added %d line-number notes\n", added_notes);
4477 /* After scheduling the function, delete redundant line notes from the
4478 insns list. */
4480 static void
4481 rm_redundant_line_notes ()
4483 rtx line = 0;
4484 rtx insn = get_insns ();
4485 int active_insn = 0;
4486 int notes = 0;
4488 /* Walk the insns deleting redundant line-number notes. Many of these
4489 are already present. The remainder tend to occur at basic
4490 block boundaries. */
4491 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4492 if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
4494 /* If there are no active insns following, INSN is redundant. */
4495 if (active_insn == 0)
4497 notes++;
4498 NOTE_SOURCE_FILE (insn) = 0;
4499 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4501 /* If the line number is unchanged, LINE is redundant. */
4502 else if (line
4503 && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn)
4504 && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn))
4506 notes++;
4507 NOTE_SOURCE_FILE (line) = 0;
4508 NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED;
4509 line = insn;
4511 else
4512 line = insn;
4513 active_insn = 0;
4515 else if (!((GET_CODE (insn) == NOTE
4516 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED)
4517 || (GET_CODE (insn) == INSN
4518 && (GET_CODE (PATTERN (insn)) == USE
4519 || GET_CODE (PATTERN (insn)) == CLOBBER))))
4520 active_insn++;
4522 if (sched_verbose && notes)
4523 fprintf (dump, ";; deleted %d line-number notes\n", notes);
4526 /* Delete notes between HEAD and TAIL and put them in the chain
4527 of notes ended by NOTE_LIST. */
4529 static void
4530 rm_other_notes (head, tail)
4531 rtx head;
4532 rtx tail;
4534 rtx next_tail;
4535 rtx insn;
4537 if (head == tail
4538 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
4539 return;
4541 next_tail = NEXT_INSN (tail);
4542 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4544 rtx prev;
4546 /* Farm out notes, and maybe save them in NOTE_LIST.
4547 This is needed to keep the debugger from
4548 getting completely deranged. */
4549 if (GET_CODE (insn) == NOTE)
4551 prev = insn;
4553 insn = unlink_other_notes (insn, next_tail);
4555 if (prev == tail)
4556 abort ();
4557 if (prev == head)
4558 abort ();
4559 if (insn == next_tail)
4560 abort ();
4565 /* Functions for computation of registers live/usage info. */
4567 /* Calculate INSN_REG_WEIGHT for all insns of a block. */
4569 static void
4570 find_insn_reg_weight (b)
4571 int b;
4573 rtx insn, next_tail, head, tail;
4575 get_block_head_tail (b, &head, &tail);
4576 next_tail = NEXT_INSN (tail);
4578 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
4580 int reg_weight = 0;
4581 rtx x;
4583 /* Handle register life information. */
4584 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4585 continue;
4587 /* Increment weight for each register born here. */
4588 x = PATTERN (insn);
4589 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4590 && register_operand (SET_DEST (x), VOIDmode))
4591 reg_weight++;
4592 else if (GET_CODE (x) == PARALLEL)
4594 int j;
4595 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
4597 x = XVECEXP (PATTERN (insn), 0, j);
4598 if ((GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
4599 && register_operand (SET_DEST (x), VOIDmode))
4600 reg_weight++;
4604 /* Decrement weight for each register that dies here. */
4605 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
4607 if (REG_NOTE_KIND (x) == REG_DEAD
4608 || REG_NOTE_KIND (x) == REG_UNUSED)
4609 reg_weight--;
4612 INSN_REG_WEIGHT (insn) = reg_weight;
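/* For example (a made-up insn): a SET of a register that also carries one
   REG_DEAD note gets INSN_REG_WEIGHT 1 - 1 = 0; a SET of a register with no
   dying inputs gets +1; an insn that only consumes a dying register gets -1.
   Before reload, rank_for_schedule uses this weight to prefer insns that do
   not increase register pressure.  */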
4616 /* Scheduling clock, modified in schedule_block() and queue_to_ready (). */
4617 static int clock_var;
4619 /* Move insns that became ready to fire from queue to ready list. */
4621 static int
4622 queue_to_ready (ready, n_ready)
4623 rtx ready[];
4624 int n_ready;
4626 rtx insn;
4627 rtx link;
4629 q_ptr = NEXT_Q (q_ptr);
4631 /* Add all pending insns that can be scheduled without stalls to the
4632 ready list. */
4633 for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1))
4636 insn = XEXP (link, 0);
4637 q_size -= 1;
4639 if (sched_verbose >= 2)
4640 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4642 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4643 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4645 ready[n_ready++] = insn;
4646 if (sched_verbose >= 2)
4647 fprintf (dump, "moving to ready without stalls\n");
4649 insn_queue[q_ptr] = 0;
4651 /* If there are no ready insns, stall until one is ready and add all
4652 of the pending insns at that point to the ready list. */
4653 if (n_ready == 0)
4655 register int stalls;
4657 for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
4659 if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
4661 for (; link; link = XEXP (link, 1))
4663 insn = XEXP (link, 0);
4664 q_size -= 1;
4666 if (sched_verbose >= 2)
4667 fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
4669 if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
4670 fprintf (dump, "(b%d) ", BLOCK_NUM (insn));
4672 ready[n_ready++] = insn;
4673 if (sched_verbose >= 2)
4674 fprintf (dump, "moving to ready with %d stalls\n", stalls);
4676 insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
4678 if (n_ready)
4679 break;
4683 if (sched_verbose && stalls)
4684 visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls);
4685 q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
4686 clock_var += stalls;
4688 return n_ready;
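/* For instance (a hypothetical queue state): if the current slot is empty but
   insns were queued three cycles out, the stall loop finds them at
   stalls == 3, moves them onto READY, advances q_ptr past the empty slots and
   adds 3 to clock_var -- the same count the stall visualization records as
   dots.  */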
4691 /* Print the ready list for debugging purposes. Callable from debugger. */
4693 static void
4694 debug_ready_list (ready, n_ready)
4695 rtx ready[];
4696 int n_ready;
4698 int i;
4700 for (i = 0; i < n_ready; i++)
4702 fprintf (dump, " %d", INSN_UID (ready[i]));
4703 if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb)
4704 fprintf (dump, "/b%d", BLOCK_NUM (ready[i]));
4706 fprintf (dump, "\n");
4709 /* Print names of units on which insn can/should execute, for debugging. */
4711 static void
4712 insn_print_units (insn)
4713 rtx insn;
4715 int i;
4716 int unit = insn_unit (insn);
4718 if (unit == -1)
4719 fprintf (dump, "none");
4720 else if (unit >= 0)
4721 fprintf (dump, "%s", function_units[unit].name);
4722 else
4724 fprintf (dump, "[");
4725 for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
4726 if (unit & 1)
4728 fprintf (dump, "%s", function_units[i].name);
4729 if (unit != 1)
4730 fprintf (dump, " ");
4732 fprintf (dump, "]");
4736 /* MAX_VISUAL_LINES is the maximum number of lines in visualization table
4737    of a basic block.  If more lines are needed, the table is split in two.
4738 n_visual_lines is the number of lines printed so far for a block.
4739 visual_tbl contains the block visualization info.
4740 vis_no_unit holds insns in a cycle that are not mapped to any unit. */
4741 #define MAX_VISUAL_LINES 100
4742 #define INSN_LEN 30
4743 int n_visual_lines;
4744 char *visual_tbl;
4745 int n_vis_no_unit;
4746 rtx vis_no_unit[10];
4748 /* Find the units that are in use in this function.  Required only
4749 for visualization. */
4751 static void
4752 init_target_units ()
4754 rtx insn;
4755 int unit;
4757 for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
4759 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4760 continue;
4762 unit = insn_unit (insn);
4764 if (unit < 0)
4765 target_units |= ~unit;
4766 else
4767 target_units |= (1 << unit);
4771 /* Return the length of the visualization table. */
4773 static int
4774 get_visual_tbl_length ()
4776 int unit, i;
4777 int n, n1;
4778 char *s;
4780 /* Compute length of one field in line. */
4781 s = (char *) alloca (INSN_LEN + 6);
4782 sprintf (s, " %33s", "uname");
4783 n1 = strlen (s);
4785 /* Compute length of one line. */
4786 n = strlen (";; ");
4787 n += n1;
4788 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
4789 if (function_units[unit].bitmask & target_units)
4790 for (i = 0; i < function_units[unit].multiplicity; i++)
4791 n += n1;
4792 n += n1;
4793 n += strlen ("\n") + 2;
4795 /* Compute length of visualization string. */
4796 return (MAX_VISUAL_LINES * n);
4799 /* Init block visualization debugging info. */
4801 static void
4802 init_block_visualization ()
4804 strcpy (visual_tbl, "");
4805 n_visual_lines = 0;
4806 n_vis_no_unit = 0;
4809 #define BUF_LEN 256
4811 static char *
4812 safe_concat (buf, cur, str)
4813 char *buf;
4814 char *cur;
4815 const char *str;
4817 char *end = buf + BUF_LEN - 2; /* Leave room for null. */
4818 int c;
4820 if (cur > end)
4822 *end = '\0';
4823 return end;
4826 while (cur < end && (c = *str++) != '\0')
4827 *cur++ = c;
4829 *cur = '\0';
4830 return cur;
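/* Typical use, as in the printers below (the literal text is arbitrary):

       char buf[BUF_LEN];
       char *cur = buf;
       cur = safe_concat (buf, cur, "const(");
       cur = safe_concat (buf, cur, "r42");
       cur = safe_concat (buf, cur, ")");

   Each call appends at CUR but never writes beyond BUF + BUF_LEN - 2 and
   always leaves the string NUL-terminated, so an over-long rtx prints
   truncated instead of overflowing the buffer.  */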
4833 /* This recognizes rtxs that are classified here as expressions.  These
4834    always represent some action on values or on the results of other
4835    expressions; such results may be stored in objects representing values.  */
4837 static void
4838 print_exp (buf, x, verbose)
4839 char *buf;
4840 rtx x;
4841 int verbose;
4843 char tmp[BUF_LEN];
4844 const char *st[4];
4845 char *cur = buf;
4846 const char *fun = (char *)0;
4847 const char *sep;
4848 rtx op[4];
4849 int i;
4851 for (i = 0; i < 4; i++)
4853 st[i] = (char *)0;
4854 op[i] = NULL_RTX;
4857 switch (GET_CODE (x))
4859 case PLUS:
4860 op[0] = XEXP (x, 0);
4861 if (GET_CODE (XEXP (x, 1)) == CONST_INT
4862 && INTVAL (XEXP (x, 1)) < 0)
4864 st[1] = "-";
4865 op[1] = GEN_INT (-INTVAL (XEXP (x, 1)));
4867 else
4869 st[1] = "+";
4870 op[1] = XEXP (x, 1);
4872 break;
4873 case LO_SUM:
4874 op[0] = XEXP (x, 0);
4875 st[1] = "+low(";
4876 op[1] = XEXP (x, 1);
4877 st[2] = ")";
4878 break;
4879 case MINUS:
4880 op[0] = XEXP (x, 0);
4881 st[1] = "-";
4882 op[1] = XEXP (x, 1);
4883 break;
4884 case COMPARE:
4885 fun = "cmp";
4886 op[0] = XEXP (x, 0);
4887 op[1] = XEXP (x, 1);
4888 break;
4889 case NEG:
4890 st[0] = "-";
4891 op[0] = XEXP (x, 0);
4892 break;
4893 case MULT:
4894 op[0] = XEXP (x, 0);
4895 st[1] = "*";
4896 op[1] = XEXP (x, 1);
4897 break;
4898 case DIV:
4899 op[0] = XEXP (x, 0);
4900 st[1] = "/";
4901 op[1] = XEXP (x, 1);
4902 break;
4903 case UDIV:
4904 fun = "udiv";
4905 op[0] = XEXP (x, 0);
4906 op[1] = XEXP (x, 1);
4907 break;
4908 case MOD:
4909 op[0] = XEXP (x, 0);
4910 st[1] = "%";
4911 op[1] = XEXP (x, 1);
4912 break;
4913 case UMOD:
4914 fun = "umod";
4915 op[0] = XEXP (x, 0);
4916 op[1] = XEXP (x, 1);
4917 break;
4918 case SMIN:
4919 fun = "smin";
4920 op[0] = XEXP (x, 0);
4921 op[1] = XEXP (x, 1);
4922 break;
4923 case SMAX:
4924 fun = "smax";
4925 op[0] = XEXP (x, 0);
4926 op[1] = XEXP (x, 1);
4927 break;
4928 case UMIN:
4929 fun = "umin";
4930 op[0] = XEXP (x, 0);
4931 op[1] = XEXP (x, 1);
4932 break;
4933 case UMAX:
4934 fun = "umax";
4935 op[0] = XEXP (x, 0);
4936 op[1] = XEXP (x, 1);
4937 break;
4938 case NOT:
4939 st[0] = "!";
4940 op[0] = XEXP (x, 0);
4941 break;
4942 case AND:
4943 op[0] = XEXP (x, 0);
4944 st[1] = "&";
4945 op[1] = XEXP (x, 1);
4946 break;
4947 case IOR:
4948 op[0] = XEXP (x, 0);
4949 st[1] = "|";
4950 op[1] = XEXP (x, 1);
4951 break;
4952 case XOR:
4953 op[0] = XEXP (x, 0);
4954 st[1] = "^";
4955 op[1] = XEXP (x, 1);
4956 break;
4957 case ASHIFT:
4958 op[0] = XEXP (x, 0);
4959 st[1] = "<<";
4960 op[1] = XEXP (x, 1);
4961 break;
4962 case LSHIFTRT:
4963 op[0] = XEXP (x, 0);
4964 st[1] = " 0>>";
4965 op[1] = XEXP (x, 1);
4966 break;
4967 case ASHIFTRT:
4968 op[0] = XEXP (x, 0);
4969 st[1] = ">>";
4970 op[1] = XEXP (x, 1);
4971 break;
4972 case ROTATE:
4973 op[0] = XEXP (x, 0);
4974 st[1] = "<-<";
4975 op[1] = XEXP (x, 1);
4976 break;
4977 case ROTATERT:
4978 op[0] = XEXP (x, 0);
4979 st[1] = ">->";
4980 op[1] = XEXP (x, 1);
4981 break;
4982 case ABS:
4983 fun = "abs";
4984 op[0] = XEXP (x, 0);
4985 break;
4986 case SQRT:
4987 fun = "sqrt";
4988 op[0] = XEXP (x, 0);
4989 break;
4990 case FFS:
4991 fun = "ffs";
4992 op[0] = XEXP (x, 0);
4993 break;
4994 case EQ:
4995 op[0] = XEXP (x, 0);
4996 st[1] = "==";
4997 op[1] = XEXP (x, 1);
4998 break;
4999 case NE:
5000 op[0] = XEXP (x, 0);
5001 st[1] = "!=";
5002 op[1] = XEXP (x, 1);
5003 break;
5004 case GT:
5005 op[0] = XEXP (x, 0);
5006 st[1] = ">";
5007 op[1] = XEXP (x, 1);
5008 break;
5009 case GTU:
5010 fun = "gtu";
5011 op[0] = XEXP (x, 0);
5012 op[1] = XEXP (x, 1);
5013 break;
5014 case LT:
5015 op[0] = XEXP (x, 0);
5016 st[1] = "<";
5017 op[1] = XEXP (x, 1);
5018 break;
5019 case LTU:
5020 fun = "ltu";
5021 op[0] = XEXP (x, 0);
5022 op[1] = XEXP (x, 1);
5023 break;
5024 case GE:
5025 op[0] = XEXP (x, 0);
5026 st[1] = ">=";
5027 op[1] = XEXP (x, 1);
5028 break;
5029 case GEU:
5030 fun = "geu";
5031 op[0] = XEXP (x, 0);
5032 op[1] = XEXP (x, 1);
5033 break;
5034 case LE:
5035 op[0] = XEXP (x, 0);
5036 st[1] = "<=";
5037 op[1] = XEXP (x, 1);
5038 break;
5039 case LEU:
5040 fun = "leu";
5041 op[0] = XEXP (x, 0);
5042 op[1] = XEXP (x, 1);
5043 break;
5044 case SIGN_EXTRACT:
5045 fun = (verbose) ? "sign_extract" : "sxt";
5046 op[0] = XEXP (x, 0);
5047 op[1] = XEXP (x, 1);
5048 op[2] = XEXP (x, 2);
5049 break;
5050 case ZERO_EXTRACT:
5051 fun = (verbose) ? "zero_extract" : "zxt";
5052 op[0] = XEXP (x, 0);
5053 op[1] = XEXP (x, 1);
5054 op[2] = XEXP (x, 2);
5055 break;
5056 case SIGN_EXTEND:
5057 fun = (verbose) ? "sign_extend" : "sxn";
5058 op[0] = XEXP (x, 0);
5059 break;
5060 case ZERO_EXTEND:
5061 fun = (verbose) ? "zero_extend" : "zxn";
5062 op[0] = XEXP (x, 0);
5063 break;
5064 case FLOAT_EXTEND:
5065 fun = (verbose) ? "float_extend" : "fxn";
5066 op[0] = XEXP (x, 0);
5067 break;
5068 case TRUNCATE:
5069 fun = (verbose) ? "trunc" : "trn";
5070 op[0] = XEXP (x, 0);
5071 break;
5072 case FLOAT_TRUNCATE:
5073 fun = (verbose) ? "float_trunc" : "ftr";
5074 op[0] = XEXP (x, 0);
5075 break;
5076 case FLOAT:
5077 fun = (verbose) ? "float" : "flt";
5078 op[0] = XEXP (x, 0);
5079 break;
5080 case UNSIGNED_FLOAT:
5081 fun = (verbose) ? "uns_float" : "ufl";
5082 op[0] = XEXP (x, 0);
5083 break;
5084 case FIX:
5085 fun = "fix";
5086 op[0] = XEXP (x, 0);
5087 break;
5088 case UNSIGNED_FIX:
5089 fun = (verbose) ? "uns_fix" : "ufx";
5090 op[0] = XEXP (x, 0);
5091 break;
5092 case PRE_DEC:
5093 st[0] = "--";
5094 op[0] = XEXP (x, 0);
5095 break;
5096 case PRE_INC:
5097 st[0] = "++";
5098 op[0] = XEXP (x, 0);
5099 break;
5100 case POST_DEC:
5101 op[0] = XEXP (x, 0);
5102 st[1] = "--";
5103 break;
5104 case POST_INC:
5105 op[0] = XEXP (x, 0);
5106 st[1] = "++";
5107 break;
5108 case CALL:
5109 st[0] = "call ";
5110 op[0] = XEXP (x, 0);
5111 if (verbose)
5113 st[1] = " argc:";
5114 op[1] = XEXP (x, 1);
5116 break;
5117 case IF_THEN_ELSE:
5118 st[0] = "{(";
5119 op[0] = XEXP (x, 0);
5120 st[1] = ")?";
5121 op[1] = XEXP (x, 1);
5122 st[2] = ":";
5123 op[2] = XEXP (x, 2);
5124 st[3] = "}";
5125 break;
5126 case TRAP_IF:
5127 fun = "trap_if";
5128 op[0] = TRAP_CONDITION (x);
5129 break;
5130 case UNSPEC:
5131 case UNSPEC_VOLATILE:
5133 cur = safe_concat (buf, cur, "unspec");
5134 if (GET_CODE (x) == UNSPEC_VOLATILE)
5135 cur = safe_concat (buf, cur, "/v");
5136 cur = safe_concat (buf, cur, "[");
5137 sep = "";
5138 for (i = 0; i < XVECLEN (x, 0); i++)
5140 print_pattern (tmp, XVECEXP (x, 0, i), verbose);
5141 cur = safe_concat (buf, cur, sep);
5142 cur = safe_concat (buf, cur, tmp);
5143 sep = ",";
5145 cur = safe_concat (buf, cur, "] ");
5146 sprintf (tmp, "%d", XINT (x, 1));
5147 cur = safe_concat (buf, cur, tmp);
5149 break;
5150 default:
5151 /* If (verbose) debug_rtx (x); */
5152 st[0] = GET_RTX_NAME (GET_CODE (x));
5153 break;
5156 /* Print this as a function? */
5157 if (fun)
5159 cur = safe_concat (buf, cur, fun);
5160 cur = safe_concat (buf, cur, "(");
5163 for (i = 0; i < 4; i++)
5165 if (st[i])
5166 cur = safe_concat (buf, cur, st[i]);
5168 if (op[i])
5170 if (fun && i != 0)
5171 cur = safe_concat (buf, cur, ",");
5173 print_value (tmp, op[i], verbose);
5174 cur = safe_concat (buf, cur, tmp);
5178 if (fun)
5179 cur = safe_concat (buf, cur, ")");
5180 } /* print_exp */
5182 /* Print rtxs that are classified here as values: constants, registers,
5183    labels, symbols and memory accesses.  */
5185 static void
5186 print_value (buf, x, verbose)
5187 char *buf;
5188 rtx x;
5189 int verbose;
5191 char t[BUF_LEN];
5192 char *cur = buf;
5194 switch (GET_CODE (x))
5196 case CONST_INT:
5197 sprintf (t, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
5198 cur = safe_concat (buf, cur, t);
5199 break;
5200 case CONST_DOUBLE:
5201 sprintf (t, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
5202 cur = safe_concat (buf, cur, t);
5203 break;
5204 case CONST_STRING:
5205 cur = safe_concat (buf, cur, "\"");
5206 cur = safe_concat (buf, cur, XSTR (x, 0));
5207 cur = safe_concat (buf, cur, "\"");
5208 break;
5209 case SYMBOL_REF:
5210 cur = safe_concat (buf, cur, "`");
5211 cur = safe_concat (buf, cur, XSTR (x, 0));
5212 cur = safe_concat (buf, cur, "'");
5213 break;
5214 case LABEL_REF:
5215 sprintf (t, "L%d", INSN_UID (XEXP (x, 0)));
5216 cur = safe_concat (buf, cur, t);
5217 break;
5218 case CONST:
5219 print_value (t, XEXP (x, 0), verbose);
5220 cur = safe_concat (buf, cur, "const(");
5221 cur = safe_concat (buf, cur, t);
5222 cur = safe_concat (buf, cur, ")");
5223 break;
5224 case HIGH:
5225 print_value (t, XEXP (x, 0), verbose);
5226 cur = safe_concat (buf, cur, "high(");
5227 cur = safe_concat (buf, cur, t);
5228 cur = safe_concat (buf, cur, ")");
5229 break;
5230 case REG:
5231 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
5233 int c = reg_names[ REGNO (x) ][0];
5234 if (c >= '0' && c <= '9')
5235 cur = safe_concat (buf, cur, "%");
5237 cur = safe_concat (buf, cur, reg_names[ REGNO (x) ]);
5239 else
5241 sprintf (t, "r%d", REGNO (x));
5242 cur = safe_concat (buf, cur, t);
5244 break;
5245 case SUBREG:
5246 print_value (t, SUBREG_REG (x), verbose);
5247 cur = safe_concat (buf, cur, t);
5248 sprintf (t, "#%d", SUBREG_WORD (x));
5249 cur = safe_concat (buf, cur, t);
5250 break;
5251 case SCRATCH:
5252 cur = safe_concat (buf, cur, "scratch");
5253 break;
5254 case CC0:
5255 cur = safe_concat (buf, cur, "cc0");
5256 break;
5257 case PC:
5258 cur = safe_concat (buf, cur, "pc");
5259 break;
5260 case MEM:
5261 print_value (t, XEXP (x, 0), verbose);
5262 cur = safe_concat (buf, cur, "[");
5263 cur = safe_concat (buf, cur, t);
5264 cur = safe_concat (buf, cur, "]");
5265 break;
5266 default:
5267 print_exp (t, x, verbose);
5268 cur = safe_concat (buf, cur, t);
5269 break;
5271 } /* print_value */
5273 /* The next level of insn detail: print the insn's pattern.  */
5275 static void
5276 print_pattern (buf, x, verbose)
5277 char *buf;
5278 rtx x;
5279 int verbose;
5281 char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
5283 switch (GET_CODE (x))
5285 case SET:
5286 print_value (t1, SET_DEST (x), verbose);
5287 print_value (t2, SET_SRC (x), verbose);
5288 sprintf (buf, "%s=%s", t1, t2);
5289 break;
5290 case RETURN:
5291 sprintf (buf, "return");
5292 break;
5293 case CALL:
5294 print_exp (buf, x, verbose);
5295 break;
5296 case CLOBBER:
5297 print_value (t1, XEXP (x, 0), verbose);
5298 sprintf (buf, "clobber %s", t1);
5299 break;
5300 case USE:
5301 print_value (t1, XEXP (x, 0), verbose);
5302 sprintf (buf, "use %s", t1);
5303 break;
5304 case PARALLEL:
5306 int i;
5308 sprintf (t1, "{");
5309 for (i = 0; i < XVECLEN (x, 0); i++)
5311 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5312 sprintf (t3, "%s%s;", t1, t2);
5313 strcpy (t1, t3);
5315 sprintf (buf, "%s}", t1);
5317 break;
5318 case SEQUENCE:
5320 int i;
5322 sprintf (t1, "%%{");
5323 for (i = 0; i < XVECLEN (x, 0); i++)
5325 print_insn (t2, XVECEXP (x, 0, i), verbose);
5326 sprintf (t3, "%s%s;", t1, t2);
5327 strcpy (t1, t3);
5329 sprintf (buf, "%s%%}", t1);
5331 break;
5332 case ASM_INPUT:
5333 sprintf (buf, "asm {%s}", XSTR (x, 0));
5334 break;
5335 case ADDR_VEC:
5336 break;
5337 case ADDR_DIFF_VEC:
5338 print_value (buf, XEXP (x, 0), verbose);
5339 break;
5340 case TRAP_IF:
5341 print_value (t1, TRAP_CONDITION (x), verbose);
5342 sprintf (buf, "trap_if %s", t1);
5343 break;
5344 case UNSPEC:
5346 int i;
5348 sprintf (t1, "unspec{");
5349 for (i = 0; i < XVECLEN (x, 0); i++)
5351 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5352 sprintf (t3, "%s%s;", t1, t2);
5353 strcpy (t1, t3);
5355 sprintf (buf, "%s}", t1);
5357 break;
5358 case UNSPEC_VOLATILE:
5360 int i;
5362 sprintf (t1, "unspec/v{");
5363 for (i = 0; i < XVECLEN (x, 0); i++)
5365 print_pattern (t2, XVECEXP (x, 0, i), verbose);
5366 sprintf (t3, "%s%s;", t1, t2);
5367 strcpy (t1, t3);
5369 sprintf (buf, "%s}", t1);
5371 break;
5372 default:
5373 print_value (buf, x, verbose);
5375 } /* print_pattern */
5377 /* This is the main function of the rtl visualization mechanism.  It
5378    accepts an rtx and tries to recognize it as an insn, then prints it
5379    properly in human-readable form, resembling assembler mnemonics.
5380    For every insn it prints its UID and the BB the insn belongs to.
5381 (Probably the last "option" should be extended somehow, since it
5382 depends now on sched.c inner variables ...) */
5384 static void
5385 print_insn (buf, x, verbose)
5386 char *buf;
5387 rtx x;
5388 int verbose;
5390 char t[BUF_LEN];
5391 rtx insn = x;
5393 switch (GET_CODE (x))
5395 case INSN:
5396 print_pattern (t, PATTERN (x), verbose);
5397 if (verbose)
5398 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x),
5399 INSN_UID (x), t);
5400 else
5401 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5402 break;
5403 case JUMP_INSN:
5404 print_pattern (t, PATTERN (x), verbose);
5405 if (verbose)
5406 sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x),
5407 INSN_UID (x), t);
5408 else
5409 sprintf (buf, "%-4d %s", INSN_UID (x), t);
5410 break;
5411 case CALL_INSN:
5412 x = PATTERN (insn);
5413 if (GET_CODE (x) == PARALLEL)
5415 x = XVECEXP (x, 0, 0);
5416 print_pattern (t, x, verbose);
5418 else
5419 strcpy (t, "call <...>");
5420 if (verbose)
5421 sprintf (buf, "b%d: i% 4d: %s", INSN_BB (insn),
5422 INSN_UID (insn), t);
5423 else
5424 sprintf (buf, "%-4d %s", INSN_UID (insn), t);
5425 break;
5426 case CODE_LABEL:
5427 sprintf (buf, "L%d:", INSN_UID (x));
5428 break;
5429 case BARRIER:
5430 sprintf (buf, "i% 4d: barrier", INSN_UID (x));
5431 break;
5432 case NOTE:
5433 if (NOTE_LINE_NUMBER (x) > 0)
5434 sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x),
5435 NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x));
5436 else
5437 sprintf (buf, "%4d %s", INSN_UID (x),
5438 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x)));
5439 break;
5440 default:
5441 if (verbose)
5443 sprintf (buf, "Not an INSN at all\n");
5444 debug_rtx (x);
5446 else
5447 sprintf (buf, "i%-4d <What?>", INSN_UID (x));
5449 } /* print_insn */
5451 /* Print visualization debugging info. */
5453 static void
5454 print_block_visualization (b, s)
5455 int b;
5456 const char *s;
5458 int unit, i;
5460 /* Print header. */
5461 fprintf (dump, "\n;; ==================== scheduling visualization for block %d %s \n", b, s);
5463 /* Print names of units. */
5464 fprintf (dump, ";; %-8s", "clock");
5465 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5466 if (function_units[unit].bitmask & target_units)
5467 for (i = 0; i < function_units[unit].multiplicity; i++)
5468 fprintf (dump, " %-33s", function_units[unit].name);
5469 fprintf (dump, " %-8s\n", "no-unit");
5471 fprintf (dump, ";; %-8s", "=====");
5472 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5473 if (function_units[unit].bitmask & target_units)
5474 for (i = 0; i < function_units[unit].multiplicity; i++)
5475 fprintf (dump, " %-33s", "==============================");
5476 fprintf (dump, " %-8s\n", "=======");
5478 /* Print insns in each cycle. */
5479 fprintf (dump, "%s\n", visual_tbl);
5482 /* Print insns in the 'no_unit' column of visualization. */
5484 static void
5485 visualize_no_unit (insn)
5486 rtx insn;
5488 vis_no_unit[n_vis_no_unit] = insn;
5489 n_vis_no_unit++;
5492 /* Print insns scheduled in clock, for visualization. */
5494 static void
5495 visualize_scheduled_insns (b, clock)
5496 int b, clock;
5498 int i, unit;
5500 /* If no more room, split table into two. */
5501 if (n_visual_lines >= MAX_VISUAL_LINES)
5503 print_block_visualization (b, "(incomplete)");
5504 init_block_visualization ();
5507 n_visual_lines++;
5509 sprintf (visual_tbl + strlen (visual_tbl), ";; %-8d", clock);
5510 for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
5511 if (function_units[unit].bitmask & target_units)
5512 for (i = 0; i < function_units[unit].multiplicity; i++)
5514 int instance = unit + i * FUNCTION_UNITS_SIZE;
5515 rtx insn = unit_last_insn[instance];
5517 /* Print insns that still keep the unit busy. */
5518 if (insn &&
5519 actual_hazard_this_instance (unit, instance, insn, clock, 0))
5521 char str[BUF_LEN];
5522 print_insn (str, insn, 0);
5523 str[INSN_LEN] = '\0';
5524 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", str);
5526 else
5527 sprintf (visual_tbl + strlen (visual_tbl), " %-33s", "------------------------------");
5530 /* Print insns that are not assigned to any unit. */
5531 for (i = 0; i < n_vis_no_unit; i++)
5532 sprintf (visual_tbl + strlen (visual_tbl), " %-8d",
5533 INSN_UID (vis_no_unit[i]));
5534 n_vis_no_unit = 0;
5536 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5539 /* Print stalled cycles. */
5541 static void
5542 visualize_stall_cycles (b, stalls)
5543 int b, stalls;
5545 int i;
5547 /* If no more room, split table into two. */
5548 if (n_visual_lines >= MAX_VISUAL_LINES)
5550 print_block_visualization (b, "(incomplete)");
5551 init_block_visualization ();
5554 n_visual_lines++;
5556 sprintf (visual_tbl + strlen (visual_tbl), ";; ");
5557 for (i = 0; i < stalls; i++)
5558 sprintf (visual_tbl + strlen (visual_tbl), ".");
5559 sprintf (visual_tbl + strlen (visual_tbl), "\n");
5562 /* move_insn1: Remove INSN from the insn chain, and link it after the insn LAST.  */
5564 static rtx
5565 move_insn1 (insn, last)
5566 rtx insn, last;
5568 NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
5569 PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
5571 NEXT_INSN (insn) = NEXT_INSN (last);
5572 PREV_INSN (NEXT_INSN (last)) = insn;
5574 NEXT_INSN (last) = insn;
5575 PREV_INSN (insn) = last;
5577 return insn;
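/* Schematically (A, B, C and LAST stand for arbitrary insns): unlinking
   INSN == B from  A <-> B <-> C  leaves  A <-> C, and relinking it after
   LAST turns  LAST <-> NEXT  into  LAST <-> B <-> NEXT.  Only the
   NEXT_INSN/PREV_INSN pointers are touched; basic block boundaries are
   fixed up by the caller (see the interblock case in schedule_block).  */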
5580 /* Search INSN for REG_SAVE_NOTE note pairs for NOTE_INSN_SETJMP,
5581 NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
5582    NOTEs.  The REG_SAVE_NOTE note following the first one contains the
5583    saved value for NOTE_BLOCK_NUMBER, which is useful for
5584 NOTE_INSN_EH_REGION_{BEG,END} NOTEs. LAST is the last instruction
5585 output by the instruction scheduler. Return the new value of LAST. */
5587 static rtx
5588 reemit_notes (insn, last)
5589 rtx insn;
5590 rtx last;
5592 rtx note, retval;
5594 retval = last;
5595 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
5597 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5599 int note_type = INTVAL (XEXP (note, 0));
5600 if (note_type == NOTE_INSN_SETJMP)
5602 retval = emit_note_after (NOTE_INSN_SETJMP, insn);
5603 CONST_CALL_P (retval) = CONST_CALL_P (note);
5604 remove_note (insn, note);
5605 note = XEXP (note, 1);
5607 else if (note_type == NOTE_INSN_RANGE_START
5608 || note_type == NOTE_INSN_RANGE_END)
5610 last = emit_note_before (note_type, last);
5611 remove_note (insn, note);
5612 note = XEXP (note, 1);
5613 NOTE_RANGE_INFO (last) = XEXP (note, 0);
5615 else
5617 last = emit_note_before (note_type, last);
5618 remove_note (insn, note);
5619 note = XEXP (note, 1);
5620 if (note_type == NOTE_INSN_EH_REGION_BEG
5621 || note_type == NOTE_INSN_EH_REGION_END)
5622 NOTE_EH_HANDLER (last) = INTVAL (XEXP (note, 0));
5624 remove_note (insn, note);
5627 return retval;
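/* To make the pairing concrete: for a setjmp, sched_analyze attaches two
   REG_SAVE_NOTEs to the call, conceptually

       (expr_list:REG_SAVE_NOTE (const_int NOTE_INSN_SETJMP)
          (expr_list:REG_SAVE_NOTE (const_int 0) ...))

   and reemit_notes walks REG_NOTES, emits the real NOTE (after the insn for
   setjmp, before LAST for the loop/EH/range kinds) and removes both halves
   of the pair.  */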
5630 /* Move INSN, and all insns which should be issued before it,
5631 due to SCHED_GROUP_P flag. Reemit notes if needed.
5633 Return the last insn emitted by the scheduler, which is the
5634 return value from the first call to reemit_notes. */
5636 static rtx
5637 move_insn (insn, last)
5638 rtx insn, last;
5640 rtx retval = NULL;
5642 /* If INSN has SCHED_GROUP_P set, then issue it and any other
5643 insns with SCHED_GROUP_P set first. */
5644 while (SCHED_GROUP_P (insn))
5646 rtx prev = PREV_INSN (insn);
5648 /* Move a SCHED_GROUP_P insn. */
5649 move_insn1 (insn, last);
5650 /* If this is the first call to reemit_notes, then record
5651 its return value. */
5652 if (retval == NULL_RTX)
5653 retval = reemit_notes (insn, insn);
5654 else
5655 reemit_notes (insn, insn);
5656 insn = prev;
5659 /* Now move the first non SCHED_GROUP_P insn. */
5660 move_insn1 (insn, last);
5662 /* If this is the first call to reemit_notes, then record
5663 its return value. */
5664 if (retval == NULL_RTX)
5665 retval = reemit_notes (insn, insn);
5666 else
5667 reemit_notes (insn, insn);
5669 return retval;
5672 /* Return an insn which represents a SCHED_GROUP, which is
5673 the last insn in the group. */
5675 static rtx
5676 group_leader (insn)
5677 rtx insn;
5679 rtx prev;
5683 prev = insn;
5684 insn = next_nonnote_insn (insn);
5686 while (insn && SCHED_GROUP_P (insn) && (GET_CODE (insn) != CODE_LABEL));
5688 return prev;
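/* For example (a schematic insn stream): just before a call the stream may
   look like

       (use r4)
       (use r5)     <- SCHED_GROUP_P set
       (call ...)   <- SCHED_GROUP_P set

   i.e. SCHED_GROUP_P on an insn means it must issue immediately after its
   predecessor.  Given the first USE, group_leader skips forward while the
   following insns carry the flag and returns the call, the last member of
   the group.  */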
5691 /* Use forward list scheduling to rearrange insns of block BB in region RGN,
5692 possibly bringing insns from subsequent blocks in the same region.
5693 Return number of insns scheduled. */
5695 static int
5696 schedule_block (bb, rgn_n_insns)
5697 int bb;
5698 int rgn_n_insns;
5700 /* Local variables. */
5701 rtx insn, last;
5702 rtx *ready;
5703 int n_ready = 0;
5704 int can_issue_more;
5706 /* Flow block of this bb. */
5707 int b = BB_TO_BLOCK (bb);
5709 /* target_n_insns == number of insns in b before scheduling starts.
5710 sched_target_n_insns == how many of b's insns were scheduled.
5711 sched_n_insns == how many insns were scheduled in b. */
5712 int target_n_insns = 0;
5713 int sched_target_n_insns = 0;
5714 int sched_n_insns = 0;
5716 #define NEED_NOTHING 0
5717 #define NEED_HEAD 1
5718 #define NEED_TAIL 2
5719 int new_needs;
5721 /* Head/tail info for this block. */
5722 rtx prev_head;
5723 rtx next_tail;
5724 rtx head;
5725 rtx tail;
5726 int bb_src;
5728 /* We used to have code to avoid getting parameters moved from hard
5729 argument registers into pseudos.
5731 However, it was removed when it proved to be of marginal benefit
5732 and caused problems because schedule_block and compute_forward_dependences
5733 had different notions of what the "head" insn was. */
5734 get_bb_head_tail (bb, &head, &tail);
5736 /* Interblock scheduling could have moved the original head insn from this
5737      block into a preceding block.  This may also cause schedule_block and
5738 compute_forward_dependences to have different notions of what the
5739 "head" insn was.
5741 If the interblock movement happened to make this block start with
5742 some notes (LOOP, EH or SETJMP) before the first real insn, then
5743 HEAD will have various special notes attached to it which must be
5744 removed so that we don't end up with extra copies of the notes. */
5745 if (GET_RTX_CLASS (GET_CODE (head)) == 'i')
5747 rtx note;
5749 for (note = REG_NOTES (head); note; note = XEXP (note, 1))
5750 if (REG_NOTE_KIND (note) == REG_SAVE_NOTE)
5751 remove_note (head, note);
5754 next_tail = NEXT_INSN (tail);
5755 prev_head = PREV_INSN (head);
5757 /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
5758 to schedule this block. */
5759 if (head == tail
5760 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5761 return (sched_n_insns);
5763 /* Debug info. */
5764 if (sched_verbose)
5766 fprintf (dump, ";; ======================================================\n");
5767 fprintf (dump,
5768 ";; -- basic block %d from %d to %d -- %s reload\n",
5769 b, INSN_UID (BLOCK_HEAD (b)), INSN_UID (BLOCK_END (b)),
5770 (reload_completed ? "after" : "before"));
5771 fprintf (dump, ";; ======================================================\n");
5772 fprintf (dump, "\n");
5774 visual_tbl = (char *) alloca (get_visual_tbl_length ());
5775 init_block_visualization ();
5778 /* Remove remaining note insns from the block, save them in
5779 note_list. These notes are restored at the end of
5780 schedule_block (). */
5781 note_list = 0;
5782 rm_other_notes (head, tail);
5784 target_bb = bb;
5786 /* Prepare current target block info. */
5787 if (current_nr_blocks > 1)
5789 candidate_table = (candidate *) xmalloc (current_nr_blocks
5790 * sizeof (candidate));
5792 bblst_last = 0;
5793 /* ??? It is not clear why bblst_size is computed this way. The original
5794 number was clearly too small as it resulted in compiler failures.
5795 	 Multiplying the original number by 2 (to account for update_bbs
5796 members) seems to be a reasonable solution. */
5797 /* ??? Or perhaps there is a bug somewhere else in this file? */
5798 bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2;
5799 bblst_table = (int *) xmalloc (bblst_size * sizeof (int));
5801 bitlst_table_last = 0;
5802 bitlst_table_size = rgn_nr_edges;
5803 bitlst_table = (int *) xmalloc (rgn_nr_edges * sizeof (int));
5805 compute_trg_info (bb);
5808 clear_units ();
5810 /* Allocate the ready list. */
5811 ready = (rtx *) xmalloc ((rgn_n_insns + 1) * sizeof (rtx));
5813 /* Print debugging information. */
5814 if (sched_verbose >= 5)
5815 debug_dependencies ();
5818 /* Initialize ready list with all 'ready' insns in target block.
5819 Count number of insns in the target block being scheduled. */
5820 n_ready = 0;
5821 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
5823 rtx next;
5825 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5826 continue;
5827 next = NEXT_INSN (insn);
5829 if (INSN_DEP_COUNT (insn) == 0
5830 && (SCHED_GROUP_P (next) == 0 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5831 ready[n_ready++] = insn;
5832 if (!(SCHED_GROUP_P (insn)))
5833 target_n_insns++;
5836 /* Add to ready list all 'ready' insns in valid source blocks.
5837 For speculative insns, check-live, exception-free, and
5838 issue-delay. */
5839 for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++)
5840 if (IS_VALID (bb_src))
5842 rtx src_head;
5843 rtx src_next_tail;
5844 rtx tail, head;
5846 get_bb_head_tail (bb_src, &head, &tail);
5847 src_next_tail = NEXT_INSN (tail);
5848 src_head = head;
5850 if (head == tail
5851 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
5852 continue;
5854 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
5856 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
5857 continue;
5859 if (!CANT_MOVE (insn)
5860 && (!IS_SPECULATIVE_INSN (insn)
5861 || (insn_issue_delay (insn) <= 3
5862 && check_live (insn, bb_src)
5863 && is_exception_free (insn, bb_src, target_bb))))
5865 rtx next;
5867 	      /* Note that we haven't squirreled away the notes for
5868 		 blocks other than the current one.  So if this is a
5869 speculative insn, NEXT might otherwise be a note. */
5870 next = next_nonnote_insn (insn);
5871 if (INSN_DEP_COUNT (insn) == 0
5872 && (! next
5873 || SCHED_GROUP_P (next) == 0
5874 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
5875 ready[n_ready++] = insn;
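/* In other words, an insn from another block is a candidate only if it
   is not marked CANT_MOVE (see add_branch_dependences below) and, when
   it is speculative, only if its issue delay is at most 3 cycles and it
   passes the liveness (check_live) and exception-freedom
   (is_exception_free) tests for this source/target block pair.  */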
5880 #ifdef MD_SCHED_INIT
5881 MD_SCHED_INIT (dump, sched_verbose);
5882 #endif
5884 /* No insns scheduled in this block yet. */
5885 last_scheduled_insn = 0;
5887 /* Q_SIZE is the total number of insns in the queue. */
5888 q_ptr = 0;
5889 q_size = 0;
5890 last_clock_var = 0;
5891 bzero ((char *) insn_queue, sizeof (insn_queue));
5893 /* Start just before the beginning of time. */
5894 clock_var = -1;
5896 /* We start inserting insns after PREV_HEAD. */
5897 last = prev_head;
5899 /* Initialize INSN_QUEUE, LIST and NEW_NEEDS. */
5900 new_needs = (NEXT_INSN (prev_head) == BLOCK_HEAD (b)
5901 ? NEED_HEAD : NEED_NOTHING);
5902 if (PREV_INSN (next_tail) == BLOCK_END (b))
5903 new_needs |= NEED_TAIL;
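/* NEW_NEEDS records whether the range being scheduled starts at the
   block's first insn and/or ends at its last one.  Scheduling may put
   a different insn first or last, in which case BLOCK_HEAD/BLOCK_END
   must be updated afterwards; see "Update target block boundaries"
   below.  */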
5905 /* Loop until all the insns in BB are scheduled. */
5906 while (sched_target_n_insns < target_n_insns)
5908 clock_var++;
5910 /* Add to the ready list all pending insns that can be issued now.
5911 If there are no ready insns, increment clock until one
5912 is ready and add all pending insns at that point to the ready
5913 list. */
5914 n_ready = queue_to_ready (ready, n_ready);
5916 if (n_ready == 0)
5917 abort ();
5919 if (sched_verbose >= 2)
5921 fprintf (dump, ";;\t\tReady list after queue_to_ready: ");
5922 debug_ready_list (ready, n_ready);
5925 /* Sort the ready list based on priority. */
5926 SCHED_SORT (ready, n_ready);
5928 /* Allow the target to reorder the list, typically for
5929 better instruction bundling. */
5930 #ifdef MD_SCHED_REORDER
5931 MD_SCHED_REORDER (dump, sched_verbose, ready, n_ready, clock_var,
5932 can_issue_more);
5933 #else
5934 can_issue_more = issue_rate;
5935 #endif
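/* CAN_ISSUE_MORE is the number of insns that may still be issued in
   the current cycle.  By default it is reset to the machine's issue
   rate each cycle; a port's MD_SCHED_REORDER hook is expected to set
   it itself, e.g. to reflect bundling constraints.  */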
5937 if (sched_verbose)
5939 fprintf (dump, "\n;;\tReady list (t =%3d): ", clock_var);
5940 debug_ready_list (ready, n_ready);
5943 /* Issue insns from ready list. */
5944 while (n_ready != 0 && can_issue_more)
5946 /* Select and remove the insn from the ready list. */
5947 rtx insn = ready[--n_ready];
5948 int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
5950 if (cost >= 1)
5952 queue_insn (insn, cost);
5953 continue;
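/* A nonzero cost from actual_hazard means the insn's function unit is
   still busy; queue_insn presumably re-queues it so that it becomes
   ready again COST cycles from now, once the conflict has cleared.  */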
5956 /* An interblock motion? */
5957 if (INSN_BB (insn) != target_bb)
5959 rtx temp;
5960 basic_block b1;
5962 if (IS_SPECULATIVE_INSN (insn))
5964 if (!check_live (insn, INSN_BB (insn)))
5965 continue;
5966 update_live (insn, INSN_BB (insn));
5968 /* For speculative load, mark insns fed by it. */
5969 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
5970 set_spec_fed (insn);
5972 nr_spec++;
5974 nr_inter++;
5976 /* Find the beginning of the scheduling group. */
5977 /* ??? Ought to update basic block here, but later bits of
5978 schedule_block assume the original insn block is
5979 still intact. */
5981 temp = insn;
5982 while (SCHED_GROUP_P (temp))
5983 temp = PREV_INSN (temp);
5985 /* Update source block boundaries. */
5986 b1 = BLOCK_FOR_INSN (temp);
5987 if (temp == b1->head && insn == b1->end)
5989 /* We moved all the insns in the basic block.
5990 Emit a note after the last insn and update the
5991 begin/end boundaries to point to the note. */
5992 rtx note = emit_note_after (NOTE_INSN_DELETED, insn);
5993 b1->head = note;
5994 b1->end = note;
5996 else if (insn == b1->end)
5998 /* We took insns from the end of the basic block,
5999 so update the end of block boundary so that it
6000 points to the first insn we did not move. */
6001 b1->end = PREV_INSN (temp);
6003 else if (temp == b1->head)
6005 /* We took insns from the start of the basic block,
6006 so update the start of block boundary so that
6007 it points to the first insn we did not move. */
6008 b1->head = NEXT_INSN (insn);
6011 else
6013 /* In block motion. */
6014 sched_target_n_insns++;
6017 last_scheduled_insn = insn;
6018 last = move_insn (insn, last);
6019 sched_n_insns++;
6021 #ifdef MD_SCHED_VARIABLE_ISSUE
6022 MD_SCHED_VARIABLE_ISSUE (dump, sched_verbose, insn,
6023 can_issue_more);
6024 #else
6025 can_issue_more--;
6026 #endif
6028 n_ready = schedule_insn (insn, ready, n_ready, clock_var);
6030 /* Close this block after scheduling its jump. */
6031 if (GET_CODE (last_scheduled_insn) == JUMP_INSN)
6032 break;
6035 /* Debug info. */
6036 if (sched_verbose)
6037 visualize_scheduled_insns (b, clock_var);
6040 /* Debug info. */
6041 if (sched_verbose)
6043 fprintf (dump, ";;\tReady list (final): ");
6044 debug_ready_list (ready, n_ready);
6045 print_block_visualization (b, "");
6048 /* Sanity check -- queue must be empty now. Meaningless if region has
6049 multiple bbs. */
6050 if (current_nr_blocks > 1)
6051 if (!flag_schedule_interblock && q_size != 0)
6052 abort ();
6054 /* Update head/tail boundaries. */
6055 head = NEXT_INSN (prev_head);
6056 tail = last;
6058 /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
6059 previously found among the insns. Insert them at the beginning
6060 of the insns. */
6061 if (note_list != 0)
6063 rtx note_head = note_list;
6065 while (PREV_INSN (note_head))
6067 note_head = PREV_INSN (note_head);
6070 PREV_INSN (note_head) = PREV_INSN (head);
6071 NEXT_INSN (PREV_INSN (head)) = note_head;
6072 PREV_INSN (head) = note_list;
6073 NEXT_INSN (note_list) = head;
6074 head = note_head;
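/* Pictorially, if the saved notes are n1 ... nk (with NOTE_LIST
   pointing at nk), the code above rebuilds

       PREV_INSN (head) <-> n1 <-> ... <-> nk <-> head <-> ...

   and then makes n1 the new HEAD of the scheduled range.  */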
6077 /* Update target block boundaries. */
6078 if (new_needs & NEED_HEAD)
6079 BLOCK_HEAD (b) = head;
6081 if (new_needs & NEED_TAIL)
6082 BLOCK_END (b) = tail;
6084 /* Debugging. */
6085 if (sched_verbose)
6087 fprintf (dump, ";; total time = %d\n;; new basic block head = %d\n",
6088 clock_var, INSN_UID (BLOCK_HEAD (b)));
6089 fprintf (dump, ";; new basic block end = %d\n\n",
6090 INSN_UID (BLOCK_END (b)));
6093 /* Clean up. */
6094 if (current_nr_blocks > 1)
6096 free (candidate_table);
6097 free (bblst_table);
6098 free (bitlst_table);
6100 free (ready);
6102 return (sched_n_insns);
6103 } /* schedule_block () */
6106 /* Print the bit-set of registers, S, callable from debugger. */
6108 extern void
6109 debug_reg_vector (s)
6110 regset s;
6112 int regno;
6114 EXECUTE_IF_SET_IN_REG_SET (s, 0, regno,
6116 fprintf (dump, " %d", regno);
6119 fprintf (dump, "\n");
6122 /* Use the backward dependences from LOG_LINKS to build
6123 forward dependences in INSN_DEPEND. */
6125 static void
6126 compute_block_forward_dependences (bb)
6127 int bb;
6129 rtx insn, link;
6130 rtx tail, head;
6131 rtx next_tail;
6132 enum reg_note dep_type;
6134 get_bb_head_tail (bb, &head, &tail);
6135 next_tail = NEXT_INSN (tail);
6136 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6138 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6139 continue;
6141 insn = group_leader (insn);
6143 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
6145 rtx x = group_leader (XEXP (link, 0));
6146 rtx new_link;
6148 if (x != XEXP (link, 0))
6149 continue;
6151 #ifdef ENABLE_CHECKING
6152 /* If add_dependence is working properly there should never
6153 be notes, deleted insns or duplicates in the backward
6154 links. Thus we need not check for them here.
6156 However, if we have enabled checking we might as well go
6157 ahead and verify that add_dependence worked properly. */
6158 if (GET_CODE (x) == NOTE
6159 || INSN_DELETED_P (x)
6160 || find_insn_list (insn, INSN_DEPEND (x)))
6161 abort ();
6162 #endif
6164 new_link = alloc_INSN_LIST (insn, INSN_DEPEND (x));
6166 dep_type = REG_NOTE_KIND (link);
6167 PUT_REG_NOTE_KIND (new_link, dep_type);
6169 INSN_DEPEND (x) = new_link;
6170 INSN_DEP_COUNT (insn) += 1;
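/* So if insn I has a LOG_LINKS (backward) dependence on insn X, the
   loop above records the forward link by putting I on INSN_DEPEND (X)
   and bumping INSN_DEP_COUNT (I).  As insns are issued, schedule_insn
   resolves these forward links, and an insn is considered ready once
   its INSN_DEP_COUNT has dropped to zero (see the tests in
   schedule_block above).  */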
6175 /* Initialize variables for region data dependence analysis.
6176 n_bbs is the number of region blocks. */
6178 static void
6179 init_deps (deps)
6180 struct deps *deps;
6182 int maxreg = max_reg_num ();
6183 deps->reg_last_uses = (rtx *) xcalloc (maxreg, sizeof (rtx));
6184 deps->reg_last_sets = (rtx *) xcalloc (maxreg, sizeof (rtx));
6185 deps->reg_last_clobbers = (rtx *) xcalloc (maxreg, sizeof (rtx));
6187 deps->pending_read_insns = 0;
6188 deps->pending_read_mems = 0;
6189 deps->pending_write_insns = 0;
6190 deps->pending_write_mems = 0;
6191 deps->pending_lists_length = 0;
6192 deps->last_pending_memory_flush = 0;
6193 deps->last_function_call = 0;
6195 deps->sched_before_next_call
6196 = gen_rtx_INSN (VOIDmode, 0, NULL_RTX, NULL_RTX,
6197 NULL_RTX, 0, NULL_RTX, NULL_RTX);
6198 LOG_LINKS (deps->sched_before_next_call) = 0;
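/* SCHED_BEFORE_NEXT_CALL is a dummy insn, built by hand and never
   emitted into the insn stream.  As its name suggests, sched_analyze
   hangs dependences on its LOG_LINKS for insns that must be scheduled
   before an upcoming call; propagate_deps below copies those links to
   successor blocks.  */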
6201 /* Add dependences so that branches are scheduled to run last in their
6202 block. */
6204 static void
6205 add_branch_dependences (head, tail)
6206 rtx head, tail;
6208 rtx insn, last;
6210 /* For all branches, calls, uses, clobbers, and cc0 setters, force them
6211 to remain in order at the end of the block by adding dependencies and
6212 giving the last a high priority. There may be notes present, and
6213 prev_head may also be a note.
6215 Branches must obviously remain at the end. Calls should remain at the
6216 end since moving them results in worse register allocation. Uses remain
6217 at the end to ensure proper register allocation. cc0 setters remain
6218 at the end because they can't be moved away from their cc0 user. */
6219 insn = tail;
6220 last = 0;
6221 while (GET_CODE (insn) == CALL_INSN
6222 || GET_CODE (insn) == JUMP_INSN
6223 || (GET_CODE (insn) == INSN
6224 && (GET_CODE (PATTERN (insn)) == USE
6225 || GET_CODE (PATTERN (insn)) == CLOBBER
6226 #ifdef HAVE_cc0
6227 || sets_cc0_p (PATTERN (insn))
6228 #endif
6230 || GET_CODE (insn) == NOTE)
6232 if (GET_CODE (insn) != NOTE)
6234 if (last != 0
6235 && !find_insn_list (insn, LOG_LINKS (last)))
6237 add_dependence (last, insn, REG_DEP_ANTI);
6238 INSN_REF_COUNT (insn)++;
6241 CANT_MOVE (insn) = 1;
6243 last = insn;
6244 /* Skip over insns that are part of a group.
6245 Make each insn explicitly depend on the previous insn.
6246 This ensures that only the group header will ever enter
6247 the ready queue (and, when scheduled, will automatically
6248 schedule the SCHED_GROUP_P block). */
6249 while (SCHED_GROUP_P (insn))
6251 rtx temp = prev_nonnote_insn (insn);
6252 add_dependence (insn, temp, REG_DEP_ANTI);
6253 insn = temp;
6257 /* Don't overrun the bounds of the basic block. */
6258 if (insn == head)
6259 break;
6261 insn = PREV_INSN (insn);
6264 /* Make sure these insns are scheduled last in their block. */
6265 insn = last;
6266 if (insn != 0)
6267 while (insn != head)
6269 insn = prev_nonnote_insn (insn);
6271 if (INSN_REF_COUNT (insn) != 0)
6272 continue;
6274 add_dependence (last, insn, REG_DEP_ANTI);
6275 INSN_REF_COUNT (insn) = 1;
6277 /* Skip over insns that are part of a group. */
6278 while (SCHED_GROUP_P (insn))
6279 insn = prev_nonnote_insn (insn);
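/* The second loop gives LAST (the first insn of the trailing group) an
   anti-dependence on every earlier insn whose INSN_REF_COUNT is still
   zero, so nothing from the body of the block can be scheduled after
   the trailing branch/call/use/clobber group.  */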
6283 /* After computing the dependencies for block BB, propagate the dependencies
6284 found in TMP_DEPS to the successors of the block. MAX_REG is the number
6285 of registers. */
6286 static void
6287 propagate_deps (bb, tmp_deps, max_reg)
6288 int bb;
6289 struct deps *tmp_deps;
6290 int max_reg;
6292 int b = BB_TO_BLOCK (bb);
6293 int e, first_edge;
6294 int reg;
6295 rtx link_insn, link_mem;
6296 rtx u;
6298 /* These lists should point to the right place, for correct
6299 freeing later. */
6300 bb_deps[bb].pending_read_insns = tmp_deps->pending_read_insns;
6301 bb_deps[bb].pending_read_mems = tmp_deps->pending_read_mems;
6302 bb_deps[bb].pending_write_insns = tmp_deps->pending_write_insns;
6303 bb_deps[bb].pending_write_mems = tmp_deps->pending_write_mems;
6305 /* bb's structures are inherited by its successors. */
6306 first_edge = e = OUT_EDGES (b);
6307 if (e <= 0)
6308 return;
6312 rtx x;
6313 int b_succ = TO_BLOCK (e);
6314 int bb_succ = BLOCK_TO_BB (b_succ);
6315 struct deps *succ_deps = bb_deps + bb_succ;
6317 /* Only bbs "below" bb, in the same region, are interesting. */
6318 if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
6319 || bb_succ <= bb)
6321 e = NEXT_OUT (e);
6322 continue;
6325 for (reg = 0; reg < max_reg; reg++)
6327 /* reg-last-uses lists are inherited by bb_succ. */
6328 for (u = tmp_deps->reg_last_uses[reg]; u; u = XEXP (u, 1))
6330 if (find_insn_list (XEXP (u, 0),
6331 succ_deps->reg_last_uses[reg]))
6332 continue;
6334 succ_deps->reg_last_uses[reg]
6335 = alloc_INSN_LIST (XEXP (u, 0),
6336 succ_deps->reg_last_uses[reg]);
6339 /* reg-last-defs lists are inherited by bb_succ. */
6340 for (u = tmp_deps->reg_last_sets[reg]; u; u = XEXP (u, 1))
6342 if (find_insn_list (XEXP (u, 0),
6343 succ_deps->reg_last_sets[reg]))
6344 continue;
6346 succ_deps->reg_last_sets[reg]
6347 = alloc_INSN_LIST (XEXP (u, 0),
6348 succ_deps->reg_last_sets[reg]);
6351 for (u = tmp_deps->reg_last_clobbers[reg]; u; u = XEXP (u, 1))
6353 if (find_insn_list (XEXP (u, 0),
6354 succ_deps->reg_last_clobbers[reg]))
6355 continue;
6357 succ_deps->reg_last_clobbers[reg]
6358 = alloc_INSN_LIST (XEXP (u, 0),
6359 succ_deps->reg_last_clobbers[reg]);
6363 /* Mem read/write lists are inherited by bb_succ. */
6364 link_insn = tmp_deps->pending_read_insns;
6365 link_mem = tmp_deps->pending_read_mems;
6366 while (link_insn)
6368 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6369 XEXP (link_mem, 0),
6370 succ_deps->pending_read_insns,
6371 succ_deps->pending_read_mems)))
6372 add_insn_mem_dependence (succ_deps, &succ_deps->pending_read_insns,
6373 &succ_deps->pending_read_mems,
6374 XEXP (link_insn, 0), XEXP (link_mem, 0));
6375 link_insn = XEXP (link_insn, 1);
6376 link_mem = XEXP (link_mem, 1);
6379 link_insn = tmp_deps->pending_write_insns;
6380 link_mem = tmp_deps->pending_write_mems;
6381 while (link_insn)
6383 if (!(find_insn_mem_list (XEXP (link_insn, 0),
6384 XEXP (link_mem, 0),
6385 succ_deps->pending_write_insns,
6386 succ_deps->pending_write_mems)))
6387 add_insn_mem_dependence (succ_deps,
6388 &succ_deps->pending_write_insns,
6389 &succ_deps->pending_write_mems,
6390 XEXP (link_insn, 0), XEXP (link_mem, 0));
6392 link_insn = XEXP (link_insn, 1);
6393 link_mem = XEXP (link_mem, 1);
6396 /* last_function_call is inherited by bb_succ. */
6397 for (u = tmp_deps->last_function_call; u; u = XEXP (u, 1))
6399 if (find_insn_list (XEXP (u, 0),
6400 succ_deps->last_function_call))
6401 continue;
6403 succ_deps->last_function_call
6404 = alloc_INSN_LIST (XEXP (u, 0),
6405 succ_deps->last_function_call);
6408 /* last_pending_memory_flush is inherited by bb_succ. */
6409 for (u = tmp_deps->last_pending_memory_flush; u; u = XEXP (u, 1))
6411 if (find_insn_list (XEXP (u, 0),
6412 succ_deps->last_pending_memory_flush))
6413 continue;
6415 succ_deps->last_pending_memory_flush
6416 = alloc_INSN_LIST (XEXP (u, 0),
6417 succ_deps->last_pending_memory_flush);
6420 /* sched_before_next_call is inherited by bb_succ. */
6421 x = LOG_LINKS (tmp_deps->sched_before_next_call);
6422 for (; x; x = XEXP (x, 1))
6423 add_dependence (succ_deps->sched_before_next_call,
6424 XEXP (x, 0), REG_DEP_ANTI);
6426 e = NEXT_OUT (e);
6428 while (e != first_edge);
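/* The effect is that when sched_analyze later processes a successor
   block, it starts out with the register, memory, call and flush lists
   of all already-analyzed predecessors in the region, so the LOG_LINKS
   it builds there include the interblock dependences.  */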
6431 /* Compute backward dependences inside bb. In a multiple blocks region:
6432 (1) a bb is analyzed after its predecessors, and (2) the lists in
6433 effect at the end of bb (after analyzing for bb) are inherited by
6434 bb's successors.
6436 Specifically for reg-reg data dependences, the block insns are
6437 scanned by sched_analyze () top-to-bottom. Two lists are
6438 maintained by sched_analyze (): reg_last_sets[] for register DEFs,
6439 and reg_last_uses[] for register USEs.
6441 When analysis is completed for bb, we update for its successors:
6442 ; - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
6443 ; - USES[succ] = Union (USES [succ], USES [bb])
6445 The mechanism for computing mem-mem data dependence is very
6446 similar, and the result is interblock dependences in the region. */
6448 static void
6449 compute_block_backward_dependences (bb)
6450 int bb;
6452 int i;
6453 rtx head, tail;
6454 int max_reg = max_reg_num ();
6455 struct deps tmp_deps;
6457 tmp_deps = bb_deps[bb];
6459 /* Do the analysis for this block. */
6460 get_bb_head_tail (bb, &head, &tail);
6461 sched_analyze (&tmp_deps, head, tail);
6462 add_branch_dependences (head, tail);
6464 if (current_nr_blocks > 1)
6465 propagate_deps (bb, &tmp_deps, max_reg);
6467 /* Free up the INSN_LISTs.
6469 Note this loop is executed max_reg * nr_regions times. Its first
6470 implementation accounted for over 90% of the calls to free_INSN_LIST_list.
6471 The list was empty for the vast majority of those calls. On the PA, not
6472 calling free_INSN_LIST_list in those cases improves -O2 compile times by
6473 3-5% on average. */
6474 for (i = 0; i < max_reg; ++i)
6476 if (tmp_deps.reg_last_clobbers[i])
6477 free_INSN_LIST_list (&tmp_deps.reg_last_clobbers[i]);
6478 if (tmp_deps.reg_last_sets[i])
6479 free_INSN_LIST_list (&tmp_deps.reg_last_sets[i]);
6480 if (tmp_deps.reg_last_uses[i])
6481 free_INSN_LIST_list (&tmp_deps.reg_last_uses[i]);
6484 /* Assert that we won't need bb_reg_last_* for this block anymore. */
6485 free (bb_deps[bb].reg_last_uses);
6486 free (bb_deps[bb].reg_last_sets);
6487 free (bb_deps[bb].reg_last_clobbers);
6488 bb_deps[bb].reg_last_uses = 0;
6489 bb_deps[bb].reg_last_sets = 0;
6490 bb_deps[bb].reg_last_clobbers = 0;
6493 /* Print dependences for debugging, callable from debugger. */
6495 void
6496 debug_dependencies ()
6498 int bb;
6500 fprintf (dump, ";; --------------- forward dependences: ------------ \n");
6501 for (bb = 0; bb < current_nr_blocks; bb++)
6503 if (1)
6505 rtx head, tail;
6506 rtx next_tail;
6507 rtx insn;
6509 get_bb_head_tail (bb, &head, &tail);
6510 next_tail = NEXT_INSN (tail);
6511 fprintf (dump, "\n;; --- Region Dependences --- b %d bb %d \n",
6512 BB_TO_BLOCK (bb), bb);
6514 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6515 "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
6516 fprintf (dump, ";; %7s%6s%6s%6s%6s%6s%11s%6s\n",
6517 "----", "----", "--", "---", "----", "----", "--------", "-----");
6518 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
6520 rtx link;
6521 int unit, range;
6523 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
6525 int n;
6526 fprintf (dump, ";; %6d ", INSN_UID (insn));
6527 if (GET_CODE (insn) == NOTE)
6529 n = NOTE_LINE_NUMBER (insn);
6530 if (n < 0)
6531 fprintf (dump, "%s\n", GET_NOTE_INSN_NAME (n));
6532 else
6533 fprintf (dump, "line %d, file %s\n", n,
6534 NOTE_SOURCE_FILE (insn));
6536 else
6537 fprintf (dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
6538 continue;
6541 unit = insn_unit (insn);
6542 range = (unit < 0
6543 || function_units[unit].blockage_range_function == 0) ? 0 :
6544 function_units[unit].blockage_range_function (insn);
6545 fprintf (dump,
6546 ";; %s%5d%6d%6d%6d%6d%6d %3d -%3d ",
6547 (SCHED_GROUP_P (insn) ? "+" : " "),
6548 INSN_UID (insn),
6549 INSN_CODE (insn),
6550 INSN_BB (insn),
6551 INSN_DEP_COUNT (insn),
6552 INSN_PRIORITY (insn),
6553 insn_cost (insn, 0, 0),
6554 (int) MIN_BLOCKAGE_COST (range),
6555 (int) MAX_BLOCKAGE_COST (range));
6556 insn_print_units (insn);
6557 fprintf (dump, "\t: ");
6558 for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
6559 fprintf (dump, "%d ", INSN_UID (XEXP (link, 0)));
6560 fprintf (dump, "\n");
6564 fprintf (dump, "\n");
6567 /* Set_priorities: compute priority of each insn in the block. */
6569 static int
6570 set_priorities (bb)
6571 int bb;
6573 rtx insn;
6574 int n_insn;
6576 rtx tail;
6577 rtx prev_head;
6578 rtx head;
6580 get_bb_head_tail (bb, &head, &tail);
6581 prev_head = PREV_INSN (head);
6583 if (head == tail
6584 && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
6585 return 0;
6587 n_insn = 0;
6588 for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
6591 if (GET_CODE (insn) == NOTE)
6592 continue;
6594 if (!(SCHED_GROUP_P (insn)))
6595 n_insn++;
6596 (void) priority (insn);
6599 return n_insn;
6602 /* Schedule a region. A region is either an inner loop, a loop-free
6603 subroutine, or a single basic block. Each bb in the region is
6604 scheduled after its flow predecessors. */
6606 static void
6607 schedule_region (rgn)
6608 int rgn;
6610 int bb;
6611 int rgn_n_insns = 0;
6612 int sched_rgn_n_insns = 0;
6614 /* Set variables for the current region. */
6615 current_nr_blocks = RGN_NR_BLOCKS (rgn);
6616 current_blocks = RGN_BLOCKS (rgn);
6618 reg_pending_sets = ALLOCA_REG_SET ();
6619 reg_pending_clobbers = ALLOCA_REG_SET ();
6620 reg_pending_sets_all = 0;
6622 /* Initializations for region data dependence analysis. */
6623 bb_deps = (struct deps *) xmalloc (sizeof (struct deps) * current_nr_blocks);
6624 for (bb = 0; bb < current_nr_blocks; bb++)
6625 init_deps (bb_deps + bb);
6627 /* Compute LOG_LINKS. */
6628 for (bb = 0; bb < current_nr_blocks; bb++)
6629 compute_block_backward_dependences (bb);
6631 /* Compute INSN_DEPEND. */
6632 for (bb = current_nr_blocks - 1; bb >= 0; bb--)
6633 compute_block_forward_dependences (bb);
6635 /* Delete line notes and set priorities. */
6636 for (bb = 0; bb < current_nr_blocks; bb++)
6638 if (write_symbols != NO_DEBUG)
6640 save_line_notes (bb);
6641 rm_line_notes (bb);
6644 rgn_n_insns += set_priorities (bb);
6647 /* Compute interblock info: probabilities, split-edges, dominators, etc. */
6648 if (current_nr_blocks > 1)
6650 int i;
6652 prob = (float *) xmalloc ((current_nr_blocks) * sizeof (float));
6654 bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
6655 dom = (bbset *) xmalloc (current_nr_blocks * sizeof (bbset));
6656 for (i = 0; i < current_nr_blocks; i++)
6657 dom[i] = (bbset) xcalloc (bbset_size, sizeof (HOST_WIDE_INT));
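/* A bbset (and, below, an edgeset) is just an array of HOST_WIDE_INTs
   used as a bit vector with one bit per block (or edge); the
   "/ HOST_BITS_PER_WIDE_INT + 1" computes the number of words needed.
   Bit B is presumably word B / HOST_BITS_PER_WIDE_INT, bit
   B % HOST_BITS_PER_WIDE_INT.  */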
6659 /* Edge to bit. */
6660 rgn_nr_edges = 0;
6661 edge_to_bit = (int *) xmalloc (nr_edges * sizeof (int));
6662 for (i = 1; i < nr_edges; i++)
6663 if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
6664 EDGE_TO_BIT (i) = rgn_nr_edges++;
6665 rgn_edges = (int *) xmalloc (rgn_nr_edges * sizeof (int));
6667 rgn_nr_edges = 0;
6668 for (i = 1; i < nr_edges; i++)
6669 if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
6670 rgn_edges[rgn_nr_edges++] = i;
6672 /* Split edges. */
6673 edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
6674 pot_split = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6675 ancestor_edges
6676 = (edgeset *) xmalloc (current_nr_blocks * sizeof (edgeset));
6677 for (i = 0; i < current_nr_blocks; i++)
6679 pot_split[i] =
6680 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6681 ancestor_edges[i] =
6682 (edgeset) xcalloc (edgeset_size, sizeof (HOST_WIDE_INT));
6685 /* Compute probabilities, dominators, split_edges. */
6686 for (bb = 0; bb < current_nr_blocks; bb++)
6687 compute_dom_prob_ps (bb);
6690 /* Now we can schedule all blocks. */
6691 for (bb = 0; bb < current_nr_blocks; bb++)
6692 sched_rgn_n_insns += schedule_block (bb, rgn_n_insns);
6694 /* Sanity check: verify that all region insns were scheduled. */
6695 if (sched_rgn_n_insns != rgn_n_insns)
6696 abort ();
6698 /* Restore line notes. */
6699 if (write_symbols != NO_DEBUG)
6701 for (bb = 0; bb < current_nr_blocks; bb++)
6702 restore_line_notes (bb);
6705 /* Done with this region. */
6706 free_pending_lists ();
6708 FREE_REG_SET (reg_pending_sets);
6709 FREE_REG_SET (reg_pending_clobbers);
6711 free (bb_deps);
6713 if (current_nr_blocks > 1)
6715 int i;
6717 free (prob);
6718 for (i = 0; i < current_nr_blocks; ++i)
6720 free (dom[i]);
6721 free (pot_split[i]);
6722 free (ancestor_edges[i]);
6724 free (dom);
6725 free (edge_to_bit);
6726 free (rgn_edges);
6727 free (pot_split);
6728 free (ancestor_edges);
6732 /* The one entry point in this file. DUMP_FILE is the dump file for
6733 this pass. */
6735 void
6736 schedule_insns (dump_file)
6737 FILE *dump_file;
6739 int *deaths_in_region;
6740 sbitmap blocks, large_region_blocks;
6741 int max_uid;
6742 int b;
6743 rtx insn;
6744 int rgn;
6745 int luid;
6746 int any_large_regions;
6748 /* Disable speculative loads if cc0 is defined. */
6749 #ifdef HAVE_cc0
6750 flag_schedule_speculative_load = 0;
6751 #endif
6753 /* Taking care of this degenerate case makes the rest of
6754 this code simpler. */
6755 if (n_basic_blocks == 0)
6756 return;
6758 /* Set dump and sched_verbose for the desired debugging output. If no
6759 dump-file was specified, but -fsched-verbose-N (any N), print to stderr.
6760 For -fsched-verbose-N, N>=10, print everything to stderr. */
6761 sched_verbose = sched_verbose_param;
6762 if (sched_verbose_param == 0 && dump_file)
6763 sched_verbose = 1;
6764 dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file);
6766 nr_inter = 0;
6767 nr_spec = 0;
6769 /* Initialize issue_rate. */
6770 issue_rate = ISSUE_RATE;
6772 split_all_insns (1);
6774 /* We use LUID 0 for the fake insn (UID 0) which holds dependencies for
6775 pseudos which do not cross calls. */
6776 max_uid = get_max_uid () + 1;
6778 h_i_d = (struct haifa_insn_data *) xcalloc (max_uid, sizeof (*h_i_d));
6780 h_i_d[0].luid = 0;
6781 luid = 1;
6782 for (b = 0; b < n_basic_blocks; b++)
6783 for (insn = BLOCK_HEAD (b);; insn = NEXT_INSN (insn))
6785 INSN_LUID (insn) = luid;
6787 /* Increment the next luid, unless this is a note. We don't
6788 really need separate IDs for notes and we don't want to
6789 schedule differently depending on whether or not there are
6790 line-number notes, i.e., depending on whether or not we're
6791 generating debugging information. */
6792 if (GET_CODE (insn) != NOTE)
6793 ++luid;
6795 if (insn == BLOCK_END (b))
6796 break;
6799 /* ?!? We could save some memory by computing a per-region luid mapping
6800 which could reduce both the number of vectors in the cache and the size
6801 of each vector. Instead we just avoid the cache entirely unless the
6802 average number of instructions in a basic block is very high. See
6803 the comment before the declaration of true_dependency_cache for
6804 what we consider "very high". */
6805 if (luid / n_basic_blocks > 100 * 5)
6807 true_dependency_cache = sbitmap_vector_alloc (luid, luid);
6808 sbitmap_vector_zero (true_dependency_cache, luid);
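/* The cache is a vector of LUID sbitmaps, each with LUID bits, i.e.
   roughly luid * luid bits in total, which is why it is only allocated
   when blocks average more than 500 (100 * 5) insns.  */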
6811 nr_regions = 0;
6812 rgn_table = (region *) xmalloc ((n_basic_blocks) * sizeof (region));
6813 rgn_bb_table = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6814 block_to_bb = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6815 containing_rgn = (int *) xmalloc ((n_basic_blocks) * sizeof (int));
6817 blocks = sbitmap_alloc (n_basic_blocks);
6818 large_region_blocks = sbitmap_alloc (n_basic_blocks);
6820 compute_bb_for_insn (max_uid);
6822 /* Compute regions for scheduling. */
6823 if (reload_completed
6824 || n_basic_blocks == 1
6825 || !flag_schedule_interblock)
6827 find_single_block_region ();
6829 else
6831 /* Verify that a 'good' control flow graph can be built. */
6832 if (is_cfg_nonregular ())
6834 find_single_block_region ();
6836 else
6838 sbitmap *dom;
6839 struct edge_list *edge_list;
6841 dom = sbitmap_vector_alloc (n_basic_blocks, n_basic_blocks);
6843 /* The scheduler runs after flow; therefore, we can't blindly call
6844 back into find_basic_blocks since doing so could invalidate the
6845 info in global_live_at_start.
6847 Consider a block consisting entirely of dead stores; after life
6848 analysis it would be a block of NOTE_INSN_DELETED notes. If
6849 we call find_basic_blocks again, then the block would be removed
6850 entirely and invalidate the register live information.
6852 We could (should?) recompute register live information. Doing
6853 so may even be beneficial. */
6854 edge_list = create_edge_list ();
6856 /* Compute the dominators and post dominators. We don't
6857 currently use post dominators, but we should for
6858 speculative motion analysis. */
6859 compute_flow_dominators (dom, NULL);
6861 /* build_control_flow will return nonzero if it detects unreachable
6862 blocks or any other irregularity with the cfg which prevents
6863 cross block scheduling. */
6864 if (build_control_flow (edge_list) != 0)
6865 find_single_block_region ();
6866 else
6867 find_rgns (edge_list, dom);
6869 if (sched_verbose >= 3)
6870 debug_regions ();
6872 /* For now. This will move as more and more of haifa is converted
6873 to using the cfg code in flow.c. */
6874 free (dom);
6878 deaths_in_region = (int *) xmalloc (sizeof(int) * nr_regions);
6880 init_alias_analysis ();
6882 if (write_symbols != NO_DEBUG)
6884 rtx line;
6886 line_note_head = (rtx *) xcalloc (n_basic_blocks, sizeof (rtx));
6888 /* Save-line-note-head:
6889 Determine the line-number at the start of each basic block.
6890 This must be computed and saved now, because after a basic block's
6891 predecessor has been scheduled, it is impossible to accurately
6892 determine the correct line number for the first insn of the block. */
6894 for (b = 0; b < n_basic_blocks; b++)
6895 for (line = BLOCK_HEAD (b); line; line = PREV_INSN (line))
6896 if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
6898 line_note_head[b] = line;
6899 break;
6903 /* Find units used in this function, for visualization. */
6904 if (sched_verbose)
6905 init_target_units ();
6907 /* ??? Add a NOTE after the last insn of the last basic block. It is not
6908 known why this is done. */
6910 insn = BLOCK_END (n_basic_blocks - 1);
6911 if (NEXT_INSN (insn) == 0
6912 || (GET_CODE (insn) != NOTE
6913 && GET_CODE (insn) != CODE_LABEL
6914 /* Don't emit a NOTE if it would end up between an unconditional
6915 jump and a BARRIER. */
6916 && !(GET_CODE (insn) == JUMP_INSN
6917 && GET_CODE (NEXT_INSN (insn)) == BARRIER)))
6918 emit_note_after (NOTE_INSN_DELETED, BLOCK_END (n_basic_blocks - 1));
6920 /* Compute INSN_REG_WEIGHT for all blocks. We must do this before
6921 removing death notes. */
6922 for (b = n_basic_blocks - 1; b >= 0; b--)
6923 find_insn_reg_weight (b);
6925 /* Remove all death notes from the subroutine. */
6926 for (rgn = 0; rgn < nr_regions; rgn++)
6928 sbitmap_zero (blocks);
6929 for (b = RGN_NR_BLOCKS (rgn) - 1; b >= 0; --b)
6930 SET_BIT (blocks, rgn_bb_table [RGN_BLOCKS (rgn) + b]);
6932 deaths_in_region[rgn] = count_or_remove_death_notes (blocks, 1);
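/* The death notes themselves are recomputed by update_life_info after
   scheduling; the per-region counts saved here let the single-block
   case below verify that scheduling did not change the number of
   registers that die in the region.  */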
6935 /* Schedule every region in the subroutine. */
6936 for (rgn = 0; rgn < nr_regions; rgn++)
6937 schedule_region (rgn);
6939 /* Update life analysis for the subroutine. Do single block regions
6940 first so that we can verify that live_at_start didn't change. Then
6941 do all other blocks. */
6942 /* ??? There is an outside possibility that update_life_info, or more
6943 to the point propagate_block, could get called with non-zero flags
6944 more than once for one basic block. This would be kinda bad if it
6945 were to happen, since REG_INFO would be accumulated twice for the
6946 block, and we'd have twice the REG_DEAD notes.
6948 I'm fairly certain that this _shouldn't_ happen, since I don't think
6949 that live_at_start should change at region heads. Not sure what the
6950 best way to test for this kind of thing would be... */
6952 allocate_reg_life_data ();
6953 compute_bb_for_insn (max_uid);
6955 any_large_regions = 0;
6956 sbitmap_ones (large_region_blocks);
6958 for (rgn = 0; rgn < nr_regions; rgn++)
6959 if (RGN_NR_BLOCKS (rgn) > 1)
6960 any_large_regions = 1;
6961 else
6963 sbitmap_zero (blocks);
6964 SET_BIT (blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
6965 RESET_BIT (large_region_blocks, rgn_bb_table[RGN_BLOCKS (rgn)]);
6967 update_life_info (blocks, UPDATE_LIFE_LOCAL,
6968 PROP_DEATH_NOTES | PROP_REG_INFO);
6970 /* In the single block case, the count of registers that died should
6971 not have changed during the schedule. */
6972 if (count_or_remove_death_notes (blocks, 0) != deaths_in_region[rgn])
6973 abort ();
6976 if (any_large_regions)
6978 update_life_info (large_region_blocks, UPDATE_LIFE_GLOBAL,
6979 PROP_DEATH_NOTES | PROP_REG_INFO);
6982 /* Reposition the prologue and epilogue notes in case we moved the
6983 prologue/epilogue insns. */
6984 if (reload_completed)
6985 reposition_prologue_and_epilogue_notes (get_insns ());
6987 /* Delete redundant line notes. */
6988 if (write_symbols != NO_DEBUG)
6989 rm_redundant_line_notes ();
6991 if (sched_verbose)
6993 if (reload_completed == 0 && flag_schedule_interblock)
6995 fprintf (dump, "\n;; Procedure interblock/speculative motions == %d/%d \n",
6996 nr_inter, nr_spec);
6998 else
7000 if (nr_inter > 0)
7001 abort ();
7003 fprintf (dump, "\n\n");
7006 /* Clean up. */
7007 end_alias_analysis ();
7009 if (true_dependency_cache)
7011 free (true_dependency_cache);
7012 true_dependency_cache = NULL;
7014 free (rgn_table);
7015 free (rgn_bb_table);
7016 free (block_to_bb);
7017 free (containing_rgn);
7019 free (h_i_d);
7021 if (write_symbols != NO_DEBUG)
7022 free (line_note_head);
7024 if (edge_table)
7026 free (edge_table);
7027 edge_table = NULL;
7030 if (in_edges)
7032 free (in_edges);
7033 in_edges = NULL;
7035 if (out_edges)
7037 free (out_edges);
7038 out_edges = NULL;
7041 sbitmap_free (blocks);
7042 sbitmap_free (large_region_blocks);
7044 free (deaths_in_region);
7047 #endif /* INSN_SCHEDULING */