1 (***********************************************************************)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. All rights reserved. This file is distributed *)
9 (* under the terms of the Q Public License version 1.0. *)
11 (***********************************************************************)
15 (* Instruction scheduling *)
22 (* Representation of the code DAG. *)
25 { instr
: instruction
; (* The instruction *)
26 delay
: int; (* How many cycles before result is available *)
27 mutable sons
: (code_dag_node
* int) list
;
28 (* Instructions that depend on it *)
29 mutable date
: int; (* Start date *)
30 mutable length
: int; (* Length of longest path to result *)
31 mutable ancestors
: int; (* Number of ancestors *)
32 mutable emitted_ancestors
: int } (* Number of emitted ancestors *)
35 { instr
= end_instr
; delay
= 0; sons
= []; date
= 0;
36 length
= -1; ancestors
= 0; emitted_ancestors
= 0 }
38 (* The code dag itself is represented by two tables from registers to nodes:
39 - "results" maps registers to the instructions that produced them;
40 - "uses" maps registers to the instructions that use them.
42 - code_stores contains the latest store nodes emitted so far
43 - code_loads contains all load nodes emitted since the last store
44 - code_checkbounds contains the latest checkbound node not matched
45 by a subsequent load or store. *)
47 let code_results = (Hashtbl.create
31 : (location
, code_dag_node
) Hashtbl.t
)
48 let code_uses = (Hashtbl.create
31 : (location
, code_dag_node
) Hashtbl.t
)
49 let code_stores = ref ([] : code_dag_node list
)
50 let code_loads = ref ([] : code_dag_node list
)
51 let code_checkbounds = ref ([] : code_dag_node list
)
53 let clear_code_dag () =
54 Hashtbl.clear
code_results;
55 Hashtbl.clear
code_uses;
58 code_checkbounds := []
60 (* Add an edge to the code DAG *)
62 let add_edge ancestor son delay
=
63 ancestor
.sons
<- (son
, delay
) :: ancestor
.sons
;
64 son
.ancestors
<- son
.ancestors
+ 1
66 let add_edge_after son ancestor
= add_edge ancestor son
0
68 (* Compute length of longest path to a result.
69 For leafs of the DAG, see whether their result is used in the instruction
70 immediately following the basic block (a "critical" output). *)
72 let is_critical critical_outputs results
=
74 for i
= 0 to Array.length results
- 1 do
75 let r = results
.(i
).loc
in
76 for j
= 0 to Array.length critical_outputs
- 1 do
77 if critical_outputs
.(j
).loc
= r then raise Exit
84 let rec longest_path critical_outputs node
=
85 if node
.length
< 0 then begin
89 if is_critical critical_outputs node
.instr
.res
90 || node
.instr
.desc
= Lreloadretaddr
(* alway critical *)
96 (fun len
(son
, delay
) ->
97 max len
(longest_path critical_outputs son
+ delay
))
102 (* Remove an instruction from the ready queue *)
104 let rec remove_instr node
= function
107 if instr
== node
then rem
else instr
:: remove_instr node rem
109 (* We treat Lreloadretaddr as a word-sized load *)
111 let some_load = (Iload
(Cmm.Word
, Arch.identity_addressing
))
113 (* The generic scheduler *)
115 class virtual scheduler_generic
= object (self
)
117 (* Determine whether an operation ends a basic block or not.
118 Can be overriden for some processors to signal specific instructions
119 that terminate a basic block. *)
121 method oper_in_basic_block
= function
123 | Icall_imm _
-> false
124 | Itailcall_ind
-> false
125 | Itailcall_imm _
-> false
126 | Iextcall _
-> false
127 | Istackoffset _
-> false
131 (* Determine whether an instruction ends a basic block or not *)
133 method private instr_in_basic_block instr
=
134 match instr
.desc
with
135 Lop op
-> self#oper_in_basic_block op
136 | Lreloadretaddr
-> true
139 (* Determine whether an operation is a memory store or a memory load.
140 Can be overriden for some processors to signal specific
141 load or store instructions (e.g. on the I386). *)
143 method is_store
= function
147 method is_load
= function
151 method is_checkbound
= function
152 Iintop Icheckbound
-> true
153 | Iintop_imm
(Icheckbound
, _
) -> true
156 method private instr_is_store instr
=
157 match instr
.desc
with
158 Lop op
-> self#is_store op
161 method private instr_is_load instr
=
162 match instr
.desc
with
163 Lop op
-> self#is_load op
166 method private instr_is_checkbound instr
=
167 match instr
.desc
with
168 Lop op
-> self#is_checkbound op
171 (* Estimate the latency of an operation. *)
173 method virtual oper_latency
: Mach.operation
-> int
175 (* Estimate the latency of a Lreloadretaddr operation. *)
177 method reload_retaddr_latency
= self#oper_latency
some_load
179 (* Estimate the delay needed to evaluate an instruction *)
181 method private instr_latency instr
=
182 match instr
.desc
with
183 Lop op
-> self#oper_latency op
184 | Lreloadretaddr
-> self#reload_retaddr_latency
187 (* Estimate the number of cycles consumed by emitting an operation. *)
189 method virtual oper_issue_cycles
: Mach.operation
-> int
191 (* Estimate the number of cycles consumed by emitting a Lreloadretaddr. *)
193 method reload_retaddr_issue_cycles
= self#oper_issue_cycles
some_load
195 (* Estimate the number of cycles consumed by emitting an instruction. *)
197 method private instr_issue_cycles instr
=
198 match instr
.desc
with
199 Lop op
-> self#oper_issue_cycles op
200 | Lreloadretaddr
-> self#reload_retaddr_issue_cycles
203 (* Add an instruction to the code dag *)
205 method private add_instruction ready_queue instr
=
206 let delay = self#instr_latency instr
in
214 emitted_ancestors
= 0 } in
215 (* Add edges from all instructions that define one of the registers used
216 (RAW dependencies) *)
217 for i
= 0 to Array.length instr
.arg
- 1 do
219 let ancestor = Hashtbl.find
code_results instr
.arg
.(i
).loc
in
220 add_edge ancestor node ancestor.delay
224 (* Also add edges from all instructions that use one of the result regs
225 of this instruction (WAR dependencies). *)
226 for i
= 0 to Array.length instr
.res
- 1 do
227 let ancestors = Hashtbl.find_all
code_uses instr
.res
.(i
).loc
in
228 List.iter
(add_edge_after node) ancestors
230 (* Also add edges from all instructions that have already defined one
231 of the results of this instruction (WAW dependencies). *)
232 for i
= 0 to Array.length instr
.res
- 1 do
234 let ancestor = Hashtbl.find
code_results instr
.res
.(i
).loc
in
235 add_edge ancestor node 0
239 (* If this is a load, add edges from the most recent store viewed so
240 far (if any) and remember the load. Also add edges from the most
241 recent checkbound and forget that checkbound. *)
242 if self#instr_is_load instr
then begin
243 List.iter
(add_edge_after node) !code_stores;
244 code_loads := node :: !code_loads;
245 List.iter
(add_edge_after node) !code_checkbounds;
246 code_checkbounds := []
248 (* If this is a store, add edges from the most recent store,
249 as well as all loads viewed since then, and also the most recent
250 checkbound. Remember the store,
251 discarding the previous stores, loads and checkbounds. *)
252 else if self#instr_is_store instr
then begin
253 List.iter
(add_edge_after node) !code_stores;
254 List.iter
(add_edge_after node) !code_loads;
255 List.iter
(add_edge_after node) !code_checkbounds;
256 code_stores := [node];
258 code_checkbounds := []
260 else if self#instr_is_checkbound instr
then begin
261 code_checkbounds := [node]
263 (* Remember the registers used and produced by this instruction *)
264 for i
= 0 to Array.length instr
.res
- 1 do
265 Hashtbl.add
code_results instr
.res
.(i
).loc
node
267 for i
= 0 to Array.length instr
.arg
- 1 do
268 Hashtbl.add
code_uses instr
.arg
.(i
).loc
node
270 (* If this is a root instruction (all arguments already computed),
271 add it to the ready queue *)
272 if node.ancestors = 0 then node :: ready_queue
else ready_queue
274 (* Given a list of instructions and a date, choose one or several
275 that are ready to be computed (start date <= current date)
276 and that we can emit in one cycle. Favor instructions with
277 maximal distance to result. If we can't find any, return None.
278 This does not take multiple issues into account, though. *)
280 method private ready_instruction date queue
=
281 let rec extract best
= function
283 if best
== dummy_node then None
else Some best
286 if instr
.date
<= date
&& instr
.length
> best
.length
287 then instr
else best
in
288 extract new_best rem
in
289 extract dummy_node queue
291 (* Schedule a basic block, adding its instructions in front of the given
292 instruction sequence *)
294 method private reschedule ready_queue date cont
=
295 if ready_queue
= [] then cont
else begin
296 match self#ready_instruction date ready_queue
with
298 self#reschedule ready_queue
(date
+ 1) cont
300 (* Remove node from queue *)
301 let new_queue = ref (remove_instr node ready_queue
) in
302 (* Update the start date and number of ancestors emitted of
303 all descendents of this node. Enter those that become ready
305 let issue_cycles = self#instr_issue_cycles
node.instr
in
308 let completion_date = date
+ issue_cycles + delay - 1 in
309 if son
.date
< completion_date then son
.date
<- completion_date;
310 son
.emitted_ancestors
<- son
.emitted_ancestors
+ 1;
311 if son
.emitted_ancestors
= son
.ancestors then
312 new_queue := son
:: !new_queue)
314 instr_cons
node.instr
.desc
node.instr
.arg
node.instr
.res
315 (self#reschedule
!new_queue (date
+ issue_cycles) cont
)
319 (* Don't bother to schedule for initialization code and the like. *)
321 method schedule_fundecl f
=
327 if self#instr_in_basic_block i
then begin
331 { i
with next
= schedule i
.next
}
333 and schedule_block ready_queue i
=
334 if self#instr_in_basic_block i
then
335 schedule_block
(self#add_instruction ready_queue i
) i
.next
337 let critical_outputs =
339 Lop
(Icall_ind
| Itailcall_ind
) -> [| i
.arg
.(0) |]
340 | Lop
(Icall_imm _
| Itailcall_imm _
| Iextcall _
) -> [||]
343 List.iter
(fun x
-> ignore
(longest_path critical_outputs x
)) ready_queue
;
344 self#reschedule ready_queue
0 (schedule i
)
347 if f
.fun_fast
then begin
348 let new_body = schedule f
.fun_body
in
350 { fun_name
= f
.fun_name
;
352 fun_fast
= f
.fun_fast
}