1 ;; AMD Athlon Scheduling
3 ;; The Athlon contains three pipelined FP units, three integer units and
4 ;; three address generation units.
6 ;; The predecode logic determines the boundaries of instructions in the 64
7 ;; byte cache line.  So the cache line straddling problem of K6 might be an
8 ;; issue here as well, but it is not noted in the documentation.
10 ;; Three DirectPath instruction decoders and only one VectorPath decoder
11 ;; are available.  They can decode three DirectPath instructions or one VectorPath
12 ;; instruction per cycle.
13 ;; Decoded macro instructions are then passed to the 72 entry instruction control
14 ;; unit, which passes
15 ;; them to the specialized integer (18 entry) and fp (36 entry) schedulers.
17 ;; The load/store queue unit is not attached to the schedulers but
18 ;; communicates with all the execution units separately instead.
;; Decoder class of each instruction: "direct", "vector" or "double".
;; The cond below marks the listed instruction types, pushes whose operand 1
;; is a memory operand, and XFmode fmov loads/stores as "vector"; every other
;; instruction defaults to "direct".  This default expression never yields
;; "double" -- presumably individual insn patterns set it explicitly (TODO
;; confirm against the insn patterns).
20 (define_attr "athlon_decode" "direct,vector,double"
21 (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
22 (const_string "vector")
23 (and (eq_attr "type" "push")
24 (match_operand 1 "memory_operand" ""))
25 (const_string "vector")
26 (and (eq_attr "type" "fmov")
27 (and (eq_attr "memory" "load,store")
28 (eq_attr "mode" "XF")))
29 (const_string "vector")]
30 (const_string "direct")))
33 ;;               decode0 decode1 decode2
34 ;;                     \    |    /
35 ;;    instruction control unit (72 entry scheduler)
36 ;;                |                        |
37 ;;      integer scheduler (18)         stack map
38 ;;     /  |    |    |    |   \        stack rename
39 ;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
40 ;;    |  agu0  |   agu1      agu2    register file
41 ;;    |      \ |    |       /         |     |     |
42 ;;     \      /\    |     /         fadd  fmul  fstore
43 ;;       \  /    \  |   /           fadd  fmul  fstore
44 ;;       imul  load/store (2x)      fadd  fmul  fstore
;; Four automata are declared for this description.  The four decoder units
;; below all belong to the "athlon" automaton: decode0..decode2 model the
;; direct decoders and decodev models the vector decoder.
46 (define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
47 (define_cpu_unit "athlon-decode0" "athlon")
48 (define_cpu_unit "athlon-decode1" "athlon")
49 (define_cpu_unit "athlon-decode2" "athlon")
50 (define_cpu_unit "athlon-decodev" "athlon")
51 ;; Model the fact that a double decoded instruction may take 2 cycles
52 ;; to decode when decoder2 and, in the next cycle, decoder0
53 ;; are used (this is needed to allow throughput of 1.5 double decoded
54 ;; instructions per cycle).
56 ;; In order to avoid dependence between the reservation of the decoder
57 ;; and other units, we model the decoder as a two stage fully pipelined unit
58 ;; where only a double decoded instruction may occupy the unit in the first cycle.
59 ;; With this scheme, however, two double instructions can be issued in cycle 0.
61 ;; Avoid this by using a presence set requiring decoder0 to be allocated
62 ;; too.  Vector decoded instructions then can't be issued when
63 ;; modeled as consuming decoder0+decoder1+decoder2.
64 ;; We solve that with a specialized vector decoder unit and an exclusion set.
65 (presence_set "athlon-decode2" "athlon-decode0")
66 (exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
67 (define_reservation "athlon-vector" "nothing,athlon-decodev")
68 (define_reservation "athlon-direct0" "nothing,athlon-decode0")
;; A direct decoded instruction occupies any one of the three direct
;; decoders in the second decode stage; the leading "nothing" is the first
;; stage, which only double decoded instructions may occupy.
69 (define_reservation "athlon-direct" "nothing,
70 (athlon-decode0 | athlon-decode1
71 | athlon-decode2)")
72 ;; Double instructions behave like two direct instructions.
73 (define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
74 | (nothing,(athlon-decode0 + athlon-decode1))
75 | (nothing,(athlon-decode1 + athlon-decode2)))")
77 ;; Modeling the agu and ieu units results in extremely large automata and
78 ;; in our approximation they are hardly ever filled.  Only the ieu
79 ;; unit can be, as the issue rate is 3 and the agu unit is always used
80 ;; first in the insn reservations.  Skip the models.
82 ;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
83 ;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
84 ;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
85 ;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
;; The pool of integer execution units is not modeled: "athlon-ieu" reserves
;; nothing.  Only athlon-ieu0 is a real unit; it is named explicitly by
;; reservations that need that specific unit (e.g. the imul, idiv and str
;; reservations below).
86 (define_reservation "athlon-ieu" "nothing")
87 (define_cpu_unit "athlon-ieu0" "athlon")
88 ;(define_cpu_unit "athlon-agu0" "athlon_agu")
89 ;(define_cpu_unit "athlon-agu1" "athlon_agu")
90 ;(define_cpu_unit "athlon-agu2" "athlon_agu")
91 ;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
92 (define_reservation "athlon-agu" "nothing")
94 (define_cpu_unit "athlon-mult" "athlon_mult")
96 (define_cpu_unit "athlon-load0" "athlon_load")
97 (define_cpu_unit "athlon-load1" "athlon_load")
98 (define_reservation "athlon-load" "athlon-agu,
99 (athlon-load0 | athlon-load1),nothing")
100 ;; 128bit SSE instructions issue two loads at once
101 (define_reservation "athlon-load2" "athlon-agu,
102 (athlon-load0 + athlon-load1),nothing")
104 (define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
105 ;; 128bit SSE instructions issue two stores at once
106 (define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
109 ;; The FP operations start to execute at stage 12 in the pipeline, while
110 ;; integer operations start to execute at stage 9 for Athlon and 11 for K8.
111 ;; Compensate the difference for Athlon because it results in significantly
112 ;; smaller automata.
113 (define_reservation "athlon-fpsched" "nothing,nothing,nothing")
114 ;; The floating point loads.
115 (define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
116 (define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
117 (define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
118 (define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
121 ;; The three fp units are fully pipelined with latency of 3
122 (define_cpu_unit "athlon-fadd" "athlon_fp")
123 (define_cpu_unit "athlon-fmul" "athlon_fp")
124 (define_cpu_unit "athlon-fstore" "athlon_fp")
125 (define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
126 (define_reservation "athlon-faddmul" "(athlon-fmul | athlon-fadd)")
128 ;; Vector operations usually consume many of pipes.
129 (define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
132 ;; Jump instructions are executed in the branch unit completely transparent to us
133 (define_insn_reservation "athlon_branch" 0
134 (and (eq_attr "cpu" "athlon,k8")
135 (eq_attr "type" "ibr"))
136 "athlon-direct,athlon-ieu")
137 (define_insn_reservation "athlon_call" 0
138 (and (eq_attr "cpu" "athlon,k8")
139 (eq_attr "type" "call,callv"))
140 "athlon-vector,athlon-ieu")
142 ;; Latency of push operation is 3 cycles, but ESP value is available
143 ;; earlier.
144 (define_insn_reservation "athlon_push" 2
145 (and (eq_attr "cpu" "athlon,k8")
146 (eq_attr "type" "push"))
147 "athlon-direct,athlon-agu,athlon-store")
;; Athlon-only pop: vector decoded, latency 4.  The cpu test must not also
;; list "k8": reservations in this file rely on earlier definitions taking
;; precedence over later ones, so matching k8 here would shadow the
;; K8-specific pop reservation (double decoded, latency 3) and leave it
;; unreachable.
148 (define_insn_reservation "athlon_pop" 4
149 (and (eq_attr "cpu" "athlon")
150 (eq_attr "type" "pop"))
151 "athlon-vector,athlon-load,athlon-ieu")
152 (define_insn_reservation "athlon_pop_k8" 3
153 (and (eq_attr "cpu" "k8")
154 (eq_attr "type" "pop"))
155 "athlon-double,(athlon-ieu+athlon-load)")
156 (define_insn_reservation "athlon_leave" 3
157 (and (eq_attr "cpu" "athlon")
158 (eq_attr "type" "leave"))
159 "athlon-vector,(athlon-ieu+athlon-load)")
160 (define_insn_reservation "athlon_leave_k8" 3
161 (and (eq_attr "cpu" "k8")
162 (eq_attr "type" "leave"))
163 "athlon-double,(athlon-ieu+athlon-load)")
165 ;; Lea executes in AGU unit with 2 cycles latency.
166 (define_insn_reservation "athlon_lea" 2
167 (and (eq_attr "cpu" "athlon,k8")
168 (eq_attr "type" "lea"))
169 "athlon-direct,athlon-agu,nothing")
171 ;; Mul executes in special multiplier unit attached to IEU0
172 (define_insn_reservation "athlon_imul" 5
173 (and (eq_attr "cpu" "athlon")
174 (and (eq_attr "type" "imul")
175 (eq_attr "memory" "none,unknown")))
176 "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
177 ;; ??? Widening multiply is vector or double.
178 (define_insn_reservation "athlon_imul_k8_DI" 4
179 (and (eq_attr "cpu" "k8")
180 (and (eq_attr "type" "imul")
181 (and (eq_attr "mode" "DI")
182 (eq_attr "memory" "none,unknown"))))
183 "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
184 (define_insn_reservation "athlon_imul_k8" 3
185 (and (eq_attr "cpu" "k8")
186 (and (eq_attr "type" "imul")
187 (eq_attr "memory" "none,unknown")))
188 "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
189 (define_insn_reservation "athlon_imul_mem" 8
190 (and (eq_attr "cpu" "athlon")
191 (and (eq_attr "type" "imul")
192 (eq_attr "memory" "load,both")))
193 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
194 (define_insn_reservation "athlon_imul_mem_k8_DI" 7
195 (and (eq_attr "cpu" "k8")
196 (and (eq_attr "type" "imul")
197 (and (eq_attr "mode" "DI")
198 (eq_attr "memory" "load,both"))))
199 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
200 (define_insn_reservation "athlon_imul_mem_k8" 6
201 (and (eq_attr "cpu" "k8")
202 (and (eq_attr "type" "imul")
203 (eq_attr "memory" "load,both")))
204 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
206 ;; Idiv cannot execute in parallel with other instructions.  Dealing with it
207 ;; as with a short latency vector instruction is a good approximation that keeps
208 ;; the scheduler from trying too hard to hide its latency by overlap with
209 ;; other instructions.
210 ;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
211 ;; of the other code.
213 (define_insn_reservation "athlon_idiv" 6
214 (and (eq_attr "cpu" "athlon,k8")
215 (and (eq_attr "type" "idiv")
216 (eq_attr "memory" "none,unknown")))
217 "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
218 (define_insn_reservation "athlon_idiv_mem" 9
219 (and (eq_attr "cpu" "athlon,k8")
220 (and (eq_attr "type" "idiv")
221 (eq_attr "memory" "load,both")))
222 "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
223 ;; The parallelism of string instructions is not documented.  Model it the same way
224 ;; as idiv to create smaller automata.  This probably does not matter much.
225 (define_insn_reservation "athlon_str" 6
226 (and (eq_attr "cpu" "athlon,k8")
227 (and (eq_attr "type" "str")
228 (eq_attr "memory" "load,both,store")))
229 "athlon-vector,athlon-load,athlon-ieu0*6")
231 (define_insn_reservation "athlon_idirect" 1
232 (and (eq_attr "cpu" "athlon,k8")
233 (and (eq_attr "athlon_decode" "direct")
234 (and (eq_attr "unit" "integer,unknown")
235 (eq_attr "memory" "none,unknown"))))
236 "athlon-direct,athlon-ieu")
237 (define_insn_reservation "athlon_ivector" 2
238 (and (eq_attr "cpu" "athlon,k8")
239 (and (eq_attr "athlon_decode" "vector")
240 (and (eq_attr "unit" "integer,unknown")
241 (eq_attr "memory" "none,unknown"))))
242 "athlon-vector,athlon-ieu,athlon-ieu")
243 (define_insn_reservation "athlon_idirect_loadmov" 3
244 (and (eq_attr "cpu" "athlon,k8")
245 (and (eq_attr "type" "imov")
246 (eq_attr "memory" "load")))
247 "athlon-direct,athlon-load")
248 (define_insn_reservation "athlon_idirect_load" 4
249 (and (eq_attr "cpu" "athlon,k8")
250 (and (eq_attr "athlon_decode" "direct")
251 (and (eq_attr "unit" "integer,unknown")
252 (eq_attr "memory" "load"))))
253 "athlon-direct,athlon-load,athlon-ieu")
254 (define_insn_reservation "athlon_ivector_load" 6
255 (and (eq_attr "cpu" "athlon,k8")
256 (and (eq_attr "athlon_decode" "vector")
257 (and (eq_attr "unit" "integer,unknown")
258 (eq_attr "memory" "load"))))
259 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
260 (define_insn_reservation "athlon_idirect_movstore" 1
261 (and (eq_attr "cpu" "athlon,k8")
262 (and (eq_attr "type" "imov")
263 (eq_attr "memory" "store")))
264 "athlon-direct,athlon-agu,athlon-store")
;; Direct decoded integer insn that both loads and stores: decode, load,
;; (unmodeled) IEU, then two cycles on a store unit.
265 (define_insn_reservation "athlon_idirect_both" 4
266 (and (eq_attr "cpu" "athlon,k8")
267 (and (eq_attr "athlon_decode" "direct")
268 (and (eq_attr "unit" "integer,unknown")
269 (eq_attr "memory" "both"))))
270 "athlon-direct,athlon-load,
271 athlon-ieu,athlon-store,
272 athlon-store")
;; Vector decoded integer insn that both loads and stores: decode, load,
;; two (unmodeled) IEU steps as in athlon_ivector_load, then the store.
273 (define_insn_reservation "athlon_ivector_both" 6
274 (and (eq_attr "cpu" "athlon,k8")
275 (and (eq_attr "athlon_decode" "vector")
276 (and (eq_attr "unit" "integer,unknown")
277 (eq_attr "memory" "both"))))
278 "athlon-vector,athlon-load,
279 athlon-ieu,
280 athlon-ieu,
281 athlon-store")
;; Direct decoded integer insn with a store-only memory operand: decode,
;; IEU and AGU together (both unmodeled), then one cycle on a store unit.
282 (define_insn_reservation "athlon_idirect_store" 1
283 (and (eq_attr "cpu" "athlon,k8")
284 (and (eq_attr "athlon_decode" "direct")
285 (and (eq_attr "unit" "integer,unknown")
286 (eq_attr "memory" "store"))))
287 "athlon-direct,(athlon-ieu+athlon-agu),
288 athlon-store")
;; Vector decoded integer insn with a store-only memory operand: decode,
;; IEU and AGU together, a second IEU step, then one cycle on a store unit.
289 (define_insn_reservation "athlon_ivector_store" 2
290 (and (eq_attr "cpu" "athlon,k8")
291 (and (eq_attr "athlon_decode" "vector")
292 (and (eq_attr "unit" "integer,unknown")
293 (eq_attr "memory" "store"))))
294 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
295 athlon-store")
297 ;; Athlon floating point unit
298 (define_insn_reservation "athlon_fldxf" 12
299 (and (eq_attr "cpu" "athlon")
300 (and (eq_attr "type" "fmov")
301 (and (eq_attr "memory" "load")
302 (eq_attr "mode" "XF"))))
303 "athlon-vector,athlon-fpload2,athlon-fvector*9")
304 (define_insn_reservation "athlon_fldxf_k8" 13
305 (and (eq_attr "cpu" "k8")
306 (and (eq_attr "type" "fmov")
307 (and (eq_attr "memory" "load")
308 (eq_attr "mode" "XF"))))
309 "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
310 ;; Assume superforwarding to take place so effective latency of fany op is 0.
311 (define_insn_reservation "athlon_fld" 0
312 (and (eq_attr "cpu" "athlon")
313 (and (eq_attr "type" "fmov")
314 (eq_attr "memory" "load")))
315 "athlon-direct,athlon-fpload,athlon-fany")
316 (define_insn_reservation "athlon_fld_k8" 2
317 (and (eq_attr "cpu" "k8")
318 (and (eq_attr "type" "fmov")
319 (eq_attr "memory" "load")))
320 "athlon-direct,athlon-fploadk8,athlon-fstore")
322 (define_insn_reservation "athlon_fstxf" 10
323 (and (eq_attr "cpu" "athlon")
324 (and (eq_attr "type" "fmov")
325 (and (eq_attr "memory" "store,both")
326 (eq_attr "mode" "XF"))))
327 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
328 (define_insn_reservation "athlon_fstxf_k8" 8
329 (and (eq_attr "cpu" "k8")
330 (and (eq_attr "type" "fmov")
331 (and (eq_attr "memory" "store,both")
332 (eq_attr "mode" "XF"))))
333 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
334 (define_insn_reservation "athlon_fst" 4
335 (and (eq_attr "cpu" "athlon")
336 (and (eq_attr "type" "fmov")
337 (eq_attr "memory" "store,both")))
338 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
339 (define_insn_reservation "athlon_fst_k8" 2
340 (and (eq_attr "cpu" "k8")
341 (and (eq_attr "type" "fmov")
342 (eq_attr "memory" "store,both")))
343 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
344 (define_insn_reservation "athlon_fist" 4
345 (and (eq_attr "cpu" "athlon,k8")
346 (eq_attr "type" "fistp,fisttp"))
347 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
348 (define_insn_reservation "athlon_fmov" 2
349 (and (eq_attr "cpu" "athlon,k8")
350 (eq_attr "type" "fmov"))
351 "athlon-direct,athlon-fpsched,athlon-faddmul")
352 (define_insn_reservation "athlon_fadd_load" 4
353 (and (eq_attr "cpu" "athlon")
354 (and (eq_attr "type" "fop")
355 (eq_attr "memory" "load")))
356 "athlon-direct,athlon-fpload,athlon-fadd")
357 (define_insn_reservation "athlon_fadd_load_k8" 6
358 (and (eq_attr "cpu" "k8")
359 (and (eq_attr "type" "fop")
360 (eq_attr "memory" "load")))
361 "athlon-direct,athlon-fploadk8,athlon-fadd")
362 (define_insn_reservation "athlon_fadd" 4
363 (and (eq_attr "cpu" "athlon,k8")
364 (eq_attr "type" "fop"))
365 "athlon-direct,athlon-fpsched,athlon-fadd")
366 (define_insn_reservation "athlon_fmul_load" 4
367 (and (eq_attr "cpu" "athlon")
368 (and (eq_attr "type" "fmul")
369 (eq_attr "memory" "load")))
370 "athlon-direct,athlon-fpload,athlon-fmul")
371 (define_insn_reservation "athlon_fmul_load_k8" 6
372 (and (eq_attr "cpu" "k8")
373 (and (eq_attr "type" "fmul")
374 (eq_attr "memory" "load")))
375 "athlon-direct,athlon-fploadk8,athlon-fmul")
376 (define_insn_reservation "athlon_fmul" 4
377 (and (eq_attr "cpu" "athlon,k8")
378 (eq_attr "type" "fmul"))
379 "athlon-direct,athlon-fpsched,athlon-fmul")
380 (define_insn_reservation "athlon_fsgn" 2
381 (and (eq_attr "cpu" "athlon,k8")
382 (eq_attr "type" "fsgn"))
383 "athlon-direct,athlon-fpsched,athlon-fmul")
384 (define_insn_reservation "athlon_fdiv_load" 24
385 (and (eq_attr "cpu" "athlon")
386 (and (eq_attr "type" "fdiv")
387 (eq_attr "memory" "load")))
388 "athlon-direct,athlon-fpload,athlon-fmul")
389 (define_insn_reservation "athlon_fdiv_load_k8" 13
390 (and (eq_attr "cpu" "k8")
391 (and (eq_attr "type" "fdiv")
392 (eq_attr "memory" "load")))
393 "athlon-direct,athlon-fploadk8,athlon-fmul")
394 (define_insn_reservation "athlon_fdiv" 24
395 (and (eq_attr "cpu" "athlon")
396 (eq_attr "type" "fdiv"))
397 "athlon-direct,athlon-fpsched,athlon-fmul")
398 (define_insn_reservation "athlon_fdiv_k8" 11
399 (and (eq_attr "cpu" "k8")
400 (eq_attr "type" "fdiv"))
401 "athlon-direct,athlon-fpsched,athlon-fmul")
402 (define_insn_reservation "athlon_fpspc_load" 103
403 (and (eq_attr "cpu" "athlon,k8")
404 (and (eq_attr "type" "fpspc")
405 (eq_attr "memory" "load")))
406 "athlon-vector,athlon-fpload,athlon-fvector")
407 (define_insn_reservation "athlon_fpspc" 100
408 (and (eq_attr "cpu" "athlon,k8")
409 (eq_attr "type" "fpspc"))
410 "athlon-vector,athlon-fpsched,athlon-fvector")
411 (define_insn_reservation "athlon_fcmov_load" 7
412 (and (eq_attr "cpu" "athlon")
413 (and (eq_attr "type" "fcmov")
414 (eq_attr "memory" "load")))
415 "athlon-vector,athlon-fpload,athlon-fvector")
416 (define_insn_reservation "athlon_fcmov" 7
417 (and (eq_attr "cpu" "athlon")
418 (eq_attr "type" "fcmov"))
419 "athlon-vector,athlon-fpsched,athlon-fvector")
420 (define_insn_reservation "athlon_fcmov_load_k8" 17
421 (and (eq_attr "cpu" "k8")
422 (and (eq_attr "type" "fcmov")
423 (eq_attr "memory" "load")))
424 "athlon-vector,athlon-fploadk8,athlon-fvector")
425 (define_insn_reservation "athlon_fcmov_k8" 15
426 (and (eq_attr "cpu" "k8")
427 (eq_attr "type" "fcmov"))
428 "athlon-vector,athlon-fpsched,athlon-fvector")
429 ;; fcomi is vector decoded but uses only one pipe.
430 (define_insn_reservation "athlon_fcomi_load" 3
431 (and (eq_attr "cpu" "athlon")
432 (and (eq_attr "type" "fcmp")
433 (and (eq_attr "athlon_decode" "vector")
434 (eq_attr "memory" "load"))))
435 "athlon-vector,athlon-fpload,athlon-fadd")
436 (define_insn_reservation "athlon_fcomi_load_k8" 5
437 (and (eq_attr "cpu" "k8")
438 (and (eq_attr "type" "fcmp")
439 (and (eq_attr "athlon_decode" "vector")
440 (eq_attr "memory" "load"))))
441 "athlon-vector,athlon-fploadk8,athlon-fadd")
442 (define_insn_reservation "athlon_fcomi" 3
443 (and (eq_attr "cpu" "athlon,k8")
444 (and (eq_attr "athlon_decode" "vector")
445 (eq_attr "type" "fcmp")))
446 "athlon-vector,athlon-fpsched,athlon-fadd")
447 (define_insn_reservation "athlon_fcom_load" 2
448 (and (eq_attr "cpu" "athlon")
449 (and (eq_attr "type" "fcmp")
450 (eq_attr "memory" "load")))
451 "athlon-direct,athlon-fpload,athlon-fadd")
452 (define_insn_reservation "athlon_fcom_load_k8" 4
453 (and (eq_attr "cpu" "k8")
454 (and (eq_attr "type" "fcmp")
455 (eq_attr "memory" "load")))
456 "athlon-direct,athlon-fploadk8,athlon-fadd")
457 (define_insn_reservation "athlon_fcom" 2
458 (and (eq_attr "cpu" "athlon,k8")
459 (eq_attr "type" "fcmp"))
460 "athlon-direct,athlon-fpsched,athlon-fadd")
461 ;; Never seen by the scheduler because we still don't do post reg-stack
462 ;; scheduling.
463 ;(define_insn_reservation "athlon_fxch" 2
464 ; (and (eq_attr "cpu" "athlon,k8")
465 ; (eq_attr "type" "fxch"))
466 ; "athlon-direct,athlon-fpsched,athlon-fany")
468 ;; Athlon handles MMX operations in the FPU unit with shorter latencies
470 (define_insn_reservation "athlon_movlpd_load" 0
471 (and (eq_attr "cpu" "athlon")
472 (and (eq_attr "type" "ssemov")
473 (match_operand:DF 1 "memory_operand" "")))
474 "athlon-direct,athlon-fpload,athlon-fany")
475 (define_insn_reservation "athlon_movlpd_load_k8" 2
476 (and (eq_attr "cpu" "k8")
477 (and (eq_attr "type" "ssemov")
478 (match_operand:DF 1 "memory_operand" "")))
479 "athlon-direct,athlon-fploadk8,athlon-fstore")
480 (define_insn_reservation "athlon_movaps_load_k8" 2
481 (and (eq_attr "cpu" "k8")
482 (and (eq_attr "type" "ssemov")
483 (and (eq_attr "mode" "V4SF,V2DF,TI")
484 (eq_attr "memory" "load"))))
485 "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
486 (define_insn_reservation "athlon_movaps_load" 0
487 (and (eq_attr "cpu" "athlon")
488 (and (eq_attr "type" "ssemov")
489 (and (eq_attr "mode" "V4SF,V2DF,TI")
490 (eq_attr "memory" "load"))))
491 "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
492 (define_insn_reservation "athlon_movss_load" 1
493 (and (eq_attr "cpu" "athlon")
494 (and (eq_attr "type" "ssemov")
495 (and (eq_attr "mode" "SF,DI")
496 (eq_attr "memory" "load"))))
497 "athlon-vector,athlon-fpload,(athlon-fany*2)")
498 (define_insn_reservation "athlon_movss_load_k8" 1
499 (and (eq_attr "cpu" "k8")
500 (and (eq_attr "type" "ssemov")
501 (and (eq_attr "mode" "SF,DI")
502 (eq_attr "memory" "load"))))
503 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
504 (define_insn_reservation "athlon_mmxsseld" 0
505 (and (eq_attr "cpu" "athlon")
506 (and (eq_attr "type" "mmxmov,ssemov")
507 (eq_attr "memory" "load")))
508 "athlon-direct,athlon-fpload,athlon-fany")
509 (define_insn_reservation "athlon_mmxsseld_k8" 2
510 (and (eq_attr "cpu" "k8")
511 (and (eq_attr "type" "mmxmov,ssemov")
512 (eq_attr "memory" "load")))
513 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; 128-bit (V4SF/V2DF/TI) MMX/SSE stores on Athlon are vector decoded.
;; The cpu test is "athlon": the K8 case is covered by the separate
;; athlon_mmxssest_k8 reservation, which uses the double decoder.  Testing
;; "k8" here would duplicate that reservation's condition (making it dead)
;; and leave Athlon to fall through to the generic athlon_mmxssest_short
;; reservation.
514 (define_insn_reservation "athlon_mmxssest" 3
515 (and (eq_attr "cpu" "athlon")
516 (and (eq_attr "type" "mmxmov,ssemov")
517 (and (eq_attr "mode" "V4SF,V2DF,TI")
518 (eq_attr "memory" "store,both"))))
519 "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
520 (define_insn_reservation "athlon_mmxssest_k8" 3
521 (and (eq_attr "cpu" "k8")
522 (and (eq_attr "type" "mmxmov,ssemov")
523 (and (eq_attr "mode" "V4SF,V2DF,TI")
524 (eq_attr "memory" "store,both"))))
525 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
526 (define_insn_reservation "athlon_mmxssest_short" 2
527 (and (eq_attr "cpu" "athlon,k8")
528 (and (eq_attr "type" "mmxmov,ssemov")
529 (eq_attr "memory" "store,both")))
530 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; NOTE(review): despite lacking a "_k8" suffix, this reservation applies to
;; cpu "k8" (double decoded); the Athlon counterpart is the one named
;; "athlon_movaps_k8".  The two names look swapped, but the conditions and
;; decoders themselves are consistent, so the names are left untouched.
531 (define_insn_reservation "athlon_movaps" 2
532 (and (eq_attr "cpu" "k8")
533 (and (eq_attr "type" "ssemov")
534 (eq_attr "mode" "V4SF,V2DF,TI")))
535 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
;; NOTE(review): despite the "_k8" suffix, this reservation applies to
;; cpu "athlon" (vector decoded); the K8 counterpart is the one named
;; "athlon_movaps".  The two names look swapped, but the conditions and
;; decoders themselves are consistent, so the names are left untouched.
536 (define_insn_reservation "athlon_movaps_k8" 2
537 (and (eq_attr "cpu" "athlon")
538 (and (eq_attr "type" "ssemov")
539 (eq_attr "mode" "V4SF,V2DF,TI")))
540 "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
541 (define_insn_reservation "athlon_mmxssemov" 2
542 (and (eq_attr "cpu" "athlon,k8")
543 (eq_attr "type" "mmxmov,ssemov"))
544 "athlon-direct,athlon-fpsched,athlon-faddmul")
545 (define_insn_reservation "athlon_mmxmul_load" 4
546 (and (eq_attr "cpu" "athlon,k8")
547 (and (eq_attr "type" "mmxmul")
548 (eq_attr "memory" "load")))
549 "athlon-direct,athlon-fpload,athlon-fmul")
550 (define_insn_reservation "athlon_mmxmul" 3
551 (and (eq_attr "cpu" "athlon,k8")
552 (eq_attr "type" "mmxmul"))
553 "athlon-direct,athlon-fpsched,athlon-fmul")
554 (define_insn_reservation "athlon_mmx_load" 3
555 (and (eq_attr "cpu" "athlon,k8")
556 (and (eq_attr "unit" "mmx")
557 (eq_attr "memory" "load")))
558 "athlon-direct,athlon-fpload,athlon-faddmul")
559 (define_insn_reservation "athlon_mmx" 2
560 (and (eq_attr "cpu" "athlon,k8")
561 (eq_attr "unit" "mmx"))
562 "athlon-direct,athlon-fpsched,athlon-faddmul")
563 ;; SSE operations are handled by the i387 unit as well.  The latency
564 ;; is the same as for i387 operations for scalar operations.
566 (define_insn_reservation "athlon_sselog_load" 3
567 (and (eq_attr "cpu" "athlon")
568 (and (eq_attr "type" "sselog,sselog1")
569 (eq_attr "memory" "load")))
570 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
571 (define_insn_reservation "athlon_sselog_load_k8" 5
572 (and (eq_attr "cpu" "k8")
573 (and (eq_attr "type" "sselog,sselog1")
574 (eq_attr "memory" "load")))
575 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
576 (define_insn_reservation "athlon_sselog" 3
577 (and (eq_attr "cpu" "athlon")
578 (eq_attr "type" "sselog,sselog1"))
579 "athlon-vector,athlon-fpsched,athlon-fmul*2")
580 (define_insn_reservation "athlon_sselog_k8" 3
581 (and (eq_attr "cpu" "k8")
582 (eq_attr "type" "sselog,sselog1"))
583 "athlon-double,athlon-fpsched,athlon-fmul")
584 ;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
585 (define_insn_reservation "athlon_ssecmp_load" 2
586 (and (eq_attr "cpu" "athlon")
587 (and (eq_attr "type" "ssecmp")
588 (and (eq_attr "mode" "SF,DF,DI")
589 (eq_attr "memory" "load"))))
590 "athlon-direct,athlon-fpload,athlon-fadd")
591 (define_insn_reservation "athlon_ssecmp_load_k8" 4
592 (and (eq_attr "cpu" "k8")
593 (and (eq_attr "type" "ssecmp")
594 (and (eq_attr "mode" "SF,DF,DI,TI")
595 (eq_attr "memory" "load"))))
596 "athlon-direct,athlon-fploadk8,athlon-fadd")
597 (define_insn_reservation "athlon_ssecmp" 2
598 (and (eq_attr "cpu" "athlon,k8")
599 (and (eq_attr "type" "ssecmp")
600 (eq_attr "mode" "SF,DF,DI,TI")))
601 "athlon-direct,athlon-fpsched,athlon-fadd")
602 (define_insn_reservation "athlon_ssecmpvector_load" 3
603 (and (eq_attr "cpu" "athlon")
604 (and (eq_attr "type" "ssecmp")
605 (eq_attr "memory" "load")))
606 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
607 (define_insn_reservation "athlon_ssecmpvector_load_k8" 5
608 (and (eq_attr "cpu" "k8")
609 (and (eq_attr "type" "ssecmp")
610 (eq_attr "memory" "load")))
611 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
612 (define_insn_reservation "athlon_ssecmpvector" 3
613 (and (eq_attr "cpu" "athlon")
614 (eq_attr "type" "ssecmp"))
615 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
616 (define_insn_reservation "athlon_ssecmpvector_k8" 3
617 (and (eq_attr "cpu" "k8")
618 (eq_attr "type" "ssecmp"))
619 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
620 (define_insn_reservation "athlon_ssecomi_load" 4
621 (and (eq_attr "cpu" "athlon")
622 (and (eq_attr "type" "ssecomi")
623 (eq_attr "memory" "load")))
624 "athlon-vector,athlon-fpload,athlon-fadd")
625 (define_insn_reservation "athlon_ssecomi_load_k8" 6
626 (and (eq_attr "cpu" "k8")
627 (and (eq_attr "type" "ssecomi")
628 (eq_attr "memory" "load")))
629 "athlon-vector,athlon-fploadk8,athlon-fadd")
;; Non-load ssecomi insns, vector decoded, using the fadd pipe.  The type
;; test must be "ssecomi", matching the two load variants above: a test for
;; "ssecmp" would never fire here because the earlier ssecmp reservations
;; already cover that type, and non-load ssecomi insns would then match no
;; reservation visible in this part of the file.
630 (define_insn_reservation "athlon_ssecomi" 4
631 (and (eq_attr "cpu" "athlon,k8")
632 (eq_attr "type" "ssecomi"))
633 "athlon-vector,athlon-fpsched,athlon-fadd")
634 (define_insn_reservation "athlon_sseadd_load" 4
635 (and (eq_attr "cpu" "athlon")
636 (and (eq_attr "type" "sseadd")
637 (and (eq_attr "mode" "SF,DF,DI")
638 (eq_attr "memory" "load"))))
639 "athlon-direct,athlon-fpload,athlon-fadd")
640 (define_insn_reservation "athlon_sseadd_load_k8" 6
641 (and (eq_attr "cpu" "k8")
642 (and (eq_attr "type" "sseadd")
643 (and (eq_attr "mode" "SF,DF,DI")
644 (eq_attr "memory" "load"))))
645 "athlon-direct,athlon-fploadk8,athlon-fadd")
646 (define_insn_reservation "athlon_sseadd" 4
647 (and (eq_attr "cpu" "athlon,k8")
648 (and (eq_attr "type" "sseadd")
649 (eq_attr "mode" "SF,DF,DI")))
650 "athlon-direct,athlon-fpsched,athlon-fadd")
651 (define_insn_reservation "athlon_sseaddvector_load" 5
652 (and (eq_attr "cpu" "athlon")
653 (and (eq_attr "type" "sseadd")
654 (eq_attr "memory" "load")))
655 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
656 (define_insn_reservation "athlon_sseaddvector_load_k8" 7
657 (and (eq_attr "cpu" "k8")
658 (and (eq_attr "type" "sseadd")
659 (eq_attr "memory" "load")))
660 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
661 (define_insn_reservation "athlon_sseaddvector" 5
662 (and (eq_attr "cpu" "athlon")
663 (eq_attr "type" "sseadd"))
664 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
665 (define_insn_reservation "athlon_sseaddvector_k8" 5
666 (and (eq_attr "cpu" "k8")
667 (eq_attr "type" "sseadd"))
668 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
670 ;; Conversions behave very irregularly and the scheduling is critical here.
671 ;; Take each instruction separately.  Assume that the mode is always set to the
672 ;; destination one and athlon_decode is set to the K8 versions.
675 (define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
676 (and (eq_attr "cpu" "k8,athlon")
677 (and (eq_attr "type" "ssecvt")
678 (and (eq_attr "athlon_decode" "direct")
679 (and (eq_attr "mode" "DF")
680 (eq_attr "memory" "load")))))
681 "athlon-direct,athlon-fploadk8,athlon-fstore")
682 (define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
683 (and (eq_attr "cpu" "athlon,k8")
684 (and (eq_attr "type" "ssecvt")
685 (and (eq_attr "athlon_decode" "direct")
686 (eq_attr "mode" "DF"))))
687 "athlon-direct,athlon-fpsched,athlon-fstore")
688 ;; cvtps2pd.  Model it the same way as the other double decoded FP conversions.
689 (define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
690 (and (eq_attr "cpu" "k8,athlon")
691 (and (eq_attr "type" "ssecvt")
692 (and (eq_attr "athlon_decode" "double")
693 (and (eq_attr "mode" "V2DF,V4SF,TI")
694 (eq_attr "memory" "load")))))
695 "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
696 (define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
697 (and (eq_attr "cpu" "k8,athlon")
698 (and (eq_attr "type" "ssecvt")
699 (and (eq_attr "athlon_decode" "double")
700 (eq_attr "mode" "V2DF,V4SF,TI"))))
701 "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
702 ;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath)
703 ;; cvtsi2sd has throughput 1 and is executed in the store unit with latency of 6
704 (define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
705 (and (eq_attr "cpu" "athlon,k8")
706 (and (eq_attr "type" "sseicvt")
707 (and (eq_attr "athlon_decode" "direct")
708 (and (eq_attr "mode" "SF,DF")
709 (eq_attr "memory" "load")))))
710 "athlon-direct,athlon-fploadk8,athlon-fstore")
711 ;; cvtsi2ss mem, reg is doublepath
712 (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
713 (and (eq_attr "cpu" "athlon")
714 (and (eq_attr "type" "sseicvt")
715 (and (eq_attr "athlon_decode" "double")
716 (and (eq_attr "mode" "SF,DF")
717 (eq_attr "memory" "load")))))
718 "athlon-vector,athlon-fpload,(athlon-fstore*2)")
719 (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
720 (and (eq_attr "cpu" "k8")
721 (and (eq_attr "type" "sseicvt")
722 (and (eq_attr "athlon_decode" "double")
723 (and (eq_attr "mode" "SF,DF")
724 (eq_attr "memory" "load")))))
725 "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
726 ;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
;; NOTE(review): this is the register-only form (memory "none"), yet the
;; reservation goes through athlon-fploadk8, which reserves a load unit.
;; The cpu test also includes "athlon" while the decoder used is
;; athlon-double.  Confirm both are intended.
727 (define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
728 (and (eq_attr "cpu" "k8,athlon")
729 (and (eq_attr "type" "sseicvt")
730 (and (eq_attr "athlon_decode" "double")
731 (and (eq_attr "mode" "SF,DF")
732 (eq_attr "memory" "none")))))
733 "athlon-double,athlon-fploadk8,athlon-fstore")
734 ;; cvtsi2ss reg, reg is doublepath
;; NOTE(review): the condition selects athlon_decode "vector" and the
;; reservation uses athlon-vector, although the comment above this
;; reservation calls the insn doublepath.  It is also a register-only form
;; (memory "none") that reserves a load unit via athlon-fploadk8.  Confirm
;; both are intended.
735 (define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
736 (and (eq_attr "cpu" "athlon,k8")
737 (and (eq_attr "type" "sseicvt")
738 (and (eq_attr "athlon_decode" "vector")
739 (and (eq_attr "mode" "SF,DF")
740 (eq_attr "memory" "none")))))
741 "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
742 ;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
743 (define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
744 (and (eq_attr "cpu" "k8,athlon")
745 (and (eq_attr "type" "ssecvt")
746 (and (eq_attr "athlon_decode" "double")
747 (and (eq_attr "mode" "SF")
748 (eq_attr "memory" "load")))))
749 "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
750 ;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
751 (define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
752 (and (eq_attr "cpu" "athlon,k8")
753 (and (eq_attr "type" "ssecvt")
754 (and (eq_attr "athlon_decode" "vector")
755 (and (eq_attr "mode" "SF")
756 (eq_attr "memory" "none")))))
757 "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
;; Load form of the packed conversion handled for V4SF/V2DF/TI modes.
;; NOTE(review): the condition selects athlon_decode "vector" but the
;; reservation uses the double decoder (athlon-double) -- confirm which one
;; is intended.
758 (define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
759 (and (eq_attr "cpu" "athlon,k8")
760 (and (eq_attr "type" "ssecvt")
761 (and (eq_attr "athlon_decode" "vector")
762 (and (eq_attr "mode" "V4SF,V2DF,TI")
763 (eq_attr "memory" "load")))))
764 "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10.
;; ??? Why is it faster than cvtsd2ss?
;; NOTE(review): the remark above talks about the mem,reg form with
;; latency 10, whereas this reservation matches memory "none" and
;; declares a default latency of 8 -- confirm which is correct.
;; Reserves athlon-vector, then athlon-fpsched, then athlon-fvector
;; repeated twice.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector*2")
;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9.
;; Matches SI/DI-mode "sseicvt" loads on Athlon and K8.  Reserves
;; athlon-vector, then athlon-fploadk8, then athlon-fvector.
;; NOTE(review): the remark above says doublepath, but the condition
;; selects athlon_decode "vector" -- confirm which one is meant.
;; The "secvt" spelling in the name is kept as is, because the name is
;; the reservation's identifier.
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9.
;; Athlon only: register-only (memory "none") SI/DI-mode "sseicvt"
;; insns tagged athlon_decode "double".  Reserves athlon-vector, then
;; athlon-fpsched, then athlon-fvector.
;; NOTE(review): the insn is selected by decode class "double" but the
;; reservation starts with athlon-vector -- confirm this is deliberate.
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; K8 only: register-only (memory "none") SI/DI-mode "sseicvt" insns
;; tagged athlon_decode "double"; default latency is 9.  Reserves
;; athlon-double, then athlon-fpsched, then athlon-fstore.
(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,athlon-fstore")
;; Athlon only: scalar (mode SF or DF) "ssemul" insns that load from
;; memory; default latency is 4.  Reserves athlon-direct, then
;; athlon-fpload, then athlon-fmul.
(define_insn_reservation "athlon_ssemul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul")
;; K8 only: scalar (mode SF or DF) "ssemul" insns that load from
;; memory; default latency is 6.  Reserves athlon-direct, then
;; athlon-fploadk8, then athlon-fmul.
(define_insn_reservation "athlon_ssemul_load_k8" 6
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
;; Athlon and K8: scalar (mode SF or DF) "ssemul" insns; default
;; latency is 4.  Reserves athlon-direct, then athlon-fpsched, then
;; athlon-fmul.
;; NOTE(review): there is no "memory" test here, so the condition also
;; holds for the load forms covered by athlon_ssemul_load and
;; athlon_ssemul_load_k8 -- presumably this relies on the earlier
;; entries winning; confirm how overlapping reservations are resolved.
(define_insn_reservation "athlon_ssemul" 4
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul")
;; Athlon only: "ssemul" insns that load from memory, with no mode
;; restriction; default latency is 5.  Reserves athlon-vector, then
;; athlon-fpload2, then athlon-fmul repeated twice.
;; NOTE(review): presumably meant for the packed forms left over after
;; the SF/DF entries -- confirm against the reservation ordering.
(define_insn_reservation "athlon_ssemulvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
;; K8 only: "ssemul" insns that load from memory, with no mode
;; restriction; default latency is 7.  Reserves athlon-double, then
;; athlon-fpload2k8, then athlon-fmul repeated twice.
;; NOTE(review): presumably meant for the packed forms left over after
;; the SF/DF entries -- confirm against the reservation ordering.
(define_insn_reservation "athlon_ssemulvector_load_k8" 7
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
;; Athlon only: any "ssemul" insn (the condition tests nothing but cpu
;; and type); default latency is 5.  Reserves athlon-vector, then
;; athlon-fpsched, then athlon-fmul repeated twice.
;; NOTE(review): presumably a catch-all for forms not claimed by the
;; more specific ssemul reservations -- confirm.
(define_insn_reservation "athlon_ssemulvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssemul"))
  "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
;; K8 only: any "ssemul" insn (the condition tests nothing but cpu and
;; type); default latency is 5.  Reserves athlon-double, then
;; athlon-fpsched, then athlon-fmul repeated twice.
;; NOTE(review): presumably a catch-all for forms not claimed by the
;; more specific ssemul reservations -- confirm.
(define_insn_reservation "athlon_ssemulvector_k8" 5
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "ssemul"))
  "athlon-double,athlon-fpsched,(athlon-fmul*2)")
;; These are divsd timings; divss is faster.
;; Athlon only: scalar (mode SF or DF) "ssediv" insns that load from
;; memory; default latency is 20.  Reserves athlon-direct, then
;; athlon-fpload, then athlon-fmul repeated 17 times.
(define_insn_reservation "athlon_ssediv_load" 20
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul*17")
;; K8 only: scalar (mode SF or DF) "ssediv" insns that load from
;; memory; default latency is 22.  Reserves athlon-direct, then
;; athlon-fploadk8, then athlon-fmul repeated 17 times.
(define_insn_reservation "athlon_ssediv_load_k8" 22
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")
;; Athlon and K8: scalar (mode SF or DF) "ssediv" insns; default
;; latency is 20.  Reserves athlon-direct, then athlon-fpsched, then
;; athlon-fmul repeated 17 times.
;; NOTE(review): there is no "memory" test here, so the condition also
;; holds for the load forms covered by athlon_ssediv_load and
;; athlon_ssediv_load_k8 -- presumably this relies on the earlier
;; entries winning; confirm how overlapping reservations are resolved.
(define_insn_reservation "athlon_ssediv" 20
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "ssediv")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul*17")
;; Athlon only: "ssediv" insns that load from memory, with no mode
;; restriction; default latency is 39.  Reserves athlon-vector, then
;; athlon-fpload2, then athlon-fmul repeated 34 times.
;; NOTE(review): presumably meant for the packed forms left over after
;; the SF/DF entries -- confirm against the reservation ordering.
(define_insn_reservation "athlon_ssedivvector_load" 39
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,athlon-fmul*34")
;; K8 only: "ssediv" insns that load from memory, with no mode
;; restriction; default latency is 35.  Reserves athlon-double, then
;; athlon-fpload2k8, then athlon-fmul repeated 34 times.
;; NOTE(review): the latency (35) is lower than the 39 declared by
;; athlon_ssedivvector_k8 for the non-load form -- confirm the figure.
(define_insn_reservation "athlon_ssedivvector_load_k8" 35
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,athlon-fmul*34")
;; Athlon only: any "ssediv" insn (the condition tests nothing but cpu
;; and type); default latency is 39.  Reserves athlon-vector, then
;; athlon-fmul repeated 34 times.
;; NOTE(review): unlike athlon_ssediv there is no athlon-fpsched step
;; between the decoder and athlon-fmul -- confirm this is intentional.
(define_insn_reservation "athlon_ssedivvector" 39
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssediv"))
  "athlon-vector,athlon-fmul*34")
;; K8 only: any "ssediv" insn (the condition tests nothing but cpu and
;; type); default latency is 39.  Reserves athlon-double, then
;; athlon-fmul repeated 34 times.
;; NOTE(review): unlike athlon_ssediv there is no athlon-fpsched step
;; between the decoder and athlon-fmul -- confirm this is intentional.
(define_insn_reservation "athlon_ssedivvector_k8" 39
  (and (eq_attr "cpu" "k8")
       (eq_attr "type" "ssediv"))
  "athlon-double,athlon-fmul*34")