1 ;; Scheduling description for IBM POWER9 processor.
2 ;; Copyright (C) 2016-2018 Free Software Foundation, Inc.
4 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
6 ;; This file is part of GCC.
8 ;; GCC is free software; you can redistribute it and/or modify it
9 ;; under the terms of the GNU General Public License as published
10 ;; by the Free Software Foundation; either version 3, or (at your
11 ;; option) any later version.
13 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
14 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 ;; License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>.
; Automata used by this description: dispatch ports (power9dsp), LSU pipes
; (power9lsu), VSU pipes and permute units (power9vsu), and the remaining
; divide/branch/crypto/DFU units (power9misc).
(define_automaton "power9dsp,power9lsu,power9vsu,power9misc")
; Four LSU pipes
(define_cpu_unit "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9"
                 "power9lsu")
; Four VSU pipes
(define_cpu_unit "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9"
                 "power9vsu")
; Two vector permute units, part of vsu
(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")
; Two fixed point divide units, not pipelined
(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")
; Branch unit, crypto unit and DFU (the DFU is reserved below by the DFP and
; quad-precision insns)
(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")

; Dispatch ports: slice ports x0-x3, 3rd-slot units xa0/xa1 and xb0/xb1, and
; the two branch ports
(define_cpu_unit "x0_power9,x1_power9,xa0_power9,xa1_power9,
                  x2_power9,x3_power9,xb0_power9,xb1_power9,
                  br0_power9,br1_power9" "power9dsp")
37 ; Dispatch port reservations
39 ; Power9 can dispatch a maximum of 6 iops per cycle with the following
40 ; general restrictions (other restrictions also apply):
41 ; 1) At most 2 iops per execution slice
42 ; 2) At most 2 iops to the branch unit
43 ; Note that insn position in a dispatch group of 6 insns does not infer which
44 ; execution slice the insn is routed to. The units are used to infer the
45 ; conflicts that exist (i.e. an 'even' requirement will preclude dispatch
46 ; with 2 insns with 'superslice' requirement).
; xa0/xa1 really model the 3rd dispatch port of a superslice.  They are kept
; as separate units so that insns which preclude use of that port can still
; be scheduled two to a superslice while reserving the 3rd slot.  xb0/xb1
; play the same role for the other superslice.
(define_reservation "DU_xa_power9" "xa0_power9+xa1_power9")
(define_reservation "DU_xb_power9" "xb0_power9+xb1_power9")
; Any execution slice dispatch: one of the four slice ports, or the 3rd
; dispatch port of either superslice (DU_xa_power9 / DU_xb_power9).
(define_reservation "DU_any_power9"
                    "x0_power9|x1_power9|DU_xa_power9|x2_power9|x3_power9|
                     DU_xb_power9")
; Even slice, actually takes even/odd slots (both ports of one pair)
(define_reservation "DU_even_power9"
                    "x0_power9+x1_power9|x2_power9+x3_power9")
; Slice plus 3rd slot: one slice port together with one 3rd-slot unit of the
; same group (xa0/xa1 go with x0/x1, xb0/xb1 go with x2/x3)
(define_reservation "DU_slice_3_power9"
                    "x0_power9+xa0_power9|x1_power9+xa1_power9|
                     x2_power9+xb0_power9|x3_power9+xb1_power9")
; Superslice: both slice ports of one pair (x0+x1 or x2+x3)
(define_reservation "DU_super_power9"
                    "x0_power9+x1_power9|x2_power9+x3_power9")
; 2-way cracked: reserves two dispatch slots.  The x1+x2 and x2+x3 pairings
; are listed so that this reservation offers the same slice pairings as
; DU_C2_3_power9 and treats the x2/x3 pair like the x0/x1 pair.
(define_reservation "DU_C2_power9" "x0_power9+x1_power9|
                                    x1_power9+DU_xa_power9|
                                    x1_power9+x2_power9|
                                    DU_xa_power9+x2_power9|
                                    x2_power9+x3_power9|
                                    x3_power9+DU_xb_power9")
; 2-way cracked plus 3rd slot: two slice ports plus one of xa0/xb0
(define_reservation "DU_C2_3_power9"
                    "x0_power9+x1_power9+xa0_power9|
                     x1_power9+x2_power9+xa0_power9|
                     x1_power9+x2_power9+xb0_power9|
                     x2_power9+x3_power9+xb0_power9")
; 3-way cracked (consumes whole decode/dispatch cycle): every dispatch unit,
; including both branch ports, is reserved at once
(define_reservation "DU_C3_power9"
                    "x0_power9+x1_power9+xa0_power9+xa1_power9+x2_power9+
                     x3_power9+xb0_power9+xb1_power9+br0_power9+br1_power9")
; Branch ports: either of the two branch dispatch ports
(define_reservation "DU_branch_power9" "br0_power9|br1_power9")
; Execution unit reservations

; Any single LSU pipe
(define_reservation "LSU_power9"
                    "lsu0_power9|lsu1_power9|lsu2_power9|lsu3_power9")

; Two LSU pipes at once; the allowed pairs are 0+1, 1+2, 2+3 and 3+0
(define_reservation "LSU_pair_power9"
                    "lsu0_power9+lsu1_power9|lsu1_power9+lsu2_power9|
                     lsu2_power9+lsu3_power9|lsu3_power9+lsu0_power9")

; Any single VSU pipe
(define_reservation "VSU_power9"
                    "vsu0_power9|vsu1_power9|vsu2_power9|vsu3_power9")

; Two VSU pipes at once: vsu0+vsu1 or vsu2+vsu3
(define_reservation "VSU_super_power9"
                    "vsu0_power9+vsu1_power9|vsu2_power9+vsu3_power9")

; Either vector permute unit
(define_reservation "VSU_PRM_power9" "prm0_power9|prm1_power9")
; Fixed point loads, split on the "sign_extend" and "update" attributes.

; Plain load: 4 cycle latency, any dispatch slot, one LSU pipe
(define_insn_reservation "power9-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")

; Update form: dispatched as 2-way cracked, reserves a VSU pipe along with
; the LSU pipe
(define_insn_reservation "power9-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9+VSU_power9")

; Sign-extending load: 6 cycle latency, dispatched as 2-way cracked
(define_insn_reservation "power9-load-ext" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9")

; Sign-extending update form: dispatched as 3-way cracked, LSU plus VSU
(define_insn_reservation "power9-load-ext-update" 6
  (and (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,LSU_power9+VSU_power9")
; Floating point loads, split on the "size" and "update" attributes.

; 64-bit load: 4 cycle latency, slice plus 3rd slot, one LSU pipe
(define_insn_reservation "power9-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; 64-bit update form: 2-way cracked plus 3rd slot, LSU plus VSU
(define_insn_reservation "power9-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; SFmode loads are cracked and have additional 2 cycles over DFmode
(define_insn_reservation "power9-fpload-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9")

; 32-bit update form: dispatched as 3-way cracked, LSU plus VSU
(define_insn_reservation "power9-fpload-update-single" 6
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,LSU_power9+VSU_power9")
; Vector load: 5 cycle latency, any dispatch slot, a pair of LSU pipes
(define_insn_reservation "power9-vecload" 5
  (and (eq_attr "type" "vecload")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_pair_power9")
; Fixed point stores, split on the "update" and "indexed" attributes.
; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store

; Non-update, non-indexed store: slice plus 3rd slot, one LSU pipe
(define_insn_reservation "power9-store" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Non-update indexed store: same reservation as the non-indexed form
(define_insn_reservation "power9-store-indexed" 0
  (and (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-store-update" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "no")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-store-update-indexed" 2
  (and (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")
; Floating point store, non-update form: slice plus 3rd slot, one LSU pipe
(define_insn_reservation "power9-fpstore" 0
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "no")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,LSU_power9")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power9-fpstore-update" 2
  (and (eq_attr "type" "fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")
; Vector store: superslice dispatch, a pair of LSU pipes
(define_insn_reservation "power9-vecstore" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,LSU_pair_power9")
; Insns of type load_l: 4 cycle latency, one LSU pipe
(define_insn_reservation "power9-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")

; Insns of type store_c: 2-way cracked plus 3rd slot, LSU plus VSU
(define_insn_reservation "power9-stcx" 2
  (and (eq_attr "type" "store_c")
       (eq_attr "cpu" "power9"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; sync and isync are modelled like a simple load
(define_insn_reservation "power9-sync" 4
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,LSU_power9")
; Most ALU insns are simple 2 cycle, including record form.  insert/shift
; are only covered here when "dot" is "no".
(define_insn_reservation "power9-alu" 2
  (and (ior (eq_attr "type" "add,exts,integer,logical,isel")
            (and (eq_attr "type" "insert,shift")
                 (eq_attr "dot" "no")))
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Latency is 5 cycles when the consumer is a crlogical, mfcr or mfcrf insn
(define_bypass 5 "power9-alu"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")
; Record form rotate/shift are cracked
(define_insn_reservation "power9-cracked-alu" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,VSU_power9")

; Latency is 7 cycles when the consumer is a crlogical, mfcr or mfcrf insn
(define_bypass 7 "power9-cracked-alu"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")
; cntlz, popcnt and trap: 3 cycle latency, any dispatch slot, one VSU pipe
(define_insn_reservation "power9-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; Latency is 6 cycles when the consumer is a crlogical, mfcr or mfcrf insn
(define_bypass 6 "power9-alu2"
                 "power9-crlogical,power9-mfcr,power9-mfcrf")
; Compare: 2 cycle latency, any dispatch slot, one VSU pipe
(define_insn_reservation "power9-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; Treat 'two' and 'three' types as 2 or 3 way cracked
(define_insn_reservation "power9-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,VSU_power9")

(define_insn_reservation "power9-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,VSU_power9")
; Non-record-form multiply: 5 cycle latency, any dispatch slot, one VSU pipe.
; The "dot" test keeps this condition disjoint from power9-mul-compare;
; an insn must not match more than one define_insn_reservation.
(define_insn_reservation "power9-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; Record form multiply: dispatched as 2-way cracked
(define_insn_reservation "power9-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,VSU_power9")

; 10 cycle CR latency
(define_bypass 10 "power9-mul-compare"
                  "power9-crlogical,power9-mfcr,power9-mfcrf")
; Fixed point divides reserve the divide units for a minimum of 8 cycles

; 32-bit divide: 16 cycle latency
(define_insn_reservation "power9-idiv" 16
  (and (eq_attr "type" "div")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")

; 64-bit divide: 24 cycle latency
(define_insn_reservation "power9-ldiv" 24
  (and (eq_attr "type" "div")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
; CR logical ops (types cr_logical and delayed_cr): 2 cycle latency
(define_insn_reservation "power9-crlogical" 2
  (and (eq_attr "type" "cr_logical,delayed_cr")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; mfcrf: 2 cycle latency, any dispatch slot
(define_insn_reservation "power9-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")

; mfcr: 6 cycle latency, dispatched as 3-way cracked
(define_insn_reservation "power9-mfcr" 6
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power9"))
  "DU_C3_power9,VSU_power9")
; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; is cracked
; mtcr: 2 cycle latency, any dispatch slot, one VSU pipe
(define_insn_reservation "power9-mtcr" 2
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; Move to LR/CTR are executed in VSU
(define_insn_reservation "power9-mtjmpr" 5
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; Floating point/Vector ops

; Simple FP ops: 2 cycle latency
(define_insn_reservation "power9-fpsimple" 2
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; fp and dmul types: 7 cycle latency
(define_insn_reservation "power9-fp" 7
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

; FP compare: 3 cycle latency
(define_insn_reservation "power9-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")
; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
; divide insns, but for the most part do not block pipelined ops.  Only the
; latency differs between the four reservations below.
(define_insn_reservation "power9-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-ddiv" 33
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")
; Vector ops with 2 cycle latency: superslice dispatch, a pair of VSU pipes
(define_insn_reservation "power9-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Vector compare: 3 cycle latency
(define_insn_reservation "power9-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Simple vector ops: 3 cycle latency
(define_insn_reservation "power9-vecsimple" 3
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")
; vecfloat/vecdouble insns whose size is not 128: 7 cycle latency, VSU pair
(define_insn_reservation "power9-vecnormal" 7
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; Quad-precision FP ops, execute in DFU
(define_insn_reservation "power9-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9")
; Vector permute: 3 cycle latency, uses one of the permute units
(define_insn_reservation "power9-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_PRM_power9")

; Complex vector ops: 7 cycle latency
(define_insn_reservation "power9-veccomplex" 7
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; vecfdiv type: 28 cycle latency
(define_insn_reservation "power9-vecfdiv" 28
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")
; vecdiv insns whose size is not 128: 32 cycle latency, VSU pair
(define_insn_reservation "power9-vecdiv" 32
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,VSU_super_power9")

; 128-bit vecdiv insns reserve the DFU instead: 56 cycle latency
(define_insn_reservation "power9-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,dfu_power9")
; mffgpr and mftgpr types share one model: 2 cycle latency, slice plus 3rd
; slot, one VSU pipe
(define_insn_reservation "power9-mffgpr" 2
  (and (eq_attr "type" "mffgpr")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")

(define_insn_reservation "power9-mftgpr" 2
  (and (eq_attr "type" "mftgpr")
       (eq_attr "cpu" "power9"))
  "DU_slice_3_power9,VSU_power9")
; Move from LR/CTR are executed in BRU but consume a writeback port from an
; execution slice.
; mfjmpr: 6 cycle latency, branch dispatch port, reserves the branch unit
; together with one VSU pipe
(define_insn_reservation "power9-mfjmpr" 6
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power9"))
  "DU_branch_power9,bru_power9+VSU_power9")
; Branches (types jmpreg and branch): 2 cycle latency, branch dispatch port,
; branch unit
(define_insn_reservation "power9-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power9"))
  "DU_branch_power9,bru_power9")
; Crypto ops: 6 cycle latency, superslice dispatch, crypto unit
(define_insn_reservation "power9-crypto" 6
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power9"))
  "DU_super_power9,cryptu_power9")
; htm type: 4 cycle latency, 2-way cracked dispatch, one LSU pipe
(define_insn_reservation "power9-htm" 4
  (and (eq_attr "type" "htm")
       (eq_attr "cpu" "power9"))
  "DU_C2_power9,LSU_power9")

; htmsimple type: 2 cycle latency, any dispatch slot, one VSU pipe
(define_insn_reservation "power9-htm-simple" 2
  (and (eq_attr "type" "htmsimple")
       (eq_attr "cpu" "power9"))
  "DU_any_power9,VSU_power9")
; dfp type: 12 cycle latency, even-slice dispatch, executes in the DFU
(define_insn_reservation "power9-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "cpu" "power9"))
  "DU_even_power9,dfu_power9")