aix: Alias -m64 to -maix64 and -m32 to -maix32.
[official-gcc.git] / gcc / config / rs6000 / power10.md
blob665f0f22c62eb90cd112c3db5d1c8972637c0328
1 ;; Scheduling description for the IBM POWER10 processor.
2 ;; Copyright (C) 2020-2021 Free Software Foundation, Inc.
3 ;;
4 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify it
9 ;; under the terms of the GNU General Public License as published
10 ;; by the Free Software Foundation; either version 3, or (at your
11 ;; option) any later version.
13 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
14 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16 ;; License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3.  If not see
20 ;; <http://www.gnu.org/licenses/>.
22 ; For Power10 we model (and try to pack) the in-order decode/dispatch groups
23 ; which consist of 8 instructions max.  We do not try to model the details of
24 ; the out-of-order issue queues and how insns flow to the various execution
25 ; units except for the simple representation of the issue limitation of at
26 ; most 4 insns to the execution units/2 insns to the load units/2 insns to
27 ; the store units.
; Two automata are declared: "power10dispatch" holds the eight dispatch-slot
; units and "power10issue" holds the execution, load and store units.
28 (define_automaton "power10dispatch,power10issue")
30 ; Decode/dispatch slots (du0..du7), all in the dispatch automaton
31 (define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
32                   du4_power10,du5_power10,du6_power10,du7_power10" "power10dispatch")
34 ; Four execution units (exu0..exu3), in the issue automaton
35 (define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
36                  "power10issue")
37 ; Two load units and two store units, also in the issue automaton
38 (define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
39 (define_cpu_unit "stu0_power10,stu1_power10" "power10issue")
42 ; Dispatch slots are allocated in order conforming to program order.
; Each absence_set allows the slot named first to be reserved only while none
; of the higher-numbered slots listed in the second string is reserved, so the
; slots of a cycle can only be filled in ascending order.  du7_power10 has no
; entry because there is no higher-numbered slot.
43 (absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
44               du5_power10,du6_power10,du7_power10")
45 (absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
46               du6_power10,du7_power10")
47 (absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
48               du7_power10")
49 (absence_set "du3_power10" "du4_power10,du5_power10,du6_power10,du7_power10")
50 (absence_set "du4_power10" "du5_power10,du6_power10,du7_power10")
51 (absence_set "du5_power10" "du6_power10,du7_power10")
52 (absence_set "du6_power10" "du7_power10")
55 ; Dispatch port reservations
57 ; Power10 can dispatch a maximum of 8 iops per cycle, with a maximum of
58 ; 4 VSU/2 Load/2 Store per cycle.
60 ; Any dispatch slot (exactly one of du0..du7)
61 (define_reservation "DU_any_power10"
62                     "du0_power10|du1_power10|du2_power10|du3_power10|
63                      du4_power10|du5_power10|du6_power10|du7_power10")
65 ; Even slot, actually takes even/odd slots
; (one aligned pair: du0+du1, du2+du3, du4+du5 or du6+du7)
66 (define_reservation "DU_even_power10"
67                     "du0_power10+du1_power10|du2_power10+du3_power10|
68                      du4_power10+du5_power10|du6_power10+du7_power10")
70 ; 4-way cracked (consumes whole decode/dispatch cycle)
; Reserves all eight dispatch slots simultaneously.
; NOTE(review): "4-way" does not match the eight slots reserved here --
; confirm the intended wording.
71 (define_reservation "DU_all_power10"
72                     "du0_power10+du1_power10+du2_power10+du3_power10+
73                      du4_power10+du5_power10+du6_power10+du7_power10")
76 ; Execution unit reservations
; Either of the two load units.
77 (define_reservation "LU_power10"
78                     "lu0_power10|lu1_power10")
; Either of the two store units.
80 (define_reservation "STU_power10"
81                     "stu0_power10|stu1_power10")
83 ; Certain simple fixed-point insns can execute in the Store-agen pipe
; SXU_power10 names the same two units as STU_power10, so an SXU reservation
; competes with stores for stu0/stu1.
84 (define_reservation "SXU_power10"
85                     "stu0_power10|stu1_power10")
; Any one of the four execution units.
87 (define_reservation "EXU_power10"
88                     "exu0_power10|exu1_power10|exu2_power10|exu3_power10")
; A pair of execution units taken together: exu0+exu1 or exu2+exu3.
90 (define_reservation "EXU_super_power10"
91                     "exu0_power10+exu1_power10|exu2_power10+exu3_power10")
94 ; Load Unit
; In the reservation strings, "," separates consecutive cycles: the dispatch
; slot reservation comes first and the unit reservation follows it.
; Integer loads without update, not 128-bit and not prefixed: latency 4,
; one dispatch slot, then one load unit.
95 (define_insn_reservation "power10-load" 4
96   (and (eq_attr "type" "load")
97        (eq_attr "update" "no")
98        (eq_attr "size" "!128")
99        (eq_attr "prefixed" "no")
100        (eq_attr "cpu" "power10"))
101   "DU_any_power10,LU_power10")
; Prefixed form of the loads above: same latency and unit, but takes a
; dispatch slot pair.
103 (define_insn_reservation "power10-prefixed-load" 4
104   (and (eq_attr "type" "load")
105        (eq_attr "update" "no")
106        (eq_attr "size" "!128")
107        (eq_attr "prefixed" "yes")
108        (eq_attr "cpu" "power10"))
109   "DU_even_power10,LU_power10")
; Update-form integer loads: slot pair, then a load unit together with an
; SXU (store-agen) unit.  No size or prefixed test is applied here.
111 (define_insn_reservation "power10-load-update" 4
112   (and (eq_attr "type" "load")
113        (eq_attr "update" "yes")
114        (eq_attr "cpu" "power10"))
115   "DU_even_power10,LU_power10+SXU_power10")
; Floating-point loads.  64-bit forms have latency 4; 32-bit forms have
; latency 7 and always take a dispatch slot pair.
; 64-bit, no update, not prefixed: one dispatch slot, then one load unit.
117 (define_insn_reservation "power10-fpload-double" 4
118   (and (eq_attr "type" "fpload")
119        (eq_attr "update" "no")
120        (eq_attr "size" "64")
121        (eq_attr "prefixed" "no")
122        (eq_attr "cpu" "power10"))
123   "DU_any_power10,LU_power10")
; 64-bit, no update, prefixed: dispatch slot pair, then one load unit.
125 (define_insn_reservation "power10-prefixed-fpload-double" 4
126   (and (eq_attr "type" "fpload")
127        (eq_attr "update" "no")
128        (eq_attr "size" "64")
129        (eq_attr "prefixed" "yes")
130        (eq_attr "cpu" "power10"))
131   "DU_even_power10,LU_power10")
; 64-bit update form: slot pair, then a load unit together with an SXU unit.
133 (define_insn_reservation "power10-fpload-update-double" 4
134   (and (eq_attr "type" "fpload")
135        (eq_attr "update" "yes")
136        (eq_attr "size" "64")
137        (eq_attr "cpu" "power10"))
138   "DU_even_power10,LU_power10+SXU_power10")
140 ; SFmode loads are cracked and have additional 3 cycles over DFmode
141 ; Prefixed forms behave the same
; (hence no "prefixed" test in the two 32-bit reservations below)
142 (define_insn_reservation "power10-fpload-single" 7
143   (and (eq_attr "type" "fpload")
144        (eq_attr "update" "no")
145        (eq_attr "size" "32")
146        (eq_attr "cpu" "power10"))
147   "DU_even_power10,LU_power10")
; 32-bit update form: slot pair, then a load unit together with an SXU unit.
149 (define_insn_reservation "power10-fpload-update-single" 7
150   (and (eq_attr "type" "fpload")
151        (eq_attr "update" "yes")
152        (eq_attr "size" "32")
153        (eq_attr "cpu" "power10"))
154   "DU_even_power10,LU_power10+SXU_power10")
; Vector loads other than 256-bit: latency 4, one dispatch slot, then one
; load unit.
156 (define_insn_reservation "power10-vecload" 4
157   (and (eq_attr "type" "vecload")
158        (eq_attr "size" "!256")
159        (eq_attr "cpu" "power10"))
160   "DU_any_power10,LU_power10")
162 ; lxvp
; 256-bit vector loads: dispatch slot pair, then a load unit together with
; an SXU unit.
163 (define_insn_reservation "power10-vecload-pair" 4
164   (and (eq_attr "type" "vecload")
165        (eq_attr "size" "256")
166        (eq_attr "cpu" "power10"))
167   "DU_even_power10,LU_power10+SXU_power10")
169 ; Store Unit
; Integer, FP and vector stores without update, not prefixed, and neither
; 128-bit nor 256-bit: latency 0, one dispatch slot, then one store unit.
170 (define_insn_reservation "power10-store" 0
171   (and (eq_attr "type" "store,fpstore,vecstore")
172        (eq_attr "update" "no")
173        (eq_attr "prefixed" "no")
174        (eq_attr "size" "!128")
175        (eq_attr "size" "!256")
176        (eq_attr "cpu" "power10"))
177   "DU_any_power10,STU_power10")
; Prefixed stores of the same sizes: dispatch slot pair, then one store unit.
; NOTE(review): unlike power10-store, this condition does not test "update"
; -- confirm that is intended.
179 (define_insn_reservation "power10-prefixed-store" 0
180   (and (eq_attr "type" "store,fpstore,vecstore")
181        (eq_attr "prefixed" "yes")
182        (eq_attr "size" "!128")
183        (eq_attr "size" "!256")
184        (eq_attr "cpu" "power10"))
185   "DU_even_power10,STU_power10")
187 ; Update forms have 2 cycle latency for updated addr reg
; Only the store and fpstore types are matched here (not vecstore).
188 (define_insn_reservation "power10-store-update" 2
189   (and (eq_attr "type" "store,fpstore")
190        (eq_attr "update" "yes")
191        (eq_attr "cpu" "power10"))
192   "DU_any_power10,STU_power10")
194 ; stxvp
; 256-bit vector stores: dispatch slot pair, then both store units at once.
195 (define_insn_reservation "power10-vecstore-pair" 0
196   (and (eq_attr "type" "vecstore")
197        (eq_attr "size" "256")
198        (eq_attr "cpu" "power10"))
199   "DU_even_power10,stu0_power10+stu1_power10")
; load_l insns other than 128-bit: latency 4, one dispatch slot, then one
; load unit.
201 (define_insn_reservation "power10-larx" 4
202   (and (eq_attr "type" "load_l")
203        (eq_attr "size" "!128")
204        (eq_attr "cpu" "power10"))
205   "DU_any_power10,LU_power10")
207 ; All load quad forms
; 128-bit load and load_l: slot pair, then a load unit together with an SXU
; unit.
208 (define_insn_reservation "power10-lq" 4
209   (and (eq_attr "type" "load,load_l")
210        (eq_attr "size" "128")
211        (eq_attr "cpu" "power10"))
212   "DU_even_power10,LU_power10+SXU_power10")
; store_c insns other than 128-bit: latency 0, one dispatch slot, then one
; store unit.
214 (define_insn_reservation "power10-stcx" 0
215   (and (eq_attr "type" "store_c")
216        (eq_attr "size" "!128")
217        (eq_attr "cpu" "power10"))
218   "DU_any_power10,STU_power10")
220 ; All store quad forms
; 128-bit store and store_c: slot pair, then both store units at once.
221 (define_insn_reservation "power10-stq" 0
222   (and (eq_attr "type" "store,store_c")
223        (eq_attr "size" "128")
224        (eq_attr "cpu" "power10"))
225   "DU_even_power10,stu0_power10+stu1_power10")
; sync and isync: latency 1, dispatch slot pair, then one store unit.
227 (define_insn_reservation "power10-sync" 1
228   (and (eq_attr "type" "sync,isync")
229        (eq_attr "cpu" "power10"))
230   "DU_even_power10,STU_power10")
233 ; VSU Execution Unit
235 ; Fixed point ops
237 ; Most ALU insns are simple 2 cycle, including record form
; Matches the add, exts, integer, logical and isel types when not prefixed:
; one dispatch slot, then one execution unit.
238 (define_insn_reservation "power10-alu" 2
239   (and (eq_attr "type" "add,exts,integer,logical,isel")
240        (eq_attr "prefixed" "no")
241        (eq_attr "cpu" "power10"))
242   "DU_any_power10,EXU_power10")
243 ; 4 cycle CR latency
; (used instead of the default 2 when the consumer is a power10-crlogical,
; power10-mfcr or power10-mfcrf insn)
244 (define_bypass 4 "power10-alu"
245                  "power10-crlogical,power10-mfcr,power10-mfcrf")
247 ; paddi
; Prefixed insns of type add: same latency, but a dispatch slot pair.
248 (define_insn_reservation "power10-paddi" 2
249   (and (eq_attr "type" "add")
250        (eq_attr "prefixed" "yes")
251        (eq_attr "cpu" "power10"))
252   "DU_even_power10,EXU_power10")
254 ; Rotate/shift (non-record form)
; insert and shift types with dot "no": latency 2.
255 (define_insn_reservation "power10-rot" 2
256   (and (eq_attr "type" "insert,shift")
257        (eq_attr "dot" "no")
258        (eq_attr "cpu" "power10"))
259   "DU_any_power10,EXU_power10")
261 ; Record form rotate/shift
; insert and shift types with dot "yes": latency 3.
262 (define_insn_reservation "power10-rot-compare" 3
263   (and (eq_attr "type" "insert,shift")
264        (eq_attr "dot" "yes")
265        (eq_attr "cpu" "power10"))
266   "DU_any_power10,EXU_power10")
267 ; 5 cycle CR latency
; (for power10-crlogical, power10-mfcr and power10-mfcrf consumers)
268 (define_bypass 5 "power10-rot-compare"
269                  "power10-crlogical,power10-mfcr,power10-mfcrf")
; cntlz, popcnt and trap types: latency 3, one dispatch slot, one execution
; unit.
271 (define_insn_reservation "power10-alu2" 3
272   (and (eq_attr "type" "cntlz,popcnt,trap")
273        (eq_attr "cpu" "power10"))
274   "DU_any_power10,EXU_power10")
275 ; 5 cycle CR latency
; (for power10-crlogical, power10-mfcr and power10-mfcrf consumers)
276 (define_bypass 5 "power10-alu2"
277                  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Compares: latency 2, one dispatch slot, one execution unit.
279 (define_insn_reservation "power10-cmp" 2
280   (and (eq_attr "type" "cmp")
281        (eq_attr "cpu" "power10"))
282   "DU_any_power10,EXU_power10")
284 ; Treat 'two' and 'three' types as 2 or 3 way cracked
; Type "two": latency 4, dispatch slot pair, then one execution unit.
285 (define_insn_reservation "power10-two" 4
286   (and (eq_attr "type" "two")
287        (eq_attr "cpu" "power10"))
288   "DU_even_power10,EXU_power10")
; Type "three": latency 6, reserves all eight dispatch slots
; (DU_all_power10), then one execution unit.
290 (define_insn_reservation "power10-three" 6
291   (and (eq_attr "type" "three")
292        (eq_attr "cpu" "power10"))
293   "DU_all_power10,EXU_power10")
; Non-record-form multiply: latency 5, one dispatch slot, one execution unit.
295 (define_insn_reservation "power10-mul" 5
296   (and (eq_attr "type" "mul")
297        (eq_attr "dot" "no")
298        (eq_attr "cpu" "power10"))
299   "DU_any_power10,EXU_power10")
300 ; 4 cycle MUL->MUL latency
; (when the consumer is itself a power10-mul or power10-mul-compare insn)
301 (define_bypass 4 "power10-mul"
302                  "power10-mul,power10-mul-compare")
; Record-form multiply: same default latency, but a dispatch slot pair.
304 (define_insn_reservation "power10-mul-compare" 5
305   (and (eq_attr "type" "mul")
306        (eq_attr "dot" "yes")
307        (eq_attr "cpu" "power10"))
308   "DU_even_power10,EXU_power10")
309 ; 4 cycle MUL->MUL latency
310 (define_bypass 4 "power10-mul-compare"
311                  "power10-mul,power10-mul-compare")
312 ; 7 cycle CR latency
; (for power10-crlogical, power10-mfcr and power10-mfcrf consumers)
313 (define_bypass 7 "power10-mul-compare"
314                  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Non-record-form divide: latency 12, one dispatch slot, one execution unit.
316 (define_insn_reservation "power10-div" 12
317   (and (eq_attr "type" "div")
318        (eq_attr "dot" "no")
319        (eq_attr "cpu" "power10"))
320   "DU_any_power10,EXU_power10")
; Record-form divide: same default latency, but a dispatch slot pair.
322 (define_insn_reservation "power10-div-compare" 12
323   (and (eq_attr "type" "div")
324        (eq_attr "dot" "yes")
325        (eq_attr "cpu" "power10"))
326   "DU_even_power10,EXU_power10")
327 ; 14 cycle CR latency
; (for power10-crlogical, power10-mfcr and power10-mfcrf consumers)
328 (define_bypass 14 "power10-div-compare"
329                  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Condition-register and jump-register moves.  All use one execution unit;
; only mfcr takes a dispatch slot pair.
; cr_logical: latency 2.
331 (define_insn_reservation "power10-crlogical" 2
332   (and (eq_attr "type" "cr_logical")
333        (eq_attr "cpu" "power10"))
334   "DU_any_power10,EXU_power10")
; mfcrf: latency 2.
336 (define_insn_reservation "power10-mfcrf" 2
337   (and (eq_attr "type" "mfcrf")
338        (eq_attr "cpu" "power10"))
339   "DU_any_power10,EXU_power10")
; mfcr: latency 3, dispatch slot pair.
341 (define_insn_reservation "power10-mfcr" 3
342   (and (eq_attr "type" "mfcr")
343        (eq_attr "cpu" "power10"))
344   "DU_even_power10,EXU_power10")
346 ; Should differentiate between 1 cr field and > 1 since target of > 1 cr
347 ; is cracked
; (as written, every mtcr is modelled the same way: latency 3, one slot)
348 (define_insn_reservation "power10-mtcr" 3
349   (and (eq_attr "type" "mtcr")
350        (eq_attr "cpu" "power10"))
351   "DU_any_power10,EXU_power10")
; mtjmpr: latency 3.
353 (define_insn_reservation "power10-mtjmpr" 3
354   (and (eq_attr "type" "mtjmpr")
355        (eq_attr "cpu" "power10"))
356   "DU_any_power10,EXU_power10")
; mfjmpr: latency 2.
358 (define_insn_reservation "power10-mfjmpr" 2
359   (and (eq_attr "type" "mfjmpr")
360        (eq_attr "cpu" "power10"))
361   "DU_any_power10,EXU_power10")
364 ; Floating point/Vector ops
; Every reservation in this group takes one dispatch slot and then one
; execution unit; the entries differ only in the types matched and latency.
; fpsimple: latency 3.
366 (define_insn_reservation "power10-fpsimple" 3
367   (and (eq_attr "type" "fpsimple")
368        (eq_attr "cpu" "power10"))
369   "DU_any_power10,EXU_power10")
; fp and dmul: latency 5.
371 (define_insn_reservation "power10-fp" 5
372   (and (eq_attr "type" "fp,dmul")
373        (eq_attr "cpu" "power10"))
374   "DU_any_power10,EXU_power10")
; fpcompare: latency 3.
376 (define_insn_reservation "power10-fpcompare" 3
377   (and (eq_attr "type" "fpcompare")
378        (eq_attr "cpu" "power10"))
379   "DU_any_power10,EXU_power10")
; sdiv: latency 22.
381 (define_insn_reservation "power10-sdiv" 22
382   (and (eq_attr "type" "sdiv")
383        (eq_attr "cpu" "power10"))
384   "DU_any_power10,EXU_power10")
; ddiv: latency 27.
386 (define_insn_reservation "power10-ddiv" 27
387   (and (eq_attr "type" "ddiv")
388        (eq_attr "cpu" "power10"))
389   "DU_any_power10,EXU_power10")
; ssqrt: latency 26 (the reservation is named "power10-sqrt").
391 (define_insn_reservation "power10-sqrt" 26
392   (and (eq_attr "type" "ssqrt")
393        (eq_attr "cpu" "power10"))
394   "DU_any_power10,EXU_power10")
; dsqrt: latency 36.
396 (define_insn_reservation "power10-dsqrt" 36
397   (and (eq_attr "type" "dsqrt")
398        (eq_attr "cpu" "power10"))
399   "DU_any_power10,EXU_power10")
; Vector ops that take one dispatch slot and then one execution unit.
; vecmove, veclogical, vecexts and veccmpfx: latency 2.
401 (define_insn_reservation "power10-vec-2cyc" 2
402   (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
403        (eq_attr "cpu" "power10"))
404   "DU_any_power10,EXU_power10")
; veccmp: latency 3.
406 (define_insn_reservation "power10-veccmp" 3
407   (and (eq_attr "type" "veccmp")
408        (eq_attr "cpu" "power10"))
409   "DU_any_power10,EXU_power10")
; vecsimple: latency 2.
411 (define_insn_reservation "power10-vecsimple" 2
412   (and (eq_attr "type" "vecsimple")
413        (eq_attr "cpu" "power10"))
414   "DU_any_power10,EXU_power10")
; vecfloat and vecdouble other than 128-bit: latency 5.
416 (define_insn_reservation "power10-vecnormal" 5
417   (and (eq_attr "type" "vecfloat,vecdouble")
418        (eq_attr "size" "!128")
419        (eq_attr "cpu" "power10"))
420   "DU_any_power10,EXU_power10")
; vecfloat and vecdouble with size 128: latency 12.
422 (define_insn_reservation "power10-qp" 12
423   (and (eq_attr "type" "vecfloat,vecdouble")
424        (eq_attr "size" "128")
425        (eq_attr "cpu" "power10"))
426   "DU_any_power10,EXU_power10")
; vecperm insns: latency 3 in every form, one execution unit.
; Plain form (not prefixed, dot "no"): one dispatch slot.
428 (define_insn_reservation "power10-vecperm" 3
429   (and (eq_attr "type" "vecperm")
430        (eq_attr "prefixed" "no")
431        (eq_attr "dot" "no")
432        (eq_attr "cpu" "power10"))
433   "DU_any_power10,EXU_power10")
; Record form (dot "yes"): dispatch slot pair.
435 (define_insn_reservation "power10-vecperm-compare" 3
436   (and (eq_attr "type" "vecperm")
437        (eq_attr "dot" "yes")
438        (eq_attr "cpu" "power10"))
439   "DU_even_power10,EXU_power10")
; Prefixed form: dispatch slot pair.
441 (define_insn_reservation "power10-prefixed-vecperm" 3
442   (and (eq_attr "type" "vecperm")
443        (eq_attr "prefixed" "yes")
444        (eq_attr "cpu" "power10"))
445   "DU_even_power10,EXU_power10")
; Longer-latency vector ops; each takes one dispatch slot and then one
; execution unit.
; veccomplex: latency 6.
447 (define_insn_reservation "power10-veccomplex" 6
448   (and (eq_attr "type" "veccomplex")
449        (eq_attr "cpu" "power10"))
450   "DU_any_power10,EXU_power10")
; vecfdiv: latency 24.
452 (define_insn_reservation "power10-vecfdiv" 24
453   (and (eq_attr "type" "vecfdiv")
454        (eq_attr "cpu" "power10"))
455   "DU_any_power10,EXU_power10")
; vecdiv other than 128-bit: latency 27.
457 (define_insn_reservation "power10-vecdiv" 27
458   (and (eq_attr "type" "vecdiv")
459        (eq_attr "size" "!128")
460        (eq_attr "cpu" "power10"))
461   "DU_any_power10,EXU_power10")
; vecdiv with size 128: latency 56.
463 (define_insn_reservation "power10-qpdiv" 56
464   (and (eq_attr "type" "vecdiv")
465        (eq_attr "size" "128")
466        (eq_attr "cpu" "power10"))
467   "DU_any_power10,EXU_power10")
; qmul with size 128: latency 24.
469 (define_insn_reservation "power10-qpmul" 24
470   (and (eq_attr "type" "qmul")
471        (eq_attr "size" "128")
472        (eq_attr "cpu" "power10"))
473   "DU_any_power10,EXU_power10")
; mtvsr: latency 2, one dispatch slot, then one execution unit.
475 (define_insn_reservation "power10-mtvsr" 2
476   (and (eq_attr "type" "mtvsr")
477        (eq_attr "cpu" "power10"))
478   "DU_any_power10,EXU_power10")
; mfvsr: latency 2, one dispatch slot, then one execution unit.
480 (define_insn_reservation "power10-mfvsr" 2
481   (and (eq_attr "type" "mfvsr")
482        (eq_attr "cpu" "power10"))
483   "DU_any_power10,EXU_power10")
486 ; Branch
487 ; Branch is 2 cycles, grouped with STU for issue
; (jmpreg and branch types reserve one dispatch slot, then one store unit)
488 (define_insn_reservation "power10-branch" 2
489   (and (eq_attr "type" "jmpreg,branch")
490        (eq_attr "cpu" "power10"))
491   "DU_any_power10,STU_power10")
494 ; Crypto
; crypto type: latency 4, one dispatch slot, then one execution unit.
495 (define_insn_reservation "power10-crypto" 4
496   (and (eq_attr "type" "crypto")
497        (eq_attr "cpu" "power10"))
498   "DU_any_power10,EXU_power10")
501 ; HTM
; htmsimple and htm types: latency 2, one dispatch slot, then one execution
; unit.
502 (define_insn_reservation "power10-htm" 2
503   (and (eq_attr "type" "htmsimple,htm")
504        (eq_attr "cpu" "power10"))
505   "DU_any_power10,EXU_power10")
508 ; DFP
509 ; Use the minimum 12 cycle latency for all DFP insns
; dfp other than 128-bit: one dispatch slot, then one execution unit.
510 (define_insn_reservation "power10-dfp" 12
511   (and (eq_attr "type" "dfp")
512        (eq_attr "size" "!128")
513        (eq_attr "cpu" "power10"))
514   "DU_any_power10,EXU_power10")
; dfp with size 128: same latency, but a dispatch slot pair.
516 (define_insn_reservation "power10-dfpq" 12
517   (and (eq_attr "type" "dfp")
518        (eq_attr "size" "128")
519        (eq_attr "cpu" "power10"))
520   "DU_even_power10,EXU_power10")
522 ; MMA
; Non-prefixed mma insns: latency 9, one dispatch slot, then a pair of
; execution units (EXU_super_power10).
523 (define_insn_reservation "power10-mma" 9
524   (and (eq_attr "type" "mma")
525        (eq_attr "prefixed" "no")
526        (eq_attr "cpu" "power10"))
527   "DU_any_power10,EXU_super_power10")
; Prefixed mma insns: same latency and execution-unit pair, but a dispatch
; slot pair.
529 (define_insn_reservation "power10-prefixed-mma" 9
530   (and (eq_attr "type" "mma")
531        (eq_attr "prefixed" "yes")
532        (eq_attr "cpu" "power10"))
533   "DU_even_power10,EXU_super_power10")
534 ; 4 cycle MMA->MMA latency
; (applies between any combination of the two mma reservations above)
535 (define_bypass 4 "power10-mma,power10-prefixed-mma"
536                  "power10-mma,power10-prefixed-mma")