gcov: make profile merging smarter
[official-gcc.git] / gcc / config / rs6000 / power10.md
blob0186ae958965790ed24d68da7a9b5e460294ec80
1 ;; Scheduling description for the IBM POWER10 processor.
2 ;; Copyright (C) 2020-2021 Free Software Foundation, Inc.
3 ;;
4 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify it
9 ;; under the terms of the GNU General Public License as published
10 ;; by the Free Software Foundation; either version 3, or (at your
11 ;; option) any later version.
13 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
14 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 ;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16 ;; License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3.  If not see
20 ;; <http://www.gnu.org/licenses/>.
22 ; For Power10 we model (and try to pack) the in-order decode/dispatch groups
23 ; which consist of 8 instructions max.  We do not try to model the details of
24 ; the out-of-order issue queues and how insns flow to the various execution
25 ; units except for the simple representation of the issue limitation of at
26 ; most 4 insns to the execution units/2 insns to the load units/2 insns to
27 ; the store units.
; Two automata: one for the decode/dispatch slots, one for issue to the
; execution, load and store units.
(define_automaton
  "power10dispatch,power10issue")
; The eight decode/dispatch slots, tracked by the dispatch automaton.
(define_cpu_unit
  "du0_power10,du1_power10,du2_power10,du3_power10,du4_power10,du5_power10,
   du6_power10,du7_power10"
  "power10dispatch")
; The four execution units, tracked by the issue automaton.
(define_cpu_unit
  "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
  "power10issue")
; A pair of load units ...
(define_cpu_unit
  "lu0_power10,lu1_power10"
  "power10issue")
; ... and a pair of store units, both in the issue automaton.
(define_cpu_unit
  "stu0_power10,stu1_power10"
  "power10issue")
; Slots are handed out in program order: a lower-numbered slot may only be
; reserved while every higher-numbered slot is still unreserved.
(absence_set "du0_power10"
             "du1_power10,du2_power10,du3_power10,du4_power10,du5_power10,\
              du6_power10,du7_power10")
(absence_set "du1_power10"
             "du2_power10,du3_power10,du4_power10,du5_power10,du6_power10,\
              du7_power10")
(absence_set "du2_power10"
             "du3_power10,du4_power10,du5_power10,du6_power10,du7_power10")
(absence_set "du3_power10"
             "du4_power10,du5_power10,du6_power10,du7_power10")
(absence_set "du4_power10"
             "du5_power10,du6_power10,du7_power10")
(absence_set "du5_power10"
             "du6_power10,du7_power10")
(absence_set "du6_power10"
             "du7_power10")
55 ; Dispatch port reservations
; Power10 can dispatch a maximum of 8 iops per cycle, with a maximum of
; 4 VSU/2 Load/2 Store per cycle.
; Exactly one dispatch slot, whichever of the eight is available.
(define_reservation "DU_any_power10"
  "du0_power10|du1_power10|du2_power10|du3_power10
   |du4_power10|du5_power10|du6_power10|du7_power10")
; Two-slot reservation: an even slot together with the odd slot after it.
(define_reservation "DU_even_power10"
  "du0_power10+du1_power10
   |du2_power10+du3_power10
   |du4_power10+du5_power10
   |du6_power10+du7_power10")
; Whole-group reservation: claims all eight dispatch slots at once, i.e. the
; insn consumes the entire decode/dispatch cycle.
(define_reservation "DU_all_power10"
  "du0_power10+du1_power10+du2_power10+du3_power10
   +du4_power10+du5_power10+du6_power10+du7_power10")
; Execution unit reservations

; Either of the two load units.
(define_reservation "LU_power10" "lu0_power10|lu1_power10")
; Either of the two store units.
(define_reservation "STU_power10" "stu0_power10|stu1_power10")
; Store-agen pipe, which can also execute certain simple fixed-point insns.
; It is modelled on the same two units as STU_power10.
(define_reservation "SXU_power10" "stu0_power10|stu1_power10")
; Any single one of the four execution units.
(define_reservation "EXU_power10"
  "exu0_power10|exu1_power10
   |exu2_power10|exu3_power10")
; Two execution units reserved together: either exu0+exu1 or exu2+exu3.
(define_reservation "EXU_super_power10"
  "exu0_power10+exu1_power10
   |exu2_power10+exu3_power10")
; Load Unit

; Ordinary integer load (no update, not quad, not prefixed): latency 4,
; one dispatch slot, then a load unit.
(define_insn_reservation "power10-load" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "load")
       (eq_attr "prefixed" "no")
       (eq_attr "update" "no")
       (eq_attr "size" "!128"))
  "DU_any_power10,LU_power10")
; Fused load sequences: latency 4, an even/odd slot pair, then a load unit.
(define_insn_reservation "power10-fused-load" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load"))
  "DU_even_power10,LU_power10")
; Prefixed integer load (no update, not quad): same latency as the
; non-prefixed form but takes an even/odd slot pair at dispatch.
(define_insn_reservation "power10-prefixed-load" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "load")
       (eq_attr "prefixed" "yes")
       (eq_attr "update" "no")
       (eq_attr "size" "!128"))
  "DU_even_power10,LU_power10")
; Integer load with update: latency 4, an even/odd slot pair, then a load
; unit and the store-agen pipe in the same cycle.
(define_insn_reservation "power10-load-update" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "load")
       (eq_attr "update" "yes"))
  "DU_even_power10,LU_power10+SXU_power10")
; 64-bit FP load (no update, not prefixed): latency 4, one dispatch slot,
; then a load unit.
(define_insn_reservation "power10-fpload-double" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fpload")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "no")
       (eq_attr "update" "no"))
  "DU_any_power10,LU_power10")
; Prefixed 64-bit FP load (no update): latency 4, an even/odd slot pair,
; then a load unit.
(define_insn_reservation "power10-prefixed-fpload-double" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fpload")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "yes")
       (eq_attr "update" "no"))
  "DU_even_power10,LU_power10")
; 64-bit FP load with update: latency 4, an even/odd slot pair, then a load
; unit and the store-agen pipe in the same cycle.
(define_insn_reservation "power10-fpload-update-double" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fpload")
       (eq_attr "size" "64")
       (eq_attr "update" "yes"))
  "DU_even_power10,LU_power10+SXU_power10")
; SFmode loads are cracked and cost 3 cycles more than DFmode loads
; (latency 7 instead of 4).  Prefixed forms behave the same, so there is
; no test on the "prefixed" attribute here.
(define_insn_reservation "power10-fpload-single" 7
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fpload")
       (eq_attr "size" "32")
       (eq_attr "update" "no"))
  "DU_even_power10,LU_power10")
; 32-bit FP load with update: latency 7, an even/odd slot pair, then a load
; unit and the store-agen pipe in the same cycle.
(define_insn_reservation "power10-fpload-update-single" 7
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fpload")
       (eq_attr "size" "32")
       (eq_attr "update" "yes"))
  "DU_even_power10,LU_power10+SXU_power10")
; Vector load of any size other than 256 bits: latency 4, one dispatch
; slot, then a load unit.
(define_insn_reservation "power10-vecload" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecload")
       (eq_attr "size" "!256"))
  "DU_any_power10,LU_power10")
; lxvp (256-bit vector pair load): latency 4, an even/odd slot pair, then a
; load unit and the store-agen pipe in the same cycle.
(define_insn_reservation "power10-vecload-pair" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecload")
       (eq_attr "size" "256"))
  "DU_even_power10,LU_power10+SXU_power10")
; Store Unit

; Ordinary integer/FP/vector store (no update, not prefixed, neither
; 128-bit nor 256-bit): latency 0, one dispatch slot, then a store unit.
(define_insn_reservation "power10-store" 0
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "prefixed" "no")
       (eq_attr "update" "no")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256"))
  "DU_any_power10,STU_power10")
; Fused store/store pair: latency 0, an even/odd slot pair, then a store
; unit.
(define_insn_reservation "power10-fused-store" 0
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fused_store_store"))
  "DU_even_power10,STU_power10")
; Prefixed integer/FP/vector store (neither 128-bit nor 256-bit):
; latency 0, an even/odd slot pair, then a store unit.
(define_insn_reservation "power10-prefixed-store" 0
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "prefixed" "yes")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256"))
  "DU_even_power10,STU_power10")
; Integer/FP store with update.  The latency of 2 is for the updated
; address register.
(define_insn_reservation "power10-store-update" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "store,fpstore")
       (eq_attr "update" "yes"))
  "DU_any_power10,STU_power10")
; stxvp (256-bit vector pair store): latency 0, an even/odd slot pair,
; then both store units in the same cycle.
(define_insn_reservation "power10-vecstore-pair" 0
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecstore")
       (eq_attr "size" "256"))
  "DU_even_power10,stu0_power10+stu1_power10")
; Load-and-reserve, excluding the quad form: latency 4, one dispatch slot,
; then a load unit.
(define_insn_reservation "power10-larx" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "load_l")
       (eq_attr "size" "!128"))
  "DU_any_power10,LU_power10")
; Every quad (128-bit) load form, plain or load-and-reserve: latency 4, an
; even/odd slot pair, then a load unit and the store-agen pipe together.
(define_insn_reservation "power10-lq" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "load,load_l")
       (eq_attr "size" "128"))
  "DU_even_power10,LU_power10+SXU_power10")
; Store-conditional, excluding the quad form: latency 0, one dispatch slot,
; then a store unit.
(define_insn_reservation "power10-stcx" 0
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "store_c")
       (eq_attr "size" "!128"))
  "DU_any_power10,STU_power10")
; Every quad (128-bit) store form, plain or store-conditional: latency 0,
; an even/odd slot pair, then both store units in the same cycle.
(define_insn_reservation "power10-stq" 0
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "store,store_c")
       (eq_attr "size" "128"))
  "DU_even_power10,stu0_power10+stu1_power10")
; sync/isync: latency 1, an even/odd slot pair, then a store unit.
(define_insn_reservation "power10-sync" 1
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "sync,isync"))
  "DU_even_power10,STU_power10")
; VSU Execution Unit

; Fixed point ops

; The bulk of the ALU insns, record forms included, are simple 2-cycle
; operations.  Prefixed forms are excluded here.
(define_insn_reservation "power10-alu" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "prefixed" "no"))
  "DU_any_power10,EXU_power10")
; CR consumers see the result after 4 cycles.
(define_bypass 4
  "power10-alu"
  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Fused fixed-point sequences: latency 2, an even/odd slot pair, then one
; execution unit.
(define_insn_reservation "power10-fused_alu" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fused_arith_logical,fused_cmp_isel,fused_carry"))
  "DU_even_power10,EXU_power10")
; paddi (prefixed add): latency 2, an even/odd slot pair, then one
; execution unit.
(define_insn_reservation "power10-paddi" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "add")
       (eq_attr "prefixed" "yes"))
  "DU_even_power10,EXU_power10")
; Non-record-form rotate/shift/insert: latency 2.
(define_insn_reservation "power10-rot" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no"))
  "DU_any_power10,EXU_power10")
; Record-form rotate/shift/insert: latency 3.
(define_insn_reservation "power10-rot-compare" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes"))
  "DU_any_power10,EXU_power10")
; CR consumers see the result after 5 cycles.
(define_bypass 5
  "power10-rot-compare"
  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Count-leading-zeros, popcount and trap: latency 3.
(define_insn_reservation "power10-alu2" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "cntlz,popcnt,trap"))
  "DU_any_power10,EXU_power10")
; CR consumers see the result after 5 cycles.
(define_bypass 5
  "power10-alu2"
  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Compare: latency 2, one dispatch slot, then one execution unit.
(define_insn_reservation "power10-cmp" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "cmp"))
  "DU_any_power10,EXU_power10")
; The 'two' and 'three' types are modelled as 2-way and 3-way cracked.
; Type 'two': latency 4, an even/odd slot pair, then one execution unit.
(define_insn_reservation "power10-two" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "two"))
  "DU_even_power10,EXU_power10")
; Type 'three': latency 6, takes the whole dispatch group, then one
; execution unit.
(define_insn_reservation "power10-three" 6
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "three"))
  "DU_all_power10,EXU_power10")
; Non-record-form multiply: latency 5.
(define_insn_reservation "power10-mul" 5
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mul")
       (eq_attr "dot" "no"))
  "DU_any_power10,EXU_power10")
; A multiply feeding another multiply only waits 4 cycles.
(define_bypass 4
  "power10-mul"
  "power10-mul,power10-mul-compare")
; Record-form multiply: latency 5, dispatched in an even/odd slot pair.
(define_insn_reservation "power10-mul-compare" 5
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mul")
       (eq_attr "dot" "yes"))
  "DU_even_power10,EXU_power10")
; A multiply feeding another multiply only waits 4 cycles.
(define_bypass 4
  "power10-mul-compare"
  "power10-mul,power10-mul-compare")
; CR consumers see the result after 7 cycles.
(define_bypass 7
  "power10-mul-compare"
  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Non-record-form integer divide: latency 12.
(define_insn_reservation "power10-div" 12
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "div")
       (eq_attr "dot" "no"))
  "DU_any_power10,EXU_power10")
; Record-form integer divide: latency 12, dispatched in an even/odd slot
; pair.
(define_insn_reservation "power10-div-compare" 12
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "div")
       (eq_attr "dot" "yes"))
  "DU_even_power10,EXU_power10")
; CR consumers see the result after 14 cycles.
(define_bypass 14
  "power10-div-compare"
  "power10-crlogical,power10-mfcr,power10-mfcrf")
; CR logical ops: latency 2.
(define_insn_reservation "power10-crlogical" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "cr_logical"))
  "DU_any_power10,EXU_power10")
; mfcrf: latency 2, one dispatch slot.
(define_insn_reservation "power10-mfcrf" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mfcrf"))
  "DU_any_power10,EXU_power10")
; mfcr: latency 3, dispatched in an even/odd slot pair.
(define_insn_reservation "power10-mfcr" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mfcr"))
  "DU_even_power10,EXU_power10")
; mtcr: latency 3.  A target of more than one CR field is cracked, so this
; should really distinguish the single-field case from the multi-field one;
; the model currently treats both alike.
(define_insn_reservation "power10-mtcr" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mtcr"))
  "DU_any_power10,EXU_power10")
; Insns of type mtjmpr: latency 3.
(define_insn_reservation "power10-mtjmpr" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mtjmpr"))
  "DU_any_power10,EXU_power10")
; Insns of type mfjmpr: latency 2.
(define_insn_reservation "power10-mfjmpr" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mfjmpr"))
  "DU_any_power10,EXU_power10")
; Floating point/Vector ops

; Simple FP ops: latency 3.
(define_insn_reservation "power10-fpsimple" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fpsimple"))
  "DU_any_power10,EXU_power10")
; Insns of type fp or dmul: latency 5.
(define_insn_reservation "power10-fp" 5
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fp,dmul"))
  "DU_any_power10,EXU_power10")
; FP compare: latency 3.
(define_insn_reservation "power10-fpcompare" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fpcompare"))
  "DU_any_power10,EXU_power10")
; Insns of type sdiv: latency 22.
(define_insn_reservation "power10-sdiv" 22
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "sdiv"))
  "DU_any_power10,EXU_power10")
; Insns of type ddiv: latency 27.
(define_insn_reservation "power10-ddiv" 27
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "ddiv"))
  "DU_any_power10,EXU_power10")
; Insns of type ssqrt: latency 26.
(define_insn_reservation "power10-sqrt" 26
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "ssqrt"))
  "DU_any_power10,EXU_power10")
; Insns of type dsqrt: latency 36.
(define_insn_reservation "power10-dsqrt" 36
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "dsqrt"))
  "DU_any_power10,EXU_power10")
; Vector ops with 2-cycle latency: vecmove, veclogical, vecexts and
; veccmpfx.
(define_insn_reservation "power10-vec-2cyc" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx"))
  "DU_any_power10,EXU_power10")
; Fused vector sequences: latency 2, an even/odd slot pair, then one
; execution unit.
(define_insn_reservation "power10-fused-vec" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fused_vector"))
  "DU_even_power10,EXU_power10")
; Insns of type veccmp: latency 3.
(define_insn_reservation "power10-veccmp" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "veccmp"))
  "DU_any_power10,EXU_power10")
; Insns of type vecsimple: latency 2.
(define_insn_reservation "power10-vecsimple" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecsimple"))
  "DU_any_power10,EXU_power10")
; vecfloat/vecdouble insns other than the 128-bit ones: latency 5.
(define_insn_reservation "power10-vecnormal" 5
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128"))
  "DU_any_power10,EXU_power10")
; 128-bit vecfloat/vecdouble insns: latency 12.
(define_insn_reservation "power10-qp" 12
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128"))
  "DU_any_power10,EXU_power10")
; Permute-type vector op, neither prefixed nor record form: latency 3, one
; dispatch slot.
(define_insn_reservation "power10-vecperm" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecperm")
       (eq_attr "dot" "no")
       (eq_attr "prefixed" "no"))
  "DU_any_power10,EXU_power10")
; Record-form permute-type vector op: latency 3, an even/odd slot pair.
(define_insn_reservation "power10-vecperm-compare" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecperm")
       (eq_attr "dot" "yes"))
  "DU_even_power10,EXU_power10")
; Prefixed permute-type vector op: latency 3, an even/odd slot pair.
(define_insn_reservation "power10-prefixed-vecperm" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "yes"))
  "DU_even_power10,EXU_power10")
; Insns of type veccomplex: latency 6.
(define_insn_reservation "power10-veccomplex" 6
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "veccomplex"))
  "DU_any_power10,EXU_power10")
; Insns of type vecfdiv: latency 24.
(define_insn_reservation "power10-vecfdiv" 24
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecfdiv"))
  "DU_any_power10,EXU_power10")
; vecdiv insns other than the 128-bit ones: latency 27.
(define_insn_reservation "power10-vecdiv" 27
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128"))
  "DU_any_power10,EXU_power10")
; 128-bit vecdiv insns: latency 56.
(define_insn_reservation "power10-qpdiv" 56
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "vecdiv")
       (eq_attr "size" "128"))
  "DU_any_power10,EXU_power10")
; 128-bit qmul insns: latency 24.
(define_insn_reservation "power10-qpmul" 24
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "qmul")
       (eq_attr "size" "128"))
  "DU_any_power10,EXU_power10")
; Insns of type mtvsr: latency 2.
(define_insn_reservation "power10-mtvsr" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mtvsr"))
  "DU_any_power10,EXU_power10")
; Insns of type mfvsr: latency 2.
(define_insn_reservation "power10-mfvsr" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mfvsr"))
  "DU_any_power10,EXU_power10")
; Branch

; Branches take 2 cycles and, for issue purposes, are grouped with the
; store units.
(define_insn_reservation "power10-branch" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "jmpreg,branch"))
  "DU_any_power10,STU_power10")
; Insns of type fused_mtbc: latency 3, an even/odd slot pair, then a store
; unit.
(define_insn_reservation "power10-fused-branch" 3
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "fused_mtbc"))
  "DU_even_power10,STU_power10")
; Crypto

; Crypto insns: latency 4.
(define_insn_reservation "power10-crypto" 4
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "crypto"))
  "DU_any_power10,EXU_power10")
; HTM

; Insns of type htmsimple or htm: latency 2.
(define_insn_reservation "power10-htm" 2
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "htmsimple,htm"))
  "DU_any_power10,EXU_power10")
; DFP

; Every DFP insn is modelled with the minimum latency of 12 cycles.
; This entry covers the DFP insns other than the 128-bit ones.
(define_insn_reservation "power10-dfp" 12
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "dfp")
       (eq_attr "size" "!128"))
  "DU_any_power10,EXU_power10")
; 128-bit DFP insns: latency 12, dispatched in an even/odd slot pair.
(define_insn_reservation "power10-dfpq" 12
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "dfp")
       (eq_attr "size" "128"))
  "DU_even_power10,EXU_power10")
; MMA

; Non-prefixed MMA insn: latency 9, one dispatch slot, then a pair of
; execution units reserved together.
(define_insn_reservation "power10-mma" 9
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mma")
       (eq_attr "prefixed" "no"))
  "DU_any_power10,EXU_super_power10")
; Prefixed MMA insn: latency 9, an even/odd slot pair, then a pair of
; execution units reserved together.
(define_insn_reservation "power10-prefixed-mma" 9
  (and (eq_attr "cpu" "power10")
       (eq_attr "type" "mma")
       (eq_attr "prefixed" "yes"))
  "DU_even_power10,EXU_super_power10")
; An MMA insn feeding another MMA insn only waits 4 cycles, whether or not
; either one is prefixed.
(define_bypass 4
  "power10-mma,power10-prefixed-mma"
  "power10-mma,power10-prefixed-mma")