diagnostics: rename tree-diagnostic-path.cc to diagnostic-path.cc
[official-gcc.git] / gcc / config / aarch64 / thunderx3t110.md
blobf0ece79f62b220351ff0b149a5a3f5730daafcef
1 ;; Cavium ThunderX 3 CN11xx pipeline description
2 ;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
3 ;;
4 ;; Contributed by Marvell
6 ;; This file is part of GCC.
8 ;; GCC is free software; you can redistribute it and/or modify
9 ;; it under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; any later version.
13 ;; GCC is distributed in the hope that it will be useful,
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 ;; GNU General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3.  If not see
20 ;; <http://www.gnu.org/licenses/>.
; Automata used by this description.  The unit declarations that follow
; assign the integer issue units to "thunderx3t110", the FP/SIMD units to
; "thunderx3t110_advsimd", the load/store units to "thunderx3t110_ldst"
; and the multiply pseudo-units to "thunderx3t110_mult".
22 (define_automaton "thunderx3t110, thunderx3t110_advsimd, thunderx3t110_ldst")
23 (define_automaton "thunderx3t110_mult")
; Integer issue units i0..i3 (automaton "thunderx3t110").
25 (define_cpu_unit "thunderx3t110_i0" "thunderx3t110")
26 (define_cpu_unit "thunderx3t110_i1" "thunderx3t110")
27 (define_cpu_unit "thunderx3t110_i2" "thunderx3t110")
28 (define_cpu_unit "thunderx3t110_i3" "thunderx3t110")
; Load/store units ls0/ls1 plus the sd unit that the store reservations
; in this file occupy after ls0/ls1 (automaton "thunderx3t110_ldst").
30 (define_cpu_unit "thunderx3t110_ls0" "thunderx3t110_ldst")
31 (define_cpu_unit "thunderx3t110_ls1" "thunderx3t110_ldst")
32 (define_cpu_unit "thunderx3t110_sd" "thunderx3t110_ldst")
34 ; Pseudo-units for multiply pipeline.
35 ; unchanged from TX2, occupies I1 for four (1 + 3 additional) slots
; i1m1..i1m3 are the three additional multiply slots; the madd and mul
; reservations step through them after the issue cycle.
37 (define_cpu_unit "thunderx3t110_i1m1" "thunderx3t110_mult")
38 (define_cpu_unit "thunderx3t110_i1m2" "thunderx3t110_mult")
39 (define_cpu_unit "thunderx3t110_i1m3" "thunderx3t110_mult")
41 ; Pseudo-units for load delay (assuming dcache hit).
; Three delay stages behind ls0 (ls0d1..ls0d3); chained together by the
; thunderx3t110_l0delay reservation.
43 (define_cpu_unit "thunderx3t110_ls0d1" "thunderx3t110_ldst")
44 (define_cpu_unit "thunderx3t110_ls0d2" "thunderx3t110_ldst")
45 (define_cpu_unit "thunderx3t110_ls0d3" "thunderx3t110_ldst")
; Same three delay stages behind ls1; chained by thunderx3t110_l1delay.
47 (define_cpu_unit "thunderx3t110_ls1d1" "thunderx3t110_ldst")
48 (define_cpu_unit "thunderx3t110_ls1d2" "thunderx3t110_ldst")
49 (define_cpu_unit "thunderx3t110_ls1d3" "thunderx3t110_ldst")
51 ; Define FP units f0/f1/f2/f3.
; All four FP/SIMD units belong to the "thunderx3t110_advsimd" automaton.
52 (define_cpu_unit "thunderx3t110_f0" "thunderx3t110_advsimd")
53 (define_cpu_unit "thunderx3t110_f1" "thunderx3t110_advsimd")
54 (define_cpu_unit "thunderx3t110_f2" "thunderx3t110_advsimd")
55 (define_cpu_unit "thunderx3t110_f3" "thunderx3t110_advsimd")
; Convenience aliases.  Each name spells out the set of units it may
; choose from: the digits after "i", "ls" or "f" are the unit numbers,
; e.g. i23 = i2 or i3, f0123 = any one of f0..f3.
57 (define_reservation "thunderx3t110_i23" "thunderx3t110_i2|thunderx3t110_i3")
58 (define_reservation "thunderx3t110_i01"
59     "thunderx3t110_i0|thunderx3t110_i1")
60 (define_reservation "thunderx3t110_i012"
61     "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2")
62 (define_reservation "thunderx3t110_i0123"
63     "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2|thunderx3t110_i3")
64 (define_reservation "thunderx3t110_ls01" "thunderx3t110_ls0|thunderx3t110_ls1")
65 (define_reservation "thunderx3t110_f01" "thunderx3t110_f0|thunderx3t110_f1")
66 (define_reservation "thunderx3t110_f23" "thunderx3t110_f2|thunderx3t110_f3")
67 (define_reservation "thunderx3t110_f0123"
68     "thunderx3t110_f0|thunderx3t110_f1|thunderx3t110_f2|thunderx3t110_f3")
70 ; A load with delay in the ls0/ls1 pipes.
71 ; this is always a delay of four
; l0delay: ls0 followed by its three delay pseudo-units.
72 (define_reservation "thunderx3t110_l0delay"
73     "thunderx3t110_ls0,thunderx3t110_ls0d1,thunderx3t110_ls0d2,\
74      thunderx3t110_ls0d3")
; l1delay: the same sequence on the ls1 pipe.
75 (define_reservation "thunderx3t110_l1delay"
76     "thunderx3t110_ls1,thunderx3t110_ls1d1,thunderx3t110_ls1d2,\
77      thunderx3t110_ls1d3")
; l01delay: either of the two delayed pipes.
78 (define_reservation "thunderx3t110_l01delay"
79     "thunderx3t110_l0delay|thunderx3t110_l1delay")
80 ;; Branch and call instructions.
; Calls, branches and traps: default latency 1, issued on i2 or i3.
82 (define_insn_reservation "thunderx3t110_branch" 1
83   (and (eq_attr "tune" "thunderx3t110")
84        (eq_attr "type" "call,branch,trap"))
85   "thunderx3t110_i23")
87 ;; Misc instructions.
89 ; Speculation barrier
; Type "block": default latency 0 and no unit reserved.
90 (define_insn_reservation "thunderx3t110_nothing" 0
91   (and (eq_attr "tune" "thunderx3t110")
92        (eq_attr "type" "block"))
93   "nothing")
; Type "mrs": default latency 0, but the insn still occupies i2.
95 (define_insn_reservation "thunderx3t110_mrs" 0
96   (and (eq_attr "tune" "thunderx3t110")
97        (eq_attr "type" "mrs"))
98   "thunderx3t110_i2")
; Type "multiple": default latency 1; reserves a whole group of units
; together (integer, load/store, multiply pseudo-units and FP).
; NOTE(review): the list names i0, i1 and i3 but not i2, and f0/f1 but
; not f2/f3 -- confirm whether those omissions are intended.
100 (define_insn_reservation "thunderx3t110_multiple" 1
101   (and (eq_attr "tune" "thunderx3t110")
102        (eq_attr "type" "multiple"))
103   "thunderx3t110_i0+thunderx3t110_i1+thunderx3t110_i3+thunderx3t110_ls0+\
104    thunderx3t110_ls1+thunderx3t110_sd+thunderx3t110_i1m1+thunderx3t110_i1m2+\
105    thunderx3t110_i1m3+thunderx3t110_f0+thunderx3t110_f1")
107 ;; Integer arithmetic/logic instructions.
109 ; Plain register moves are handled by renaming,
110 ; and don't create any uops.
; Type "mov_reg": default latency 0 and no unit reserved.
111 (define_insn_reservation "thunderx3t110_regmove" 0
112   (and (eq_attr "tune" "thunderx3t110")
113        (eq_attr "type" "mov_reg"))
114   "nothing")
; Basic ALU, logic, shift, bitfield and move-immediate types: default
; latency 1, issued on any of i0..i3.
116 (define_insn_reservation "thunderx3t110_alu_basic" 1
117   (and (eq_attr "tune" "thunderx3t110")
118        (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\
119                         adc_reg,adc_imm,adcs_reg,adcs_imm,\
120                         logic_reg,logic_imm,logics_reg,logics_imm,\
121                         csel,adr,mov_imm,shift_reg,shift_imm,bfm,\
122                         bfx,rbit,rev,extend,rotate_imm"))
123   "thunderx3t110_i0123")
125 ; distinguish between latency 1|2 and throughput 1/4|2/4?
126 ; is it actually 1,1/2,{i0,i1} vs 2,1/4,{i0,i1,i2,i3}
; ALU/logic ops with a shifted or extended operand: default latency 2,
; issued on any of i0..i3.
127 (define_insn_reservation "thunderx3t110_alu_shift" 2
128   (and (eq_attr "tune" "thunderx3t110")
129        (eq_attr "type" "alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_ext,\
130                         alus_shift_imm,alus_ext,\
131                         logic_shift_imm,logics_shift_imm"))
132   "thunderx3t110_i0123")
; Latency-1 variant restricted to i0/i1.
; NOTE(review): the type list is identical to thunderx3t110_alu_shift;
; if the generators use the first matching reservation, this one never
; applies -- confirm.
134 (define_insn_reservation "thunderx3t110_alu_shift1" 1
135   (and (eq_attr "tune" "thunderx3t110")
136        (eq_attr "type" "alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_ext,\
137                         alus_shift_imm,alus_ext,\
138                         logic_shift_imm,logics_shift_imm"))
139   "thunderx3t110_i01")
141 ; we are going for the optimistic answer (13)
142 ; for now, the worst case is 23
; sdiv/udiv: default latency 13; the reservation repeats i1 three times.
143 (define_insn_reservation "thunderx3t110_div" 13
144   (and (eq_attr "tune" "thunderx3t110")
145        (eq_attr "type" "sdiv,udiv"))
146   "thunderx3t110_i1*3")
; Multiply-accumulate: default latency 5.  Issues on any of i0..i3, then
; steps through the three multiply pseudo-units, then takes one of
; i0/i1/i2.
148 (define_insn_reservation "thunderx3t110_madd" 5
149   (and (eq_attr "tune" "thunderx3t110")
150        (eq_attr "type" "mla,smlal,umlal"))
151   "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3,\
152    thunderx3t110_i012")
154 ; NOTE: smull, umull are used for "high part" multiplies too.
155 ; mul is alias for MADD
156 ; smulh, umulh (4,1) ought to be distinguished from the
157 ; others (5,1), but there is no separate type for them, so we go for
158 ; the conservative approach of (5,1) for now
159 ; smulh, umulh only runs on I1
; Multiplies: default latency 5.  Issues on any of i0..i3, then steps
; through the three multiply pseudo-units.
160 (define_insn_reservation "thunderx3t110_mul" 5
161   (and (eq_attr "tune" "thunderx3t110")
162        (eq_attr "type" "mul,smull,umull"))
163   "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3")
; Type "clz": default latency 3, i1 only.
165 (define_insn_reservation "thunderx3t110_countbits" 3
166   (and (eq_attr "tune" "thunderx3t110")
167        (eq_attr "type" "clz"))
168   "thunderx3t110_i1")
170 ;; Integer loads and stores.
172 ; load_4 matches prefetch, a multitude of move/str/dup variants,
173 ; sign extend
; Type "load_4": default latency 4, on ls0 or ls1.
174 (define_insn_reservation "thunderx3t110_load_basic" 4
175   (and (eq_attr "tune" "thunderx3t110")
176        (eq_attr "type" "load_4"))
177   "thunderx3t110_ls01")
179 ; model use of I0/I1/I2 for index versions only, model 4|8 2nd on load
; Types "load_8"/"load_16": default latency 5; one of i0/i1/i2 first,
; then ls0 or ls1.
180 (define_insn_reservation "thunderx3t110_loadpair" 5
181   (and (eq_attr "tune" "thunderx3t110")
182        (eq_attr "type" "load_8,load_16"))
183   "thunderx3t110_i012,thunderx3t110_ls01")
; Type "store_4": default latency 1; ls0 or ls1, followed by sd.
185 (define_insn_reservation "thunderx3t110_store_basic" 1
186   (and (eq_attr "tune" "thunderx3t110")
187        (eq_attr "type" "store_4"))
188   "thunderx3t110_ls01,thunderx3t110_sd")
190 ; model use of I0/I1/I2/I3 for index versions, model differing
191 ; throughputs
; Types "store_8"/"store_16": default latency 1; ls0 or ls1, followed by
; sd -- the same reservation as thunderx3t110_store_basic.
192 (define_insn_reservation "thunderx3t110_storepair_basic" 1
193   (and (eq_attr "tune" "thunderx3t110")
194        (eq_attr "type" "store_8,store_16"))
195   "thunderx3t110_ls01,thunderx3t110_sd")
197 ;; FP data processing instructions.
; Simple FP arithmetic and min/max: default latency 5, any of f0..f3.
199 (define_insn_reservation "thunderx3t110_fp_simple" 5
200   (and (eq_attr "tune" "thunderx3t110")
201        (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd"))
202   "thunderx3t110_f0123")
204 ; distinguish latency 3/4 throughput 1/2|1/4
; FP add/sub, latency-3 variant: f2 or f3.
205 (define_insn_reservation "thunderx3t110_fp_addsub3" 3
206   (and (eq_attr "tune" "thunderx3t110")
207        (eq_attr "type" "fadds,faddd"))
208   "thunderx3t110_f23")
; FP add/sub, latency-4 variant: any of f0..f3.
; NOTE(review): same condition as thunderx3t110_fp_addsub3; if the first
; matching reservation is used, this one never applies -- confirm.
209 (define_insn_reservation "thunderx3t110_fp_addsub4" 4
210   (and (eq_attr "tune" "thunderx3t110")
211        (eq_attr "type" "fadds,faddd"))
212   "thunderx3t110_f0123")
; FP compares and conditional compares: default latency 4, any of f0..f3.
214 (define_insn_reservation "thunderx3t110_fp_cmp" 4
215   (and (eq_attr "tune" "thunderx3t110")
216        (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
217   "thunderx3t110_f0123")
219 ; need to split out latency 23 throughput 23/4: F64 from
220 ; latency 16 throughput  16/4: FDIV F32
; Single-precision divide/sqrt: default latency 16; one of f0..f3 is
; chosen and repeated three times.
221 (define_insn_reservation "thunderx3t110_fp_divsqrt_s" 16
222   (and (eq_attr "tune" "thunderx3t110")
223        (eq_attr "type" "fdivs,fsqrts"))
224   "thunderx3t110_f0*3|thunderx3t110_f1*3|\
225    thunderx3t110_f2*3|thunderx3t110_f3*3")
; Double-precision divide/sqrt: default latency 23; one of f0..f3 is
; chosen and repeated five times.
227 (define_insn_reservation "thunderx3t110_fp_divsqrt_d" 23
228   (and (eq_attr "tune" "thunderx3t110")
229        (eq_attr "type" "fdivd,fsqrtd"))
230   "thunderx3t110_f0*5|thunderx3t110_f1*5|\
231    thunderx3t110_f2*5|thunderx3t110_f3*5")
; FP multiply and multiply-accumulate: default latency 5, f0 or f1 only.
233 (define_insn_reservation "thunderx3t110_fp_mul_mac" 5
234   (and (eq_attr "tune" "thunderx3t110")
235        (eq_attr "type" "fmuls,fmuld,fmacs,fmacd"))
236   "thunderx3t110_f01")
; FP round-to-integral: default latency 5, any of f0..f3.
238 (define_insn_reservation "thunderx3t110_frint" 5
239   (and (eq_attr "tune" "thunderx3t110")
240        (eq_attr "type" "f_rints,f_rintd"))
241   "thunderx3t110_f0123")
243 ; mimic latency 3|4 throughput 1/2|1/4
; fcsel, latency-3 variant: f2 or f3.
244 (define_insn_reservation "thunderx3t110_fcsel3" 3
245   (and (eq_attr "tune" "thunderx3t110")
246        (eq_attr "type" "fcsel"))
247   "thunderx3t110_f23")
; fcsel, latency-4 variant: any of f0..f3.
; NOTE(review): same condition as thunderx3t110_fcsel3; if the first
; matching reservation is used, this one never applies -- confirm.
249 (define_insn_reservation "thunderx3t110_fcsel4" 4
250   (and (eq_attr "tune" "thunderx3t110")
251        (eq_attr "type" "fcsel"))
252   "thunderx3t110_f0123")
254 ;; FP miscellaneous instructions.
; Scalar FP conversions (FP<->int and FP<->FP): default latency 5, any
; of f0..f3.
256 (define_insn_reservation "thunderx3t110_fp_cvt" 5
257   (and (eq_attr "tune" "thunderx3t110")
258        (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f"))
259   "thunderx3t110_f0123")
261 ; even though f_mrc has to belong to fp_mov_to_gen
262 ; we retain this for the sake of legacy as codegen
263 ; doesn't use it anyway
; FP constants/moves, latency-3 variant: f2 or f3.
264 (define_insn_reservation "thunderx3t110_fp_mov3" 3
265   (and (eq_attr "tune" "thunderx3t110")
266        (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
267   "thunderx3t110_f23")
; FP constants/moves, latency-4 variant: any of f0..f3.
; NOTE(review): same condition as thunderx3t110_fp_mov3; if the first
; matching reservation is used, this one never applies -- confirm.
269 (define_insn_reservation "thunderx3t110_fp_mov" 4
270   (and (eq_attr "tune" "thunderx3t110")
271        (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
272   "thunderx3t110_f0123")
; Type "f_mcr": default latency 4, any of f0..f3.
274 (define_insn_reservation "thunderx3t110_fp_mov_to_gen" 4
275   (and (eq_attr "tune" "thunderx3t110")
276        (eq_attr "type" "f_mcr"))
277   "thunderx3t110_f0123")
279 ;; FP loads and stores.
280 ;  model use of I0/I1/I2 for post/pre index modes
; Scalar FP loads: default latency 4, on ls0 or ls1.
282 (define_insn_reservation "thunderx3t110_fp_load_basic" 4
283   (and (eq_attr "tune" "thunderx3t110")
284        (eq_attr "type" "f_loads,f_loadd"))
285   "thunderx3t110_ls01")
287 ; model throughput 1
; Scalar FP stores: default latency 1; ls0 or ls1, followed by sd.
288 (define_insn_reservation "thunderx3t110_fp_store_basic" 1
289   (and (eq_attr "tune" "thunderx3t110")
290        (eq_attr "type" "f_stores,f_stored"))
291   "thunderx3t110_ls01,thunderx3t110_sd")
293 ;; ASIMD integer instructions.
; Catch-all for ASIMD integer arithmetic, min/max, multiply,
; multiply-accumulate and shift types: default latency 5, any of f0..f3.
295 (define_insn_reservation "thunderx3t110_asimd_int" 5
296   (and (eq_attr "tune" "thunderx3t110")
297        (eq_attr "type" "neon_abd,neon_abd_q,\
298                         neon_arith_acc,neon_arith_acc_q,\
299                         neon_abs,neon_abs_q,\
300                         neon_add,neon_add_q,\
301                         neon_sub,neon_sub_q,\
302                         neon_neg,neon_neg_q,\
303                         neon_add_long,neon_add_widen,\
304                         neon_add_halve,neon_add_halve_q,\
305                         neon_sub_long,neon_sub_widen,\
306                         neon_sub_halve,neon_sub_halve_q,\
307                         neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
308                         neon_qabs,neon_qabs_q,\
309                         neon_qadd,neon_qadd_q,\
310                         neon_qneg,neon_qneg_q,\
311                         neon_qsub,neon_qsub_q,\
312                         neon_minmax,neon_minmax_q,\
313                         neon_reduc_minmax,neon_reduc_minmax_q,\
314                         neon_mul_b,neon_mul_h,neon_mul_s,\
315                         neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\
316                         neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\
317                         neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\
318                         neon_mla_b,neon_mla_h,neon_mla_s,\
319                         neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\
320                         neon_mul_b_long,neon_mul_h_long,\
321                         neon_mul_s_long,neon_mul_d_long,\
322                         neon_sat_mul_b_long,neon_sat_mul_h_long,\
323                         neon_sat_mul_s_long,\
324                         neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
325                         neon_sat_mla_b_long,neon_sat_mla_h_long,\
326                         neon_sat_mla_s_long,\
327                         neon_shift_acc,neon_shift_acc_q,\
328                         neon_shift_imm,neon_shift_imm_q,\
329                         neon_shift_reg,neon_shift_reg_q,\
330                         neon_shift_imm_long,neon_shift_imm_narrow_q,\
331                         neon_sat_shift_imm,neon_sat_shift_imm_q,\
332                         neon_sat_shift_reg,neon_sat_shift_reg_q,\
333                         neon_sat_shift_imm_narrow_q"))
334   "thunderx3t110_f0123")
336 ; neon_reduc_add is used for both addp and [su]adalp
; ASIMD reduction add: default latency 5, f0 or f1 only.
337 (define_insn_reservation "thunderx3t110_asimd_reduc_add" 5
338   (and (eq_attr "tune" "thunderx3t110")
339        (eq_attr "type" "neon_reduc_add,neon_reduc_add_q"))
340   "thunderx3t110_f01")
; ASIMD integer compares and bit tests: default latency 5, any of f0..f3.
342 (define_insn_reservation "thunderx3t110_asimd_cmp" 5
343   (and (eq_attr "tune" "thunderx3t110")
344        (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\
345                         neon_tst,neon_tst_q"))
346   "thunderx3t110_f0123")
348 ; neon_logic used in ldr, str, mov, umov, fmov, mov; orn; bic; and,
349 ;   simd mov immediate; orr, simd mov immediate; eor; not (mvn)
350 ; latency 4 throughput 1/2 LS0/LS1: ldr
351 ; latency 1 throughput 1 LS0/LS1,SDI,I0/I1/I2: str
352 ; latency 3|4 throughput 1/2|1/4 F2/F3 F0/F1/F2/F3: fmov immed, orn,
353 ;   bic, and, orr, eor, not (mvn)
354 ; latency 4 throughput 1/4 F0/F1/F2/F3: fmov register, fmov gen to vec
355 ; latency 5 throughput 1/4 F0/F1/F2/F3: fmov vec to gen, umov, fmov
; neon_logic, latency-4 variant: f2 or f3.
356 (define_insn_reservation "thunderx3t110_asimd_logic4" 4
357   (and (eq_attr "tune" "thunderx3t110")
358        (eq_attr "type" "neon_logic,neon_logic_q"))
359   "thunderx3t110_f23")
; neon_logic, latency-5 variant: any of f0..f3.
; NOTE(review): same condition as thunderx3t110_asimd_logic4; if the
; first matching reservation is used, this one never applies -- confirm.
361 (define_insn_reservation "thunderx3t110_asimd_logic5" 5
362   (and (eq_attr "tune" "thunderx3t110")
363        (eq_attr "type" "neon_logic,neon_logic_q"))
364   "thunderx3t110_f0123")
366 ;; ASIMD floating-point instructions.
368 ; Distinguish between latency 5 throughput 1/4: fabs, fmax, fmin, fneg
369 ; latency 4 throughput 1/4: fcmp
; ASIMD FP abs, compare, min/max (including reductions) and neg: all
; modelled with default latency 5 on any of f0..f3, compares included.
370 (define_insn_reservation "thunderx3t110_asimd_fp_simple" 5
371   (and (eq_attr "tune" "thunderx3t110")
372        (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\
373                         neon_fp_abs_s_q,neon_fp_abs_d_q,\
374                         neon_fp_compare_s,neon_fp_compare_d,\
375                         neon_fp_compare_s_q,neon_fp_compare_d_q,\
376                         neon_fp_minmax_s,neon_fp_minmax_d,\
377                         neon_fp_minmax_s_q,neon_fp_minmax_d_q,\
378                         neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\
379                         neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\
380                         neon_fp_neg_s,neon_fp_neg_d,\
381                         neon_fp_neg_s_q,neon_fp_neg_d_q"))
382   "thunderx3t110_f0123")
384 ; distinguish between latency 3 throughput 1/2,
385 ; latency 4 throughput 1/4
386 ; neon_fp_reduc_add_<stype><q> is used for both faddp and
387 ; vector reduction add. On TX3, faddp is 3|4 1/2|1/4 and reduction is 5 1/4
; ASIMD FP abd, add/sub and reduction add, latency-3 variant: f2 or f3.
388 (define_insn_reservation "thunderx3t110_asimd_fp_arith3" 3
389   (and (eq_attr "tune" "thunderx3t110")
390        (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
391                         neon_fp_abd_s_q,neon_fp_abd_d_q,\
392                         neon_fp_addsub_s,neon_fp_addsub_d,\
393                         neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
394                         neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
395                         neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
396   "thunderx3t110_f23")
; Latency-4 variant of the same type list: any of f0..f3.
; NOTE(review): same condition as thunderx3t110_asimd_fp_arith3; if the
; first matching reservation is used, this one never applies -- confirm.
398 (define_insn_reservation "thunderx3t110_asimd_fp_arith4" 4
399   (and (eq_attr "tune" "thunderx3t110")
400        (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
401                         neon_fp_abd_s_q,neon_fp_abd_d_q,\
402                         neon_fp_addsub_s,neon_fp_addsub_d,\
403                         neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
404                         neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
405                         neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
406   "thunderx3t110_f0123")
; ASIMD FP multiply and multiply-accumulate: default latency 5, any of
; f0..f3.
408 (define_insn_reservation "thunderx3t110_asimd_fp_arith5" 5
409   (and (eq_attr "tune" "thunderx3t110")
410        (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_d,\
411                         neon_fp_mul_s_q,neon_fp_mul_d_q,\
412                         neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\
413                         neon_fp_mla_s,neon_fp_mla_d,\
414                         neon_fp_mla_s_q,neon_fp_mla_d_q"))
415   "thunderx3t110_f0123")
417 ; neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q: fcvtl,fcvtl2,fcvtn,fcvtn2
418 ; neon_fp_to_int_s,neon_fp_to_int_d: fcvt{<frint_suffix><su>,z<su>}
419 ;   where frint_suffix: zpmixan, su: su (plus other sign/unsign/extract...
420 ; neon_fp_to_int_s_q,neon_fp_to_int_d_q: fcvtz<su> other
421 ; The int_to_fp* is complicated
422 ;   neon_int_to_fp_s,neon_int_to_fp_d: <su_optab>cvtf
423 ;   neon_int_to_fp_s_q,neon_int_to_fp_d_q
424 ; Round matches single define_insn, frint<frint_suffix>
425 ;   neon_fp_round_s,neon_fp_round_d,neon_fp_round_s_q,
426 ;   neon_fp_round_d_q: frint<frint_suffix>
427 ; FCVT*,VCVTAU,[SU]CVTF: latency 5 throughput 1/4
428 ; FRINT*: latency 5 throughput 1/4
; ASIMD FP conversions and rounding: default latency 5, any of f0..f3.
429 (define_insn_reservation "thunderx3t110_asimd_fp_conv" 5
430   (and (eq_attr "tune" "thunderx3t110")
431        (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\
432                         neon_fp_to_int_s,neon_fp_to_int_d,\
433                         neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
434                         neon_int_to_fp_s,neon_int_to_fp_d,\
435                         neon_int_to_fp_s_q,neon_int_to_fp_d_q,\
436                         neon_fp_round_s,neon_fp_round_d,\
437                         neon_fp_round_s_q,neon_fp_round_d_q"))
438   "thunderx3t110_f0123")
440 ; model that pipeline is occupied the whole time D/F32, Q/F32: 16/4
441 ; Q/F64: 23/4
; ASIMD single-precision divide: default latency 16, any of f0..f3.
; NOTE(review): the comment above talks about the pipeline being
; occupied the whole time, but this reservation has no repeat count
; (compare thunderx3t110_fp_divsqrt_s, which uses "*3") -- confirm.
442 (define_insn_reservation "thunderx3t110_asimd_fp_div_s" 16
443   (and (eq_attr "tune" "thunderx3t110")
444        (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q"))
445   "thunderx3t110_f0123")
; ASIMD double-precision divide: default latency 23, any of f0..f3.
; NOTE(review): no repeat count here either (compare
; thunderx3t110_fp_divsqrt_d, which uses "*5") -- confirm.
447 (define_insn_reservation "thunderx3t110_asimd_fp_div_d" 23
448   (and (eq_attr "tune" "thunderx3t110")
449        (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q"))
450   "thunderx3t110_f0123")
452 ;; ASIMD miscellaneous instructions.
454 ;  divided out:
455 ;  rbit,bsl,bsl_q,cls,cls_q,cnt,cnt_q,move,move_q: 3|4 1/2 | 1/4
456 ;  from_gp,from_gp_q : 4 | 1/4
457 ;  dup,dup_q,ext,ext_q,ins,ins_q,all recpe forms, rev,rev_q: 5 1/4
458 ;  permute,permute_q need to depend on what aarch64_expand_vec_perm_const
459 ;  does on TX3
; rbit, bsl, cls, cnt and move, latency-3 variant: f2 or f3.
460 (define_insn_reservation "thunderx3t110_asimd_misc3" 3
461   (and (eq_attr "tune" "thunderx3t110")
462        (eq_attr "type" "neon_rbit,\
463                         neon_bsl,neon_bsl_q,\
464                         neon_cls,neon_cls_q,\
465                         neon_cnt,neon_cnt_q,\
466                         neon_move,neon_move_q"))
467   "thunderx3t110_f23")
; Latency-4 variant on any of f0..f3.  Compared with
; thunderx3t110_asimd_misc3 the only additional types are
; neon_from_gp,neon_from_gp_q.
; NOTE(review): if the first matching reservation is used, only the
; from_gp types ever reach this one -- confirm.
469 (define_insn_reservation "thunderx3t110_asimd_misc4" 4
470   (and (eq_attr "tune" "thunderx3t110")
471        (eq_attr "type" "neon_rbit,\
472                         neon_bsl,neon_bsl_q,\
473                         neon_cls,neon_cls_q,\
474                         neon_cnt,neon_cnt_q,\
475                         neon_from_gp,neon_from_gp_q,\
476                         neon_move,neon_move_q"))
477   "thunderx3t110_f0123")
; Remaining ASIMD miscellaneous types (dup, ext, ins, recpe/recpx, rev,
; permute): default latency 5, any of f0..f3.
; The type string used to open with a raw newline; the first elements
; now sit on the opening line, like every other list in this file.
; NOTE(review): neon_move,neon_move_q are also listed in
; thunderx3t110_asimd_misc3 and thunderx3t110_asimd_misc4; if the first
; matching reservation is used they never reach this one -- confirm
; before dropping them from this list.
479 (define_insn_reservation "thunderx3t110_asimd_misc" 5
480   (and (eq_attr "tune" "thunderx3t110")
481        (eq_attr "type" "neon_dup,neon_dup_q,\
483                         neon_ext,neon_ext_q,\
484                         neon_ins,neon_ins_q,\
485                         neon_move,neon_move_q,\
486                         neon_fp_recpe_s,neon_fp_recpe_d,\
487                         neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
488                         neon_fp_recpx_s,neon_fp_recpx_d,\
489                         neon_fp_recpx_s_q,neon_fp_recpx_d_q,\
490                         neon_rev,neon_rev_q,\
491                         neon_permute,neon_permute_q"))
492   "thunderx3t110_f0123")
; recps, sqrt, rsqrte and rsqrts types: default latency 5, any of
; f0..f3.
494 (define_insn_reservation "thunderx3t110_asimd_recip_step" 5
495   (and (eq_attr "tune" "thunderx3t110")
496        (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
497                         neon_fp_recps_d,neon_fp_recps_d_q,\
498                         neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
499                         neon_fp_sqrt_d,neon_fp_sqrt_d_q,\
500                         neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
501                         neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\
502                         neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
503                         neon_fp_rsqrts_d, neon_fp_rsqrts_d_q"))
504   "thunderx3t110_f0123")
; One-register table lookup: default latency 5, any of f0..f3.
506 (define_insn_reservation "thunderx3t110_asimd_lut1" 5
507   (and (eq_attr "tune" "thunderx3t110")
508        (eq_attr "type" "neon_tbl1,neon_tbl1_q"))
509   "thunderx3t110_f0123")
; Two-register table lookup: default latency 10, any of f0..f3.
511 (define_insn_reservation "thunderx3t110_asimd_lut2" 10
512   (and (eq_attr "tune" "thunderx3t110")
513        (eq_attr "type" "neon_tbl2,neon_tbl2_q"))
514   "thunderx3t110_f0123")
; Three-register table lookup: default latency 15, any of f0..f3.
516 (define_insn_reservation "thunderx3t110_asimd_lut3" 15
517   (and (eq_attr "tune" "thunderx3t110")
518        (eq_attr "type" "neon_tbl3,neon_tbl3_q"))
519   "thunderx3t110_f0123")
; Four-register table lookup: default latency 20, any of f0..f3.
521 (define_insn_reservation "thunderx3t110_asimd_lut4" 20
522   (and (eq_attr "tune" "thunderx3t110")
523        (eq_attr "type" "neon_tbl4,neon_tbl4_q"))
524   "thunderx3t110_f0123")
; Types neon_to_gp,neon_to_gp_q: default latency 5, any of f0..f3.
526 (define_insn_reservation "thunderx3t110_asimd_elt_to_gr" 5
527   (and (eq_attr "tune" "thunderx3t110")
528        (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
529   "thunderx3t110_f0123")
531 ;; ASIMD load instructions.
533 ; NOTE: These reservations attempt to model latency and throughput
534 ; correctly, but the cycle timing of unit allocation is not
535 ; necessarily accurate (because insns are split into uops, and those
536 ; may be issued out-of-order).
538 ; the LDP/LDNP imm-offset S/D/Q supplies the first arg with latency 4
539 ; and the 2nd at 5 (Q form) or 8 (S/D form). Can this be modeled? These
540 ; forms, as documented, do not use the I0/I1/I2 units (no I3), but the
541 ; other LDP ones do.
; ASIMD load pair: default latency 5; one of i0/i1/i2 first, then ls0
; or ls1 -- the same shape as thunderx3t110_loadpair.
542 (define_insn_reservation "thunderx3t110_asimd_load1_ldp" 5
543   (and (eq_attr "tune" "thunderx3t110")
544        (eq_attr "type" "neon_ldp,neon_ldp_q"))
545   "thunderx3t110_i012,thunderx3t110_ls01")
547 ; Need to distinguish latency 6 throughput 2: 4 reg D/Q
548 ; latency 5 throughput 3/2: 3 reg D/Q
549 ; latency 4 throughput 1: 2 reg D/Q
550 ; latency 4 throughput 1/2: 1 reg D/Q
; LD1 multiple-register forms (1 to 4 registers, D and Q): all modelled
; with default latency 4 on ls0 or ls1.
551 (define_insn_reservation "thunderx3t110_asimd_load1" 4
552   (and (eq_attr "tune" "thunderx3t110")
553        (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
554                         neon_load1_2reg,neon_load1_2reg_q,\
555                         neon_load1_3reg,neon_load1_3reg_q,\
556                         neon_load1_4reg,neon_load1_4reg_q"))
557   "thunderx3t110_ls01")
; LD1 one-lane: default latency 5; a delayed load pipe (l01delay), then
; any of f0..f3.
559 (define_insn_reservation "thunderx3t110_asimd_load1_onelane" 5
560   (and (eq_attr "tune" "thunderx3t110")
561        (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q"))
562   "thunderx3t110_l01delay,thunderx3t110_f0123")
; LD1 all-lanes: default latency 5; a delayed load pipe (l01delay), then
; any of f0..f3.
564 (define_insn_reservation "thunderx3t110_asimd_load1_all" 5
565   (and (eq_attr "tune" "thunderx3t110")
566        (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q"))
567   "thunderx3t110_l01delay,thunderx3t110_f0123")
; LD2 forms: default latency 5; a delayed load pipe, then any of f0..f3.
569 (define_insn_reservation "thunderx3t110_asimd_load2" 5
570   (and (eq_attr "tune" "thunderx3t110")
571        (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
572                         neon_load2_one_lane,neon_load2_one_lane_q,\
573                         neon_load2_all_lanes,neon_load2_all_lanes_q"))
574   "thunderx3t110_l01delay,thunderx3t110_f0123")
; LD3 forms: default latency 7; a delayed load pipe, then any of f0..f3.
576 (define_insn_reservation "thunderx3t110_asimd_load3" 7
577   (and (eq_attr "tune" "thunderx3t110")
578        (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
579                         neon_load3_one_lane,neon_load3_one_lane_q,\
580                         neon_load3_all_lanes,neon_load3_all_lanes_q"))
581   "thunderx3t110_l01delay,thunderx3t110_f0123")
; LD4 forms: default latency 8; a delayed load pipe, then any of f0..f3.
583 (define_insn_reservation "thunderx3t110_asimd_load4" 8
584   (and (eq_attr "tune" "thunderx3t110")
585        (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\
586                         neon_load4_one_lane,neon_load4_one_lane_q,\
587                         neon_load4_all_lanes,neon_load4_all_lanes_q"))
588   "thunderx3t110_l01delay,thunderx3t110_f0123")
590 ;; ASIMD store instructions.
592 ; Same note applies as for ASIMD load instructions.
594 ; Vector Store pair Need to distinguish:
595 ; 5 throughput: imm-offset S/D; imm-postindex S/D; imm-preindex S/D
596 ; 2 throughput: imm-offset Q; imm-postindex Q; imm-preindex Q
597 ; all index modes use I0/I1/I2
; ASIMD store pair: default latency 1; ls0 or ls1, followed by sd.  The
; S/D versus Q throughput difference described above is not modelled.
598 (define_insn_reservation "thunderx3t110_asimd_store_stp" 1
599   (and (eq_attr "tune" "thunderx3t110")
600        (eq_attr "type" "neon_stp,neon_stp_q"))
601   "thunderx3t110_ls01,thunderx3t110_sd")
603 ; There are multiple forms of ST1
604 ; The following two groups, as documented, do not use the FP pipelines.
605 ; multiple, 1 reg, D-form     ST1
606 ; tx2_ltp:    x    1/2     LS0/LS1
607 ; tx3_ltp:    x    1/2     LS0/LS1
608 ; multiple, 1 reg, Q-form     ST1
609 ; tx2_ltp:    x    1/2     LS0/LS1
610 ; tx3_ltp:    x    1/2     LS0/LS1
612 ; one lane, B/H/S         ST1
613 ; tx2_ltp:    x       1/2     LS0/LS1,F0/F1
614 ; tx3_ltp:    x       1/2     LS0/LS1,F0/F1/F2/F3
615 ; one lane, D             ST1
616 ; tx2_ltp:    x       1/2     LS0/LS1,F0/F1
617 ; tx3_ltp:    x       1/2     LS0/LS1,F0/F1/F2/F3
618 ;; Model for st1 insn needs refinement for different register forms
619 ; multiple, 2 reg, D-form     ST1     x    1     LS0/LS1
620 ; multiple, 2 reg, Q-form     ST1     x    1     LS0/LS1
621 ; multiple, 3 reg, D-form     ST1     x    3/2     LS0/LS1
622 ; multiple, 3 reg, Q-form     ST1     x    3/2     LS0/LS1
623 ; multiple,4 reg, D-form         ST1     x    2     LS0/LS1
624 ; multiple,4 reg, Q-form         ST1     x    2     LS0/LS1
; ST1 multiple-register forms (1 to 4 registers, D and Q): default
; latency 1 on ls0 or ls1; no FP unit is reserved.
; The Q-form 3- and 4-register types are listed alongside the D forms,
; matching the 1- and 2-register entries and the table above; they were
; previously missing, so those insns matched no reservation here.
625 (define_insn_reservation "thunderx3t110_asimd_store1" 1
626   (and (eq_attr "tune" "thunderx3t110")
627        (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,neon_store1_2reg,\
628                         neon_store1_2reg_q,neon_store1_3reg,neon_store1_3reg_q,\
629                         neon_store1_4reg,neon_store1_4reg_q"))
630   "thunderx3t110_ls01")
; ST1 one-lane: default latency 1; ls0 or ls1, then any of f0..f3.
632 (define_insn_reservation "thunderx3t110_asimd_store1_onelane" 1
633   (and (eq_attr "tune" "thunderx3t110")
634        (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q"))
635   "thunderx3t110_ls01,thunderx3t110_f0123")
637 ; distinguish between throughput 1: D/Q-form B/H/S, Q-form D and
638 ; throughput 1/2: one lane B/H/S/D
; ST2 forms: default latency 1; ls0 or ls1, then any of f0..f3.  The
; throughput distinction described above is not modelled.
639 (define_insn_reservation "thunderx3t110_asimd_store2" 1
640   (and (eq_attr "tune" "thunderx3t110")
641        (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\
642                         neon_store2_one_lane,neon_store2_one_lane_q"))
643   "thunderx3t110_ls01,thunderx3t110_f0123")
645 ; distinguish between throughput 3: D/Q-form B/H/S, Q-form D and
646 ; throughput 1: one lane B/H/S/D
; ST3 forms: default latency 1; ls0 or ls1, then any of f0..f3.  The
; throughput distinction described above is not modelled.
647 (define_insn_reservation "thunderx3t110_asimd_store3" 1
648   (and (eq_attr "tune" "thunderx3t110")
649        (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\
650                         neon_store3_one_lane,neon_store3_one_lane_q"))
651   "thunderx3t110_ls01,thunderx3t110_f0123")
653 ; distinguish between throughput 4: D/Q-form B/H/S, Q-form D and
654 ; throughput 1: one lane B/H/S/D? (not in doc)
; ST4 forms: default latency 1; ls0 or ls1, then any of f0..f3.  The
; throughput distinction described above is not modelled.
655 (define_insn_reservation "thunderx3t110_asimd_store4" 1
656   (and (eq_attr "tune" "thunderx3t110")
657        (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\
658                         neon_store4_one_lane,neon_store4_one_lane_q"))
659   "thunderx3t110_ls01,thunderx3t110_f0123")
661 ;; Crypto extensions.
; AES encrypt and mix-columns types: default latency 4, any of f0..f3.
663 (define_insn_reservation "thunderx3t110_aes" 4
664   (and (eq_attr "tune" "thunderx3t110")
665        (eq_attr "type" "crypto_aese,crypto_aesmc"))
666   "thunderx3t110_f0123")
; SHA1 and SHA256 types: default latency 5, any of f0..f3.
668 (define_insn_reservation "thunderx3t110_sha" 5
669   (and (eq_attr "tune" "thunderx3t110")
670        (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\
671                         crypto_sha256_fast,crypto_sha256_slow"))
672   "thunderx3t110_f0123")
674 ;; CRC extension.
; Type "crc": default latency 3, i1 only.
676 (define_insn_reservation "thunderx3t110_crc" 3
677   (and (eq_attr "tune" "thunderx3t110")
678        (eq_attr "type" "crc"))
679   "thunderx3t110_i1")
681 ;; PMULL extension.
; Type "crypto_pmull": default latency 5, any of f0..f3.
683 (define_insn_reservation "thunderx3t110_pmull" 5
684   (and (eq_attr "tune" "thunderx3t110")
685        (eq_attr "type" "crypto_pmull"))
686   "thunderx3t110_f0123")