PR target/84064
[official-gcc.git] / gcc / config / arm / exynos-m1.md
blob e8f962f18757dd67f28cf415113a95742ed41cae
1 ;; Samsung Exynos M1 pipeline description
2 ;; Copyright (C) 2014-2018 Free Software Foundation, Inc.
3 ;;
4 ;; This file is part of GCC.
5 ;;
6 ;; GCC is free software; you can redistribute it and/or modify it
7 ;; under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
9 ;; any later version.
11 ;; GCC is distributed in the hope that it will be useful, but
12 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 ;; General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3.  If not see
18 ;; <http://www.gnu.org/licenses/>.
;; exynos_m1_neon_type: a coarser classification of the generic "type"
;; attribute for Advanced SIMD / FP insns.  The reservations in this file
;; test it with (eq_attr "exynos_m1_neon_type" ...).
;;
;; The first string lists every value the attribute may take.  The cond
;; then maps lists of "type" values to one of those classes; the first
;; matching arm wins and any "type" not listed yields "unknown".
;;
;; As visible in this file, these declared values are never produced by
;; the cond: neon_mla_q, neon_mla_long, neon_shift_reg_basic_q,
;; neon_shift_reg_complex_q, neon_fp_minmax, neon_fp_mul_q, neon_fp_mla_q,
;; neon_move and neon_store.  In particular the neon_fp_minmax_* types are
;; classified as neon_fp_compare and the neon_move types as neon_bitops.
;; They are kept because removing a declared value could break references
;; outside this view.
;;
;; Every continued line inside a type list ends with a backslash so that
;; no raw newline is embedded in the string.
20 (define_attr "exynos_m1_neon_type"
21   "neon_arith_simple, neon_arith_basic, neon_arith_complex,
22    neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long,
23    neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex,
24    neon_shift_reg_basic, neon_shift_reg_basic_q,
25    neon_shift_reg_complex, neon_shift_reg_complex_q,
26    neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare,
27    neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt,
28    neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q,
29    neon_fp_estimate, neon_fp_estimatex, neon_fp_step,
30    neon_bitops, neon_bitops_q, neon_bitins,
31    neon_to_gp, neon_from_gp, neon_move, neon_tbl,
32    neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4,
33    neon_load1_one, neon_load1_all,
34    neon_load2_2, neon_load2_one, neon_load2_all,
35    neon_load3_3, neon_load3_one, neon_load3_all,
36    neon_load4_4, neon_load4_one, neon_load4_all,
37    neon_store,
38    neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one,
39    neon_store2_2, neon_store2_one,
40    neon_store3_3, neon_store3_one,
41    neon_store4_4, neon_store4_one,
42    unknown"
43   (cond [
;; Integer arithmetic classes.
44           (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\
45                            neon_abs, neon_abs_q,\
46                            neon_minmax, neon_minmax_q")
47             (const_string "neon_arith_simple")
49           (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
50                            neon_neg, neon_neg_q,\
51                            neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\
52                            neon_logic, neon_logic_q, neon_tst, neon_tst_q,\
53                            neon_compare_zero, neon_compare_zero_q")
54             (const_string "neon_arith_basic")
56           (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\
57                            neon_reduc_add, neon_reduc_add_q,\
58                            neon_reduc_add_acc, neon_reduc_add_acc_q,\
59                            neon_reduc_add_long, neon_add_halve_narrow_q,\
60                            neon_add_halve, neon_add_halve_q,\
61                            neon_sub_halve, neon_sub_halve_q, neon_qabs,\
62                            neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
63                            neon_qneg_q, neon_qsub, neon_qsub_q,\
64                            neon_sub_halve_narrow_q,\
65                            neon_compare, neon_compare_q,\
66                            neon_reduc_minmax, neon_reduc_minmax_q")
67             (const_string "neon_arith_complex")
;; Integer multiply and multiply-accumulate classes.
69           (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\
70                            neon_mul_s, neon_mul_s_q,\
71                            neon_mul_h_scalar, neon_mul_h_scalar_q,\
72                            neon_mul_s_scalar, neon_mul_s_scalar_q,\
73                            neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
74                            neon_sat_mul_b, neon_sat_mul_b_q,\
75                            neon_sat_mul_h, neon_sat_mul_h_q,\
76                            neon_sat_mul_s, neon_sat_mul_s_q,\
77                            neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\
78                            neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\
79                            neon_sat_mul_b_long, neon_sat_mul_h_long,\
80                            neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
81                            neon_sat_mul_s_scalar_long, crypto_pmull")
82             (const_string "neon_multiply")
84           (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
85                            neon_mla_h_scalar, neon_mla_s_scalar,\
86                            neon_mla_b_long, neon_mla_h_long,\
87                            neon_mla_s_long,\
88                            neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
89                            neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
90                            neon_mla_h_scalar_q, neon_mla_s_scalar_q")
91             (const_string "neon_mla")
93           (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
94                            neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
95                            neon_sat_mla_s_scalar_long")
96             (const_string "neon_sat_mla_long")
;; Integer shift classes.
98           (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
99             (const_string "neon_shift_acc")
101           (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
102                            neon_shift_imm_narrow_q, neon_shift_imm_long")
103             (const_string "neon_shift_imm_basic")
105           (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
106                            neon_sat_shift_imm_narrow_q")
107             (const_string "neon_shift_imm_complex")
109           (eq_attr "type" "neon_shift_reg, neon_shift_reg_q")
110             (const_string "neon_shift_reg_basic")
112           (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q")
113             (const_string "neon_shift_reg_complex")
;; Floating-point classes.
115           (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
116                            neon_fp_abs_s, neon_fp_abs_s_q,\
117                            neon_fp_neg_d, neon_fp_neg_d_q,\
118                            neon_fp_abs_d, neon_fp_abs_d_q")
119             (const_string "neon_fp_unary")
121           (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\
122                            neon_fp_addsub_d, neon_fp_addsub_d_q")
123             (const_string "neon_fp_add")
125           (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\
126                            neon_fp_abd_d, neon_fp_abd_d_q")
127             (const_string "neon_fp_abd")
129           (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\
130                            neon_fp_compare_d, neon_fp_compare_d_q,\
131                            neon_fp_minmax_s, neon_fp_minmax_s_q,\
132                            neon_fp_minmax_d, neon_fp_minmax_d_q")
133             (const_string "neon_fp_compare")
135           (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\
136                            neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q")
137             (const_string "neon_fp_reduc_minmax")
139           (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
140                            neon_fp_reduc_add_d, neon_fp_reduc_add_d_q")
141             (const_string "neon_fp_reduc_add")
143           (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\
144                            neon_fp_round_d, neon_fp_round_d_q")
145             (const_string "neon_fp_round")
147           (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h,\
148                            neon_fp_to_int_s, neon_fp_to_int_s_q,\
149                            neon_fp_to_int_d_q, neon_fp_to_int_d,\
150                            neon_int_to_fp_s, neon_int_to_fp_s_q,\
151                            neon_int_to_fp_d, neon_int_to_fp_d_q")
152             (const_string "neon_fp_cvt")
154           (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\
155                            neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\
156                            neon_fp_mul_d, neon_fp_mul_d_q,\
157                            neon_fp_mul_d_scalar_q")
158             (const_string "neon_fp_mul")
160           (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\
161                            neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
162                            neon_fp_mla_d, neon_fp_mla_d_q,\
163                            neon_fp_mla_d_scalar_q")
164             (const_string "neon_fp_mla")
166           (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\
167                            neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
168                            neon_fp_recpe_d, neon_fp_recpe_d_q,\
169                            neon_fp_rsqrte_d, neon_fp_rsqrte_d_q")
170             (const_string "neon_fp_estimate")
172           (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\
173                            neon_fp_recpx_d, neon_fp_recpx_d_q")
174             (const_string "neon_fp_estimatex")
176           (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\
177                            neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
178                            neon_fp_recps_d, neon_fp_recps_d_q,\
179                            neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")
180             (const_string "neon_fp_step")
;; Bit manipulation, permute, table lookup and register transfer classes.
182           (eq_attr "type" "neon_rbit, neon_rbit_q,\
183                            neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\
184                            neon_dup, neon_dup_q,\
185                            neon_rev, neon_rev_q,\
186                            neon_move, neon_move_q,\
187                            neon_ext, neon_permute, neon_zip")
188             (const_string "neon_bitops")
190           (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q")
191             (const_string "neon_bitops_q")
193           (eq_attr "type" "neon_bsl, neon_bsl_q")
194             (const_string "neon_bitins")
196           (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4")
197             (const_string "neon_tbl")
199           (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr")
200             (const_string "neon_from_gp")
202           (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc")
203             (const_string "neon_to_gp")
;; Structure load classes.  D and Q register forms share a class.
205           (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")
206             (const_string "neon_load1_1")
208           (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
209             (const_string "neon_load1_2")
211           (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q")
212             (const_string "neon_load1_3")
214           (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q")
215             (const_string "neon_load1_4")
217           (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q")
218             (const_string "neon_load1_one")
220           (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q")
221             (const_string "neon_load1_all")
223           (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\
224                            neon_load2_4reg, neon_load2_4reg_q")
225             (const_string "neon_load2_2")
227           (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q")
228             (const_string "neon_load2_one")
230           (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q")
231             (const_string "neon_load2_all")
233           (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q")
234             (const_string "neon_load3_3")
236           (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q")
237             (const_string "neon_load3_one")
239           (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q")
240             (const_string "neon_load3_all")
242           (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q")
243             (const_string "neon_load4_4")
245           (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
246             (const_string "neon_load4_one")
248           (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
249             (const_string "neon_load4_all")
;; Structure store classes.  D and Q register forms share a class.
251           (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
252             (const_string "neon_store1_1")
254           (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q")
255             (const_string "neon_store1_2")
257           (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q")
258             (const_string "neon_store1_3")
260           (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q")
261             (const_string "neon_store1_4")
263           (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q")
264             (const_string "neon_store1_one")
266           (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\
267                            neon_store2_4reg, neon_store2_4reg_q")
268             (const_string "neon_store2_2")
270           (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q")
271             (const_string "neon_store2_one")
273           (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q")
274             (const_string "neon_store3_3")
276           (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q")
277             (const_string "neon_store3_one")
279           (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q")
280             (const_string "neon_store4_4")
282           (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q")
283             (const_string "neon_store4_one")]
;; Default: any "type" not matched above.
285           (const_string "unknown")))
287 ;; The Exynos M1 core is modeled as a triple issue pipeline that has
288 ;; the following functional units.
;; Three automata are declared and the CPU units in this file are split
;; among them: exynos_m1_gp holds the integer and branch units,
;; exynos_m1_ls the load/store units and exynos_m1_fp the Neon/FP units.
290 (define_automaton "exynos_m1_gp")
291 (define_automaton "exynos_m1_ls")
292 (define_automaton "exynos_m1_fp")
294 ;; 1.  Two pipelines for simple integer operations: A, B
295 ;; 2.  One pipeline for simple or complex integer operations: C
297 (define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")
;; em1_alu: any one of the three integer pipes ("|" means alternatives).
;; em1_c: pipe C specifically.
299 (define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
300 (define_reservation "em1_c" "em1_xc")
302 ;; 3.  Two asymmetric pipelines for Neon and FP operations: F0, F1
304 (define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")
;; Each reservation below names an operation class and binds it to the
;; pipe(s) it may use:
;;   either pipe: em1_nalu
;;   F0 only:     em1_fmac, em1_fcvt, em1_nalu0, em1_nmisc, em1_ncrypt
;;   F1 only:     em1_nalu1, em1_fadd, em1_fvar, em1_fst
306 (define_reservation "em1_fmac" "em1_f0")
307 (define_reservation "em1_fcvt" "em1_f0")
308 (define_reservation "em1_nalu" "(em1_f0 | em1_f1)")
309 (define_reservation "em1_nalu0" "em1_f0")
310 (define_reservation "em1_nalu1" "em1_f1")
311 (define_reservation "em1_nmisc" "em1_f0")
312 (define_reservation "em1_ncrypt" "em1_f0")
313 (define_reservation "em1_fadd" "em1_f1")
314 (define_reservation "em1_fvar" "em1_f1")
315 (define_reservation "em1_fst" "em1_f1")
317 ;; 4.  One pipeline for branch operations: BX
;; The branch unit is part of the exynos_m1_gp automaton; em1_br is its
;; reservation name.
319 (define_cpu_unit "em1_bx" "exynos_m1_gp")
321 (define_reservation "em1_br" "em1_bx")
323 ;; 5.  One AGU for loads: L
324 ;;     One AGU for stores and one pipeline for stores: S, SD
326 (define_cpu_unit "em1_lx" "exynos_m1_ls")
327 (define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls")
;; em1_ld reserves the load AGU.  em1_st reserves the store AGU and the
;; store pipeline in the same cycle ("+" means simultaneously).
329 (define_reservation "em1_ld" "em1_lx")
330 (define_reservation "em1_st" "(em1_sx + em1_sd)")
332 ;; Common occurrences
;; em1_sfst: em1_fst (F1) together with the store units (em1_st).
;; em1_lfst: em1_fst (F1) together with the load AGU (em1_ld).
333 (define_reservation "em1_sfst" "(em1_fst + em1_st)")
334 (define_reservation "em1_lfst" "(em1_fst + em1_ld)")
336 ;; Branches
338 ;; No latency as there is no result
339 ;; TODO: Unconditional branches use no units;
340 ;; conditional branches add the BX unit;
341 ;; indirect branches add the C unit.
;; As modeled here, every insn of type "branch" reserves em1_br (the BX
;; unit) for one cycle when tuning for exynosm1; the distinctions in the
;; TODO above are not implemented.
342 (define_insn_reservation "exynos_m1_branch" 0
343   (and (eq_attr "tune" "exynosm1")
344        (eq_attr "type" "branch"))
345   "em1_br")
;; Calls: latency 1, reserving any one integer pipe (em1_alu) for a cycle.
347 (define_insn_reservation "exynos_m1_call" 1
348   (and (eq_attr "tune" "exynosm1")
349        (eq_attr "type" "call"))
350   "em1_alu")
352 ;; Basic ALU
354 ;; Simple ALU without shift, non-predicated
;; Latency 1 on any of the integer pipes (em1_alu).  Applies only when the
;; "predicated" attribute is not "yes".  This type list includes csel,
;; which the list in exynos_m1_alu_p omits.
355 (define_insn_reservation "exynos_m1_alu" 1
356   (and (eq_attr "tune" "exynosm1")
357        (and (not (eq_attr "predicated" "yes"))
358             (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
359                              alu_sreg, alus_sreg, logic_reg, logics_reg,\
360                              adc_imm, adcs_imm, adc_reg, adcs_reg,\
361                              adr, bfm, bfx, clz, rbit, rev, csel, alu_dsp_reg,\
362                              shift_imm, shift_reg, rotate_imm, extend,\
363                              mov_imm, mov_reg,\
364                              mvn_imm, mvn_reg,\
365                              mrs, multiple")))
366   "em1_alu")
368 ;; Simple ALU without shift, predicated
;; Latency 1, but restricted to pipe C (em1_c) rather than any integer
;; pipe.  The type list matches exynos_m1_alu except that csel is absent.
369 (define_insn_reservation "exynos_m1_alu_p" 1
370   (and (eq_attr "tune" "exynosm1")
371        (and (eq_attr "predicated" "yes")
372             (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
373                              alu_sreg, alus_sreg, logic_reg, logics_reg,\
374                              adc_imm, adcs_imm, adc_reg, adcs_reg,\
375                              adr, bfm, bfx, clz, rbit, rev, alu_dsp_reg,\
376                              shift_imm, shift_reg, rotate_imm, extend,\
377                              mov_imm, mov_reg,\
378                              mvn_imm, mvn_reg,\
379                              mrs, multiple")))
380   "em1_c")
382 ;; ALU ops with immediate shift
383 ;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle;
384 ;;       otherwise it takes 2 cycles and the unit is blocked;
385 ;;       for now, assume the latter's latency and the former's units.
;; That is: latency 2 with a single-cycle reservation of any integer pipe.
;; The condition does not test "predicated".
386 (define_insn_reservation "exynos_m1_alu_shift" 2
387   (and (eq_attr "tune" "exynosm1")
388        (eq_attr "type" "alu_ext, alus_ext,\
389                         alu_shift_imm, alus_shift_imm,\
390                         logic_shift_imm, logics_shift_imm,\
391                         mov_shift, mvn_shift"))
392   "(em1_alu)")
394 ;; ALU ops with register controlled shift, non-predicated
;; Latency 2.  "(em1_alu * 2)" repeats the em1_alu reservation for two
;; consecutive cycles.
395 (define_insn_reservation "exynos_m1_alu_shift_reg" 2
396   (and (eq_attr "tune" "exynosm1")
397        (and (not (eq_attr "predicated" "yes"))
398             (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
399                              logic_shift_reg, logics_shift_reg,\
400                              mov_shift_reg, mvn_shift_reg")))
401    "(em1_alu * 2)")
403 ;; ALU ops with register controlled shift, predicated
;; Latency 2.  "(em1_alu, em1_c)" reserves any integer pipe in the first
;; cycle and pipe C in the following cycle ("," separates cycles).
404 (define_insn_reservation "exynos_m1_alu_shift_reg_p" 2
405   (and (eq_attr "tune" "exynosm1")
406        (and (eq_attr "predicated" "yes")
407             (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
408                              logic_shift_reg, logics_shift_reg,\
409                              mov_shift_reg, mvn_shift_reg")))
410   "(em1_alu, em1_c)")
412 ;; Integer multiply
;; Insns whose "mul32" attribute is "yes" (attribute defined outside this
;; file): latency 3, one cycle on pipe C.
413 (define_insn_reservation "exynos_m1_mla" 3
414   (and (eq_attr "tune" "exynosm1")
415        (eq_attr "mul32" "yes"))
416   "em1_c")
;; Insns whose "mul64" attribute is "yes" (attribute defined outside this
;; file): latency 4; any integer pipe in the first cycle, then pipe C.
418 (define_insn_reservation "exynos_m1_mlal" 4
419   (and (eq_attr "tune" "exynosm1")
420        (eq_attr "mul64" "yes"))
421   "em1_alu, em1_c")
423 ;; Integer divide
424 ;; TODO: assume the median latency; blocks other divisions
;; Latency 13 for udiv and sdiv.  Only one cycle of pipe C is reserved, so
;; the blocking of later divisions mentioned in the TODO is not expressed
;; by this reservation.
425 (define_insn_reservation "exynos_m1_div" 13
426   (and (eq_attr "tune" "exynosm1")
427        (eq_attr "type" "udiv, sdiv"))
428   "em1_c")
430 ;; Load-store execution Unit
432 ;; Loads of up to 2 words.
;; Latency 4, one cycle on the load AGU (em1_ld).
433 (define_insn_reservation "exynos_m1_load" 4
434   (and (eq_attr "tune" "exynosm1")
435        (eq_attr "type" "load_byte, load_4, load_8"))
436   "em1_ld")
438 ;; Loads of 3 or 4 words.
;; Latency 6; the load AGU is reserved for three consecutive cycles.
439 (define_insn_reservation "exynos_m1_loadm" 6
440   (and (eq_attr "tune" "exynosm1")
441        (eq_attr "type" "load_12, load_16"))
442   "(em1_ld * 3)")
444 ;; Stores of up to 2 words.
;; Latency 1; the store AGU and store pipeline (em1_st) are reserved
;; together for one cycle.
445 (define_insn_reservation "exynos_m1_store" 1
446   (and (eq_attr "tune" "exynosm1")
447        (eq_attr "type" "store_4, store_8"))
448   "em1_st")
450 ;; Stores of 3 or 4 words.
;; Latency 3; em1_st is reserved for three consecutive cycles.
451 (define_insn_reservation "exynos_m1_storem" 3
452   (and (eq_attr "tune" "exynosm1")
453        (eq_attr "type" "store_12, store_16"))
454   "(em1_st * 3)")
456 ;; Advanced SIMD Unit
458 ;; Integer Arithmetic Instructions.
;; Class neon_arith_simple: latency 1, one cycle on em1_nmisc (F0).
;; NOTE(review): unlike its siblings this reservation name has no "neon_"
;; component; it may be referenced by name elsewhere, so confirm before
;; renaming.
460 (define_insn_reservation  "exynos_m1_arith_simple" 1
461   (and (eq_attr "tune" "exynosm1")
462        (eq_attr "exynos_m1_neon_type" "neon_arith_simple"))
463   "em1_nmisc")
;; Class neon_arith_basic: latency 2, one cycle on either F pipe (em1_nalu).
465 (define_insn_reservation  "exynos_m1_neon_arith_basic" 2
466   (and (eq_attr "tune" "exynosm1")
467        (eq_attr "exynos_m1_neon_type" "neon_arith_basic"))
468   "em1_nalu")
;; Class neon_arith_complex: latency 3, one cycle on em1_nmisc (F0).
470 (define_insn_reservation  "exynos_m1_neon_arith_complex" 3
471   (and (eq_attr "tune" "exynosm1")
472        (eq_attr "exynos_m1_neon_type" "neon_arith_complex"))
473   "em1_nmisc")
475 ;; Integer Multiply Instructions.
;; Classes neon_multiply, neon_mla and neon_sat_mla_long share one
;; reservation: latency 4, one cycle on em1_nmisc (F0).
477 (define_insn_reservation "exynos_m1_neon_multiply" 4
478   (and (eq_attr "tune" "exynosm1")
479        (eq_attr "exynos_m1_neon_type"
480                 "neon_multiply, neon_mla, neon_sat_mla_long"))
481   "em1_nmisc")
483 ;; Integer Shift Instructions.
;; Class neon_shift_acc: latency 4, one cycle on em1_nalu1 (F1).
485 (define_insn_reservation
486   "exynos_m1_neon_shift_acc" 4
487   (and (eq_attr "tune" "exynosm1")
488        (eq_attr "exynos_m1_neon_type" "neon_shift_acc"))
489   "em1_nalu1")
;; Classes neon_shift_imm_basic and neon_shift_reg_basic: latency 2, one
;; cycle on either F pipe (em1_nalu).
491 (define_insn_reservation
492   "exynos_m1_neon_shift_basic" 2
493   (and (eq_attr "tune" "exynosm1")
494        (eq_attr "exynos_m1_neon_type"
495                 "neon_shift_imm_basic, neon_shift_reg_basic"))
496   "em1_nalu")
;; Classes neon_shift_imm_complex and neon_shift_reg_complex (the
;; saturating shifts): latency 4, one cycle on em1_nalu1 (F1).
498 (define_insn_reservation
499   "exynos_m1_neon_shift_complex" 4
500   (and (eq_attr "tune" "exynosm1")
501        (eq_attr "exynos_m1_neon_type"
502                 "neon_shift_imm_complex, neon_shift_reg_complex"))
503   "em1_nalu1")
505 ;; Floating Point Instructions.
;; Class neon_fp_unary (FP negate and absolute value types): latency 2,
;; one cycle on either F pipe (em1_nalu).
507 (define_insn_reservation
508   "exynos_m1_neon_fp_unary" 2
509   (and (eq_attr "tune" "exynosm1")
510        (eq_attr "exynos_m1_neon_type" "neon_fp_unary"))
511   "em1_nalu")
;; Class neon_fp_add: latency 4, one cycle on em1_fadd (F1).
513 (define_insn_reservation
514   "exynos_m1_neon_fp_add" 4
515   (and (eq_attr "tune" "exynosm1")
516        (eq_attr "exynos_m1_neon_type" "neon_fp_add"))
517   "em1_fadd")
;; Class neon_fp_abd: latency 3, one cycle on em1_nmisc (F0).
519 (define_insn_reservation
520   "exynos_m1_neon_fp_abd" 3
521   (and (eq_attr "tune" "exynosm1")
522        (eq_attr "exynos_m1_neon_type" "neon_fp_abd"))
523   "em1_nmisc")
;; Class neon_fp_compare, which per exynos_m1_neon_type also covers the
;; neon_fp_minmax_* types: latency 1, one cycle on em1_nmisc (F0).
525 (define_insn_reservation
526   "exynos_m1_neon_fp_compare" 1
527   (and (eq_attr "tune" "exynosm1")
528        (eq_attr "exynos_m1_neon_type" "neon_fp_compare"))
529   "em1_nmisc")
531 ;; TODO: the latency and throughput of reduce insns actually vary between
532 ;; 3-5 and 1/4-1, but picked the median values.
;; Class neon_fp_reduc_minmax: latency 5, em1_nmisc (F0) reserved for four
;; consecutive cycles.
;; NOTE(review): latency 5 and a four-cycle reservation sit at the ends of
;; the ranges quoted above rather than in the middle -- confirm intended.
533 (define_insn_reservation
534   "exynos_m1_neon_fp_reduc" 5
535   (and (eq_attr "tune" "exynosm1")
536        (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax"))
537   "(em1_nmisc * 4)")
;; Class neon_fp_reduc_add: latency 10; two cycles on either F pipe
;; (em1_nalu), followed by one cycle on em1_fadd (F1).
539 (define_insn_reservation
540   "exynos_m1_neon_fp_reduc_add" 10
541   (and (eq_attr "tune" "exynosm1")
542        (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add"))
543   "((em1_nalu * 2), em1_fadd)")
;; Class neon_fp_round: latency 4, one cycle on em1_fcvt (F0).
545 (define_insn_reservation
546   "exynos_m1_neon_fp_round" 4
547   (and (eq_attr "tune" "exynosm1")
548        (eq_attr "exynos_m1_neon_type" "neon_fp_round"))
549   "em1_fcvt")
;; Class neon_fp_cvt (FP width and FP/integer conversions): latency 4,
;; one cycle on em1_fcvt (F0).
551 (define_insn_reservation
552   "exynos_m1_neon_fp_cvt" 4
553   (and (eq_attr "tune" "exynosm1")
554        (eq_attr "exynos_m1_neon_type" "neon_fp_cvt"))
555   "em1_fcvt")
;; Class neon_fp_mul: latency 5, one cycle on em1_fmac (F0).
557 (define_insn_reservation
558   "exynos_m1_neon_fp_mul" 5
559   (and (eq_attr "tune" "exynosm1")
560        (eq_attr "exynos_m1_neon_type" "neon_fp_mul"))
561   "em1_fmac")
;; Class neon_fp_mla: latency 6, one cycle on em1_fmac (F0).
563 (define_insn_reservation
564   "exynos_m1_neon_fp_mla" 6
565   (and (eq_attr "tune" "exynosm1")
566        (eq_attr "exynos_m1_neon_type" "neon_fp_mla"))
567   "em1_fmac")
;; Class neon_fp_estimate (the recpe and rsqrte types): latency 5, one
;; cycle on em1_fcvt (F0).
569 (define_insn_reservation
570   "exynos_m1_neon_fp_estimate" 5
571   (and (eq_attr "tune" "exynosm1")
572        (eq_attr "exynos_m1_neon_type" "neon_fp_estimate"))
573   "em1_fcvt")
;; Class neon_fp_estimatex (the recpx types): latency 1, one cycle on
;; em1_nmisc (F0).
575 (define_insn_reservation
576   "exynos_m1_neon_fp_estimatex" 1
577   (and (eq_attr "tune" "exynosm1")
578        (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex"))
579   "em1_nmisc")
;; Class neon_fp_step (the recps and rsqrts types): latency 6, one cycle
;; on em1_fmac (F0).
581 (define_insn_reservation
582   "exynos_m1_neon_fp_step" 6
583   (and (eq_attr "tune" "exynosm1")
584        (eq_attr "exynos_m1_neon_type" "neon_fp_step"))
585   "em1_fmac")
587 ;; Miscellaneous Instructions.
;; Class neon_bitops: latency 2, one cycle on either F pipe (em1_nalu).
589 (define_insn_reservation
590   "exynos_m1_neon_bitops" 2
591   (and (eq_attr "tune" "exynosm1")
592        (eq_attr "exynos_m1_neon_type" "neon_bitops"))
593   "em1_nalu")
;; Class neon_bitops_q (Q-register ext/permute/zip types): latency 3; two
;; consecutive cycles, each on either F pipe.
595 (define_insn_reservation
596   "exynos_m1_neon_bitops_q" 3
597   (and (eq_attr "tune" "exynosm1")
598        (eq_attr "exynos_m1_neon_type" "neon_bitops_q"))
599   "(em1_nalu, em1_nalu)")
;; Class neon_bitins (the neon_bsl types): latency 2, one cycle on
;; em1_nalu1 (F1).
601 (define_insn_reservation
602   "exynos_m1_neon_bitins" 2
603   (and (eq_attr "tune" "exynosm1")
604        (eq_attr "exynos_m1_neon_type" "neon_bitins"))
605   "em1_nalu1")
607 ;; TODO: it is more complicated than this.
;; Class neon_tbl: all of neon_tbl1..neon_tbl4 are modeled identically,
;; with latency 2 and one cycle on em1_nalu1 (F1).
608 (define_insn_reservation
609   "exynos_m1_neon_tbl" 2
610   (and (eq_attr "tune" "exynosm1")
611        (eq_attr "exynos_m1_neon_type" "neon_tbl"))
612   "em1_nalu1")
;; Class neon_from_gp (also f_mcr/f_mcrr): latency 4.  The reservation is
;; em1_st, i.e. the store AGU and store pipeline; no F pipe is reserved.
614 (define_insn_reservation
615   "exynos_m1_neon_from_gp" 4
616   (and (eq_attr "tune" "exynosm1")
617        (eq_attr "exynos_m1_neon_type" "neon_from_gp"))
618   "em1_st")
;; Class neon_to_gp (also f_mrc/f_mrrc): latency 9.  em1_lfst reserves
;; em1_fst (F1) and the load AGU in the same cycle.
620 (define_insn_reservation
621   "exynos_m1_neon_to_gp" 9
622   (and (eq_attr "tune" "exynosm1")
623        (eq_attr "exynos_m1_neon_type" "neon_to_gp"))
624   "em1_lfst")
626 ;; Load Instructions.
;; Keyed directly on "type" (f_loads, f_loadd, neon_ldp) rather than on
;; exynos_m1_neon_type: latency 5, one cycle on the load AGU.
628 (define_insn_reservation
629   "exynos_m1_neon_load" 5
630   (and (eq_attr "tune" "exynosm1")
631        (eq_attr "type" "f_loads, f_loadd, neon_ldp"))
632   "em1_ld")
;; Type neon_ldp_q: latency 6; the load AGU is reserved in two consecutive
;; cycles.
634 (define_insn_reservation
635   "exynos_m1_neon_load_q" 6
636   (and (eq_attr "tune" "exynosm1")
637        (eq_attr "type" "neon_ldp_q"))
638   "(em1_ld, em1_ld)")
;; Classes neon_load1_1 and neon_load1_all: latency 6, one cycle on the
;; load AGU.
640 (define_insn_reservation
641   "exynos_m1_neon_load1_1" 6
642   (and (eq_attr "tune" "exynosm1")
643        (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all"))
644   "em1_ld")
;; Class neon_load1_2: latency 6, load AGU for two consecutive cycles.
646 (define_insn_reservation
647   "exynos_m1_neon_load1_2" 6
648   (and (eq_attr "tune" "exynosm1")
649        (eq_attr "exynos_m1_neon_type" "neon_load1_2"))
650   "(em1_ld * 2)")
;; Class neon_load1_3: latency 7, load AGU for three consecutive cycles.
652 (define_insn_reservation
653   "exynos_m1_neon_load1_3" 7
654   (and (eq_attr "tune" "exynosm1")
655        (eq_attr "exynos_m1_neon_type" "neon_load1_3"))
656   "(em1_ld * 3)")
;; Class neon_load1_4: latency 8, load AGU for four consecutive cycles.
658 (define_insn_reservation
659   "exynos_m1_neon_load1_4" 8
660   (and (eq_attr "tune" "exynosm1")
661        (eq_attr "exynos_m1_neon_type" "neon_load1_4"))
662   "(em1_ld * 4)")
;; Class neon_load1_one (single-lane load): latency 7; two cycles on the
;; load AGU, then one cycle on either F pipe (em1_nalu).
664 (define_insn_reservation
665   "exynos_m1_neon_load1_one" 7
666   (and (eq_attr "tune" "exynosm1")
667        (eq_attr "exynos_m1_neon_type" "neon_load1_one"))
668   "((em1_ld * 2), em1_nalu)")
;; Class neon_load2_2, which per exynos_m1_neon_type covers both the 2reg
;; and 4reg load2 types: latency 10, load AGU for five consecutive cycles.
670 (define_insn_reservation
671   "exynos_m1_neon_load2_2" 10
672   (and (eq_attr "tune" "exynosm1")
673        (eq_attr "exynos_m1_neon_type" "neon_load2_2"))
674   "(em1_ld * 5)")
;; Class neon_load2_one: latency 7; two cycles on the load AGU, then two
;; cycles on either F pipe (em1_nalu).
676 (define_insn_reservation
677   "exynos_m1_neon_load2_one" 7
678   (and (eq_attr "tune" "exynosm1")
679        (eq_attr "exynos_m1_neon_type" "neon_load2_one"))
680   "((em1_ld * 2), (em1_nalu * 2))")
;; Class neon_load2_all: latency 6, load AGU for two consecutive cycles.
682 (define_insn_reservation
683   "exynos_m1_neon_load2_all" 6
684   (and (eq_attr "tune" "exynosm1")
685        (eq_attr "exynos_m1_neon_type" "neon_load2_all"))
686   "(em1_ld * 2)")
;; Class neon_load3_3: latency 12, load AGU for six consecutive cycles.
688 (define_insn_reservation
689   "exynos_m1_neon_load3_3" 12
690   (and (eq_attr "tune" "exynosm1")
691        (eq_attr "exynos_m1_neon_type" "neon_load3_3"))
692   "(em1_ld * 6)")
;; Class neon_load3_one: latency 9; four cycles on the load AGU, then
;; three cycles on either F pipe (em1_nalu).
694 (define_insn_reservation
695   "exynos_m1_neon_load3_one" 9
696   (and (eq_attr "tune" "exynosm1")
697        (eq_attr "exynos_m1_neon_type" "neon_load3_one"))
698   "((em1_ld * 4), (em1_nalu * 3))")
;; Class neon_load3_all: latency 7, load AGU for three consecutive cycles.
700 (define_insn_reservation
701   "exynos_m1_neon_load3_all" 7
702   (and (eq_attr "tune" "exynosm1")
703        (eq_attr "exynos_m1_neon_type" "neon_load3_all"))
704   "(em1_ld * 3)")
;; Class neon_load4_4: latency 14, load AGU for seven consecutive cycles.
706 (define_insn_reservation
707   "exynos_m1_neon_load4_4" 14
708   (and (eq_attr "tune" "exynosm1")
709        (eq_attr "exynos_m1_neon_type" "neon_load4_4"))
710   "(em1_ld * 7)")
;; Class neon_load4_one: latency 9; four cycles on the load AGU, then four
;; cycles on either F pipe (em1_nalu).
712 (define_insn_reservation
713   "exynos_m1_neon_load4_one" 9
714   (and (eq_attr "tune" "exynosm1")
715        (eq_attr "exynos_m1_neon_type" "neon_load4_one"))
716   "((em1_ld * 4), (em1_nalu * 4))")
;; Class neon_load4_all: latency 8, load AGU for four consecutive cycles.
718 (define_insn_reservation
719   "exynos_m1_neon_load4_all" 8
720   (and (eq_attr "tune" "exynosm1")
721        (eq_attr "exynos_m1_neon_type" "neon_load4_all"))
722   "(em1_ld * 4)")
724 ;; Store Instructions.
;; Keyed directly on "type" (f_stores, f_stored, neon_stp): latency 1.
;; em1_sfst reserves em1_fst (F1) and the store units in the same cycle.
726 (define_insn_reservation
727   "exynos_m1_neon_store" 1
728   (and (eq_attr "tune" "exynosm1")
729        (eq_attr "type" "f_stores, f_stored, neon_stp"))
730   "em1_sfst")
;; Type neon_stp_q: latency 3, em1_sfst for two consecutive cycles.
732 (define_insn_reservation
733   "exynos_m1_neon_store_q" 3
734   (and (eq_attr "tune" "exynosm1")
735        (eq_attr "type" "neon_stp_q"))
736   "(em1_sfst * 2)")
;; Class neon_store1_1: latency 1, one cycle of em1_sfst.
738 (define_insn_reservation
739   "exynos_m1_neon_store1_1" 1
740   (and (eq_attr "tune" "exynosm1")
741        (eq_attr "exynos_m1_neon_type" "neon_store1_1"))
742   "em1_sfst")
;; Class neon_store1_2: latency 2, em1_sfst for two consecutive cycles.
744 (define_insn_reservation
745   "exynos_m1_neon_store1_2" 2
746   (and (eq_attr "tune" "exynosm1")
747        (eq_attr "exynos_m1_neon_type" "neon_store1_2"))
748   "(em1_sfst * 2)")
;; Class neon_store1_3: latency 3, em1_sfst for three consecutive cycles.
750 (define_insn_reservation
751   "exynos_m1_neon_store1_3" 3
752   (and (eq_attr "tune" "exynosm1")
753        (eq_attr "exynos_m1_neon_type" "neon_store1_3"))
754   "(em1_sfst * 3)")
;; Class neon_store1_4: latency 4, em1_sfst for four consecutive cycles.
756 (define_insn_reservation
757   "exynos_m1_neon_store1_4" 4
758   (and (eq_attr "tune" "exynosm1")
759        (eq_attr "exynos_m1_neon_type" "neon_store1_4"))
760   "(em1_sfst * 4)")
;; Class neon_store1_one (single-lane store): latency 7, one cycle of
;; em1_sfst.
762 (define_insn_reservation
763   "exynos_m1_neon_store1_one" 7
764   (and (eq_attr "tune" "exynosm1")
765        (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
766   "em1_sfst")
;; Classes neon_store2_2 and neon_store2_one: latency 7; one cycle of
;; em1_sfst followed by one cycle of em1_fst (F1).
768 (define_insn_reservation
769   "exynos_m1_neon_store2" 7
770   (and (eq_attr "tune" "exynosm1")
771        (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one"))
772   "em1_sfst, em1_fst")
;; Classes neon_store3_3 and neon_store3_one: latency 16; three cycles of
;; em1_sfst, then two cycles of em1_fst (F1), then one cycle on either F
;; pipe (em1_nalu).
774 (define_insn_reservation
775   "exynos_m1_neon_store3" 16
776   (and (eq_attr "tune" "exynosm1")
777        (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one"))
778   "((em1_sfst * 3), (em1_fst * 2), em1_nalu)")
;; Classes neon_store4_4 and neon_store4_one: latency 17; four cycles of
;; em1_sfst, then two cycles of em1_fst (F1), then one cycle on either F
;; pipe (em1_nalu).
780 (define_insn_reservation
781   "exynos_m1_neon_store4" 17
782   (and (eq_attr "tune" "exynosm1")
783        (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one"))
784   "((em1_sfst * 4), (em1_fst * 2), em1_nalu)")
786 ;; Floating-Point Operations.
;; Types fconsts and fconstd: latency 2, one cycle on either F pipe.
788 (define_insn_reservation "exynos_m1_fp_const" 2
789   (and (eq_attr "tune" "exynosm1")
790        (eq_attr "type" "fconsts, fconstd"))
791   "em1_nalu")
;; Types fadds and faddd: latency 4, one cycle on em1_fadd (F1).
793 (define_insn_reservation "exynos_m1_fp_add" 4
794   (and (eq_attr "tune" "exynosm1")
795        (eq_attr "type" "fadds, faddd"))
796   "em1_fadd")
;; Types fmuls and fmuld: latency 5, one cycle on em1_fmac (F0).
798 (define_insn_reservation "exynos_m1_fp_mul" 5
799   (and (eq_attr "tune" "exynosm1")
800        (eq_attr "type" "fmuls, fmuld"))
801   "em1_fmac")
;; Types fmacs, ffmas, fmacd and ffmad: latency 6, one cycle on em1_fmac
;; (F0).
803 (define_insn_reservation "exynos_m1_fp_mac" 6
804   (and (eq_attr "tune" "exynosm1")
805        (eq_attr "type" "fmacs, ffmas, fmacd, ffmad"))
806   "em1_fmac")
;; Types f_cvt, f_rints and f_rintd: latency 4, one cycle on em1_fcvt (F0).
808 (define_insn_reservation "exynos_m1_fp_cvt" 4
809   (and (eq_attr "tune" "exynosm1")
810        (eq_attr "type" "f_cvt, f_rints, f_rintd"))
811   "em1_fcvt")
;; Type f_cvtf2i: latency 13; one cycle on em1_fcvt (F0), then one cycle
;; of em1_lfst (em1_fst on F1 together with the load AGU).
813 (define_insn_reservation "exynos_m1_fp_cvt_i" 13
814   (and (eq_attr "tune" "exynosm1")
815        (eq_attr "type" "f_cvtf2i"))
816   "(em1_fcvt, em1_lfst)")
;; Type f_cvti2f: latency 9; one cycle of em1_st (store units), then one
;; cycle on em1_fcvt (F0).
818 (define_insn_reservation "exynos_m1_i_cvt_fp" 9
819   (and (eq_attr "tune" "exynosm1")
820        (eq_attr "type" "f_cvti2f"))
821   "(em1_st, em1_fcvt)")
;; Types fcmps and fcmpd: latency 4, one cycle on em1_nmisc (F0).
823 (define_insn_reservation "exynos_m1_fp_cmp" 4
824   (and (eq_attr "tune" "exynosm1")
825        (eq_attr "type" "fcmps, fcmpd"))
826   "em1_nmisc")
;; Types fccmps and fccmpd: latency 7; one cycle of em1_st (store units),
;; then one cycle on em1_nmisc (F0).
828 (define_insn_reservation "exynos_m1_fp_ccmp" 7
829   (and (eq_attr "tune" "exynosm1")
830        (eq_attr "type" "fccmps, fccmpd"))
831   "(em1_st, em1_nmisc)")
;; Type fcsel: latency 4; the store units (em1_st) and em1_nalu0 (F0) are
;; reserved in the same cycle.
833 (define_insn_reservation "exynos_m1_fp_sel" 4
834   (and (eq_attr "tune" "exynosm1")
835        (eq_attr "type" "fcsel"))
836   "(em1_st + em1_nalu0)")
;; Insns of type ffariths or ffarithd when tuning for exynosm1.
;; Default latency 2; reserves em1_nalu for one cycle.  A bypass later in
;; this file lowers the latency to 1 when the consumer is
;; exynos_m1_fp_const, _fp_arith, _fp_cpy or _fp_sel.
838 (define_insn_reservation "exynos_m1_fp_arith" 2
839   (and (eq_attr "tune" "exynosm1")
840        (eq_attr "type" "ffariths, ffarithd"))
841   "em1_nalu")
;; Insns of type fmov when tuning for exynosm1.  Default latency 2;
;; reserves em1_nalu for one cycle.  A bypass later in this file lowers
;; the latency to 1 when the consumer is exynos_m1_fp_const, _fp_arith,
;; _fp_cpy or _fp_sel.
843 (define_insn_reservation "exynos_m1_fp_cpy" 2
844   (and (eq_attr "tune" "exynosm1")
845        (eq_attr "type" "fmov"))
846   "em1_nalu")
;; Divide and square-root insns with the "_s" type names (fdivs, fsqrts
;; and the neon_fp_div_s* / neon_fp_sqrt_s* types) when tuning for
;; exynosm1.  Both the "type" FP names and the NEON names share this one
;; reservation.  Default latency 15; em1_fvar is held for 9 consecutive
;; cycles, so a second insn needing em1_fvar cannot start during them.
848 (define_insn_reservation "exynos_m1_fp_divs" 15
849   (and (eq_attr "tune" "exynosm1")
850        (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\
851                         fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q"))
852   "(em1_fvar * 9)")
;; Divide and square-root insns with the "_d" type names (fdivd, fsqrtd
;; and the neon_fp_div_d* / neon_fp_sqrt_d* types) when tuning for
;; exynosm1.  Default latency 22; em1_fvar is held for 9 consecutive
;; cycles.
;; NOTE(review): the 9-cycle occupancy equals that of exynos_m1_fp_divs
;; even though the latency here is 22 rather than 15 -- confirm that this
;; is intended and not a copy of the single-precision reservation.
854 (define_insn_reservation "exynos_m1_fp_divd" 22
855   (and (eq_attr "tune" "exynosm1")
856        (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\
857                         fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q"))
858   "(em1_fvar * 9)")
;; Insns of type f_minmaxs or f_minmaxd when tuning for exynosm1.
;; Default latency 2; em1_nmisc is held for 2 consecutive cycles, i.e.
;; for as long as the latency itself.
860 (define_insn_reservation "exynos_m1_fp_minmax" 2
861   (and (eq_attr "tune" "exynosm1")
862        (eq_attr "type" "f_minmaxs, f_minmaxd"))
863   "(em1_nmisc * 2)")
865 ;; Crypto Operations.
;; Crypto insns of type crypto_aese, crypto_aesmc, crypto_sha1_xor,
;; crypto_sha1_fast or crypto_sha256_fast when tuning for exynosm1.
;; Default latency 2; reserves em1_ncrypt for one cycle.  Bypasses later
;; in this file lower the latency to 1 for crypto consumers, and to 0 for
;; an exynos_m1_crypto_simple consumer when the guard function
;; aarch_crypto_can_dual_issue accepts the pair.
867 (define_insn_reservation "exynos_m1_crypto_simple" 2
868   (and (eq_attr "tune" "exynosm1")
869        (eq_attr "type" "crypto_aese, crypto_aesmc,\
870                         crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast"))
871   "em1_ncrypt")
;; Crypto insns of type crypto_sha1_slow or crypto_sha256_slow when tuning
;; for exynosm1.  Default latency 6; reserves em1_ncrypt for one cycle.
;; A bypass later in this file lowers the latency to 5 when the consumer
;; is exynos_m1_crypto_simple, _crypto_complex or matches
;; exynos_m1_crypto_poly*.
873 (define_insn_reservation "exynos_m1_crypto_complex" 6
874   (and (eq_attr "tune" "exynosm1")
875        (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
876   "em1_ncrypt")
;; Insns of type neon_mul_b_long, neon_mul_h_long or neon_mul_s_long when
;; tuning for exynosm1.  They are modelled on em1_ncrypt, in the crypto
;; section of this file.  Default latency 2; reserves em1_ncrypt for one
;; cycle.  A bypass later in this file lowers the latency to 1 for crypto
;; consumers.
878 (define_insn_reservation "exynos_m1_crypto_poly" 2
879   (and (eq_attr "tune" "exynosm1")
880        (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long"))
881   "em1_ncrypt")
;; Insns of type neon_mul_d_long when tuning for exynosm1.  Default
;; latency 4; reserves em1_ncrypt for one cycle.  A bypass later in this
;; file lowers the latency to 3 for crypto consumers.  The name also
;; matches the exynos_m1_crypto_poly* glob used as a consumer pattern in
;; the crypto bypasses.
883 (define_insn_reservation "exynos_m1_crypto_polyl" 4
884   (and (eq_attr "tune" "exynosm1")
885        (eq_attr "type" "neon_mul_d_long"))
886   "em1_ncrypt")
;; Insns of type crc when tuning for exynosm1.  Default latency 2;
;; reserves em1_c for one cycle.
888 (define_insn_reservation "exynos_m1_crc" 2
889   (and (eq_attr "tune" "exynosm1")
890        (eq_attr "type" "crc"))
891   "em1_c")
893 ;; Simple execution unit bypasses
895 ;; Pre-decrement and post-increment addressing modes update the register quickly.
896 ;; TODO: figure out how to tell the addressing mode register from the loaded one.
;; Producers: every reservation whose name matches exynos_m1_store* or
;; exynos_m1_neon_store*.  Consumers: every reservation matching the
;; store or load globs listed (scalar and NEON).  For such pairs the
;; latency is 1 instead of the producer's default latency.  No guard
;; function is given, so the bypass is not limited to dependences on the
;; address register (see the TODO above).
897 (define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*"
898                  "exynos_m1_store*, exynos_m1_neon_store*,
899                   exynos_m1_load*, exynos_m1_neon_load*")
901 ;; MLAs can feed other MLAs quickly.
;; Latency is 1 between any producer and any consumer whose reservation
;; names both match the glob exynos_m1_mla*.
902 (define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")
904 ;; Insns in FMAC or FADD can feed other such insns quickly.
;; exynos_m1_fp_mul result is available after 4 cycles (one less than its
;; default latency of 5) when consumed by exynos_m1_fp_add, _fp_mul or
;; _fp_mac.
905 (define_bypass 4 "exynos_m1_fp_mul"
906                  "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
;; exynos_m1_fp_mac result is available after 5 cycles (one less than its
;; default latency of 6) when consumed by exynos_m1_fp_add, _fp_mul or
;; _fp_mac.
907 (define_bypass 5 "exynos_m1_fp_mac"
908                  "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
;; NEON counterpart of the exynos_m1_fp_mul bypass: the result of
;; exynos_m1_neon_fp_mul is available after 4 cycles when consumed by
;; exynos_m1_neon_fp_add, _neon_fp_mul, _neon_fp_mla or _neon_fp_step.
909 (define_bypass 4 "exynos_m1_neon_fp_mul"
910                  "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
911                   exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
;; NEON counterpart of the exynos_m1_fp_mac bypass: results of
;; exynos_m1_neon_fp_mla and exynos_m1_neon_fp_step are available after
;; 5 cycles when consumed by exynos_m1_neon_fp_add, _neon_fp_mul,
;; _neon_fp_mla or _neon_fp_step.
912 (define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
913                  "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
914                   exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
;; exynos_m1_fp_add result is available after 3 cycles (one less than its
;; default latency of 4) when consumed by exynos_m1_fp_add, _fp_mul or
;; _fp_mac.
915 (define_bypass 3 "exynos_m1_fp_add"
916                  "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
;; NEON counterpart of the exynos_m1_fp_add bypass: the result of
;; exynos_m1_neon_fp_add is available after 3 cycles when consumed by
;; exynos_m1_neon_fp_add, _neon_fp_mul, _neon_fp_mla or _neon_fp_step.
917 (define_bypass 3 "exynos_m1_neon_fp_add"
918                  "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
919                   exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
921 ;; Insns in NALU can feed other such insns quickly.
;; Results of exynos_m1_fp_const, _fp_arith and _fp_cpy (default latency 2
;; each) are available after 1 cycle when consumed by one of those three
;; reservations or by exynos_m1_fp_sel.
922 (define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy"
923                  "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
924                   exynos_m1_fp_sel")
;; exynos_m1_fp_sel result is available after 3 cycles (one less than its
;; default latency of 4) when consumed by exynos_m1_fp_const, _fp_arith,
;; _fp_cpy or _fp_sel.
925 (define_bypass 3 "exynos_m1_fp_sel"
926                  "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
927                   exynos_m1_fp_sel")
;; NEON variant of the NALU forwarding: results of the five producer
;; reservations in the first string are available after 1 cycle to the
;; consumers in the second string.  The consumer list is wider than the
;; producer list: it also contains exynos_m1_neon_shift_acc and
;; exynos_m1_neon_shift_complex, and uses the glob exynos_m1_neon_bitops*.
;; NOTE(review): the producer list names exynos_m1_neon_bitops without a
;; wildcard, so any other reservation matching exynos_m1_neon_bitops* is
;; a consumer here but not a producer -- confirm this is intended.
928 (define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
929                   exynos_m1_neon_bitops, exynos_m1_neon_bitins,\
930                   exynos_m1_neon_tbl"
931                  "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
932                   exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
933                   exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
934                   exynos_m1_neon_tbl")
;; Results of exynos_m1_neon_shift_acc and exynos_m1_neon_shift_complex
;; are available after 3 cycles to the same consumer set as the 1-cycle
;; NEON bypass just above (arith_basic, shift_basic, shift_acc,
;; shift_complex, bitops*, bitins, tbl).
935 (define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex"
936                  "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
937                   exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
938                   exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
939                   exynos_m1_neon_tbl")
;; An exynos_m1_neon_fp_unary result is available after 1 cycle to another
;; exynos_m1_neon_fp_unary insn; no other consumer is covered.
940 (define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary")
942 ;; Insns in NCRYPT can feed other such insns quickly.
;; Results of exynos_m1_crypto_simple and exynos_m1_crypto_poly (default
;; latency 2 each) are available after 1 cycle when consumed by
;; exynos_m1_crypto_simple, _crypto_complex or any reservation matching
;; exynos_m1_crypto_poly* (which covers both _poly and _polyl).
943 (define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly"
944                  "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
945                   exynos_m1_crypto_poly*")
;; exynos_m1_crypto_polyl result is available after 3 cycles (one less
;; than its default latency of 4) when consumed by
;; exynos_m1_crypto_simple, _crypto_complex or exynos_m1_crypto_poly*.
946 (define_bypass 3 "exynos_m1_crypto_polyl"
947                  "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
948                   exynos_m1_crypto_poly*")
;; exynos_m1_crypto_complex result is available after 5 cycles (one less
;; than its default latency of 6) when consumed by
;; exynos_m1_crypto_simple, _crypto_complex or exynos_m1_crypto_poly*.
949 (define_bypass 5 "exynos_m1_crypto_complex"
950                  "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
951                   exynos_m1_crypto_poly*")
953 ;; AES{D,E}/AESMC pairs can feed each other instantly.
;; Guarded bypass: latency 0 between two exynos_m1_crypto_simple insns,
;; but only when the guard function aarch_crypto_can_dual_issue (defined
;; outside this file) returns nonzero for the producer/consumer pair.
;; When the guard rejects the pair this bypass is ignored.
;; NOTE(review): the guard is presumably what restricts this to the
;; AES/AESMC pairs named above, since exynos_m1_crypto_simple also covers
;; SHA types -- confirm against the guard's definition.
954 (define_bypass 0 "exynos_m1_crypto_simple"
955                  "exynos_m1_crypto_simple"
956                  "aarch_crypto_can_dual_issue")
958 ;; Predicted branches take no time, but mispredicted ones take forever anyway.
;; The producer glob exynos_m1_* matches every reservation with that
;; prefix, so any such insn feeding exynos_m1_call or exynos_m1_branch is
;; given a latency of 1, whatever its default latency is.
959 (define_bypass 1 "exynos_m1_*"
960                  "exynos_m1_call, exynos_m1_branch")