1 ;; ARM Cortex-A53 pipeline description
2 ;; Copyright (C) 2013 Free Software Foundation, Inc.
4 ;; Contributed by ARM Ltd.
6 ;; This file is part of GCC.
8 ;; GCC is free software; you can redistribute it and/or modify it
9 ;; under the terms of the GNU General Public License as published by
10 ;; the Free Software Foundation; either version 3, or (at your option)
13 ;; GCC is distributed in the hope that it will be useful, but
14 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;; General Public License for more details.
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>.
;; Declares the scheduling automaton "cortex_a53".  Every define_cpu_unit
;; in this description names it as the automaton the unit belongs to.
22 (define_automaton "cortex_a53")
24 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
26 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
28 ;; There are two main integer execution pipelines, described as
29 ;; issue slot 0 and issue slot 1.
31 (define_cpu_unit "cortex_a53_slot0" "cortex_a53")
32 (define_cpu_unit "cortex_a53_slot1" "cortex_a53")
;; Shorthand reservations built from the two slots:
;;   cortex_a53_slot_any      - either slot ("|" means alternative).
;;   cortex_a53_single_issue  - both slots in the same cycle ("+" means
;;                              simultaneous), so no second instruction can
;;                              take a slot in that cycle.
34 (define_reservation "cortex_a53_slot_any" "cortex_a53_slot0|cortex_a53_slot1")
35 (define_reservation "cortex_a53_single_issue" "cortex_a53_slot0+cortex_a53_slot1")
37 ;; The load/store pipeline. Load/store instructions can dual-issue from
38 ;; either pipeline, but two load/stores cannot simultaneously issue.
;; Every cortex_a53_load*/cortex_a53_store* reservation in this file
;; reserves this single unit together with its issue slot(s).
40 (define_cpu_unit "cortex_a53_ls" "cortex_a53")
42 ;; The store pipeline. Shared between both execution pipelines.
;; Reserved, in addition to cortex_a53_ls, by the cortex_a53_store*
;; reservations only.
44 (define_cpu_unit "cortex_a53_store" "cortex_a53")
46 ;; The branch pipeline. Branches can dual-issue with other instructions
47 ;; (except when those instructions take multiple cycles to issue).
;; This unit shares its name with the "cortex_a53_branch" insn reservation,
;; which reserves it alongside an issue slot.
49 (define_cpu_unit "cortex_a53_branch" "cortex_a53")
51 ;; The integer divider.
;; The udiv/sdiv reservations hold this unit for every cycle of the divide,
;; so two integer divides never overlap in the model.
53 (define_cpu_unit "cortex_a53_idiv" "cortex_a53")
55 ;; The floating-point add pipeline used to model the usage
56 ;; of the add pipeline by fmac instructions.
;; Reserved in the issue cycle by cortex_a53_fpalu and cortex_a53_fconst,
;; and by cortex_a53_fpmac after three idle cycles following issue.
58 (define_cpu_unit "cortex_a53_fpadd_pipe" "cortex_a53")
60 ;; Floating-point div/sqrt (long latency, out-of-order completion).
;; Held by cortex_a53_fdivs / cortex_a53_fdivd for the cycles after issue.
62 (define_cpu_unit "cortex_a53_fp_div_sqrt" "cortex_a53")
64 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
66 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ALU, move and shift instructions (the "type" values listed below) when
;; tuning for cortexa53: default result latency of 2 cycles, issued through
;; either issue slot.
68 (define_insn_reservation "cortex_a53_alu" 2
69 (and (eq_attr "tune" "cortexa53")
70 (eq_attr "type" "arlo_imm,arlo_reg,shift,shift_reg,\
71 mov_imm,mov_reg,mvn_imm,mvn_reg"))
72 "cortex_a53_slot_any")
;; The *_shift / *_shift_reg variants of the ALU, move and mvn types.
;; Same default latency (2) and slot usage (either slot) as
;; cortex_a53_alu; kept as a separate reservation so the bypasses that
;; name "cortex_a53_alu_shift" as consumer can attach a guard to it.
74 (define_insn_reservation "cortex_a53_alu_shift" 2
75 (and (eq_attr "tune" "cortexa53")
76 (eq_attr "type" "arlo_shift,arlo_shift_reg,\
77 mov_shift,mov_shift_reg,\
78 mvn_shift,mvn_shift_reg"))
79 "cortex_a53_slot_any")
81 ;; Forwarding path for unshifted operands.
;; Both bypasses lower the ALU/ALU-shift producer latency from the default 2
;; to 1 for the named consumers.  The second applies only when the guard
;; function arm_no_early_alu_shift_dep (not defined in this file) returns
;; non-zero for the producer/consumer pair.
;; NOTE(review): the first define_bypass has no consumer list and no closing
;; parenthesis here (the embedded line numbering skips from 83 to 86), so
;; at least one line of that form appears to be missing -- restore it from
;; the upstream file before relying on this description.
83 (define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
86 (define_bypass 1 "cortex_a53_alu,cortex_a53_alu_shift"
87 "cortex_a53_alu_shift"
88 "arm_no_early_alu_shift_dep")
90 ;; The multiplier pipeline can forward results so there's no need to specify
91 ;; bypasses. Multiplies can only single-issue currently.
;; Matches any instruction whose "mul32" or "mul64" attribute is "yes".
;; Default latency 3; reserving cortex_a53_single_issue takes both issue
;; slots, which is how the single-issue restriction is modelled.
93 (define_insn_reservation "cortex_a53_mul" 3
94 (and (eq_attr "tune" "cortexa53")
95 (ior (eq_attr "mul32" "yes")
96 (eq_attr "mul64" "yes")))
97 "cortex_a53_single_issue")
99 ;; A multiply with a single-register result or an MLA, followed by an
100 ;; MLA with an accumulator dependency, has its result forwarded so two
101 ;; such instructions can issue back-to-back.
;; NOTE(review): define_bypass takes (latency producers consumers [guard]).
;; As shown here only two strings follow the latency, which would put the
;; guard name "arm_mac_accumulator_is_mul_result" in the consumer position.
;; The embedded numbering skips 104, so the consumer line appears to have
;; been lost -- restore it from the upstream file.
103 (define_bypass 1 "cortex_a53_mul"
105 "arm_mac_accumulator_is_mul_result")
107 ;; Punt with a high enough latency for divides.
;; Unsigned divide: default latency 8.  Issues in slot 0 only and occupies
;; the integer divider for 8 consecutive cycles (the issue cycle plus 7
;; more), matching the latency.
108 (define_insn_reservation "cortex_a53_udiv" 8
109 (and (eq_attr "tune" "cortexa53")
110 (eq_attr "type" "udiv"))
111 "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7")
;; Signed divide: default latency 9, one cycle more than cortex_a53_udiv.
;; Issues in slot 0 only and occupies the integer divider for 9 consecutive
;; cycles (the issue cycle plus 8 more).
113 (define_insn_reservation "cortex_a53_sdiv" 9
114 (and (eq_attr "tune" "cortexa53")
115 (eq_attr "type" "sdiv"))
116 "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8")
;; Multiply and divide results reach the named ALU consumers with latency 2
;; instead of the producers' default latencies (3, 8 and 9).  The second
;; bypass is conditional on the guard arm_no_early_alu_shift_dep (not
;; defined in this file).
;; NOTE(review): the first define_bypass has no consumer list and is not
;; closed (the embedded numbering skips 120), so a line appears to be
;; missing -- restore it from the upstream file.
119 (define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
121 (define_bypass 2 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
122 "cortex_a53_alu_shift"
123 "arm_no_early_alu_shift_dep")
125 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
126 ;; Load/store instructions.
127 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
129 ;; Address-generation happens in the issue stage.
;; Single-register and byte loads: default latency 3.  Takes either issue
;; slot plus the load/store unit in the same cycle, so it can pair with a
;; non-load/store instruction.
131 (define_insn_reservation "cortex_a53_load1" 3
132 (and (eq_attr "tune" "cortexa53")
133 (eq_attr "type" "load_byte,load1"))
134 "cortex_a53_slot_any+cortex_a53_ls")
;; "store1" instructions: default latency 2.  Takes either issue slot plus
;; both the load/store unit and the store unit in the same cycle.
136 (define_insn_reservation "cortex_a53_store1" 2
137 (and (eq_attr "tune" "cortexa53")
138 (eq_attr "type" "store1"))
139 "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store")
;; "load2" instructions: same default latency (3) as cortex_a53_load1, but
;; modelled as single-issue: both issue slots and the load/store unit are
;; reserved in the issue cycle.
141 (define_insn_reservation "cortex_a53_load2" 3
142 (and (eq_attr "tune" "cortexa53")
143 (eq_attr "type" "load2"))
144 "cortex_a53_single_issue+cortex_a53_ls")
;; "store2" instructions: same default latency (2) as cortex_a53_store1,
;; but single-issue: both issue slots, the load/store unit and the store
;; unit are all reserved in the issue cycle.
146 (define_insn_reservation "cortex_a53_store2" 2
147 (and (eq_attr "tune" "cortexa53")
148 (eq_attr "type" "store2"))
149 "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store")
;; "load3" and "load4" instructions: default latency 4.  The "*2" repeats
;; the parenthesised reservation, so both issue slots and the load/store
;; unit are held for two consecutive cycles.
151 (define_insn_reservation "cortex_a53_load3plus" 4
152 (and (eq_attr "tune" "cortexa53")
153 (eq_attr "type" "load3,load4"))
154 "(cortex_a53_single_issue+cortex_a53_ls)*2")
;; "store3" and "store4" instructions: default latency 3.  Both issue
;; slots, the load/store unit and the store unit are held for two
;; consecutive cycles ("*2").
156 (define_insn_reservation "cortex_a53_store3plus" 3
157 (and (eq_attr "tune" "cortexa53")
158 (eq_attr "type" "store3,store4"))
159 "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2")
161 ;; Load/store addresses are required early in Issue.
;; Both bypasses set a latency of 3 from load and ALU producers.  For the
;; ALU producers that is one cycle more than their default latency of 2.
;; NOTE(review): define_bypass takes (latency producers consumers [guard]).
;; Each form below shows only the producer list and one further string, so
;; the guard names "arm_early_load_addr_dep" / "arm_early_store_addr_dep"
;; sit in the consumer position.  The embedded numbering skips 163 and 166,
;; so the consumer lines appear to have been lost -- restore them from the
;; upstream file.
162 (define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
164 "arm_early_load_addr_dep")
165 (define_bypass 3 "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,cortex_a53_alu,cortex_a53_alu_shift"
167 "arm_early_store_addr_dep")
169 ;; Load data can forward in the ALU pipeline.
;; Results of load1/load2 reach the named ALU consumers with latency 2
;; instead of the default 3.  The second bypass is conditional on the guard
;; arm_no_early_alu_shift_dep (not defined in this file).
;; NOTE(review): the first define_bypass has no consumer list and is not
;; closed (the embedded numbering skips 171), so a line appears to be
;; missing -- restore it from the upstream file.
170 (define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
172 (define_bypass 2 "cortex_a53_load1,cortex_a53_load2"
173 "cortex_a53_alu_shift"
174 "arm_no_early_alu_shift_dep")
176 ;; ALU ops can forward to stores.
;; Latency 0 from an ALU result to any integer store reservation, applied
;; only when the guard arm_no_early_store_addr_dep (not defined in this
;; file) returns non-zero.  Presumably that means the result is the stored
;; value rather than part of the address -- confirm against the guard.
177 (define_bypass 0 "cortex_a53_alu,cortex_a53_alu_shift"
178 "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
179 "arm_no_early_store_addr_dep")
;; Same consumers and guard for multiply, divide and load results, but
;; with latency 1 instead of 0.
181 (define_bypass 1 "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus"
182 "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
183 "arm_no_early_store_addr_dep")
185 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
187 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
189 ;; Currently models all branches as dual-issuable from either execution
190 ;; slot, which isn't true for all cases. We still need to model indirect
;; NOTE(review): the sentence above is cut off in this listing (the embedded
;; numbering skips 191-192); presumably it ends "... indirect branches."
;; "branch" and "call" instructions: default latency 0.  Takes either issue
;; slot plus the branch unit in the same cycle.
193 (define_insn_reservation "cortex_a53_branch" 0
194 (and (eq_attr "tune" "cortexa53")
195 (eq_attr "type" "branch,call"))
196 "cortex_a53_slot_any+cortex_a53_branch")
198 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
199 ;; Floating-point arithmetic.
200 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point ALU-class instructions: default latency 4, issued in
;; slot 0 only, reserving the FP add pipeline in the same cycle.
;; NOTE(review): the "type" string ends with a line continuation, but the
;; next line shown is already the reservation string (the embedded numbering
;; skips 205).  The remainder of the type list and the closing of the
;; condition appear to be missing -- restore them from the upstream file.
;; "fmuls" is also listed by cortex_a53_fpmul; confirm which one is intended.
202 (define_insn_reservation "cortex_a53_fpalu" 4
203 (and (eq_attr "tune" "cortexa53")
204 (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fcpys, fmuls, f_cvt,\
206 "cortex_a53_slot0+cortex_a53_fpadd_pipe")
;; "fconsts" / "fconstd" instructions: default latency 2.  Issued in slot 0
;; only and reserves the FP add pipeline in the same cycle, the same unit
;; usage as cortex_a53_fpalu.
208 (define_insn_reservation "cortex_a53_fconst" 2
209 (and (eq_attr "tune" "cortexa53")
210 (eq_attr "type" "fconsts,fconstd"))
211 "cortex_a53_slot0+cortex_a53_fpadd_pipe")
;; "fmuls" / "fmuld" instructions: default latency 4.
;; NOTE(review): define_insn_reservation requires a unit-reservation string
;; as its last operand.  None is shown and the form is not closed (the
;; embedded numbering skips 216), so that line appears to be missing --
;; restore it from the upstream file.
213 (define_insn_reservation "cortex_a53_fpmul" 4
214 (and (eq_attr "tune" "cortexa53")
215 (eq_attr "type" "fmuls,fmuld"))
218 ;; For single-precision multiply-accumulate, the add (accumulate) is issued after
219 ;; the multiply completes. Model that accordingly.
;; Default latency 8.  The reservation takes slot 0 in the issue cycle,
;; then three cycles with no unit reserved ("nothing*3"), then the FP add
;; pipeline for one cycle.
;; NOTE(review): the comment above says single-precision, but the condition
;; also matches the double-precision types "fmacd" and "ffmad" -- confirm
;; that sharing one model is intended.
221 (define_insn_reservation "cortex_a53_fpmac" 8
222 (and (eq_attr "tune" "cortexa53")
223 (eq_attr "type" "fmacs,fmacd,ffmas,ffmad"))
224 "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe")
226 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
227 ;; Floating-point divide/square root instructions.
228 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
229 ;; fsqrt really takes one cycle less, but that is not modelled.
;; "fdivs" instructions: default latency 14.  Slot 0 is taken in the issue
;; cycle only; the div/sqrt unit is then held for the following 13 cycles,
;; so other instructions can issue while the divide is in progress.
231 (define_insn_reservation "cortex_a53_fdivs" 14
232 (and (eq_attr "tune" "cortexa53")
233 (eq_attr "type" "fdivs"))
234 "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 13")
;; "fdivd" instructions: default latency 29.  Same shape as
;; cortex_a53_fdivs: slot 0 in the issue cycle, then the div/sqrt unit is
;; held for the following 28 cycles.
236 (define_insn_reservation "cortex_a53_fdivd" 29
237 (and (eq_attr "tune" "cortexa53")
238 (eq_attr "type" "fdivd"))
239 "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 28")
241 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
242 ;; VFP to/from core transfers.
243 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Transfers between core and VFP registers: "r_2_f" has default latency 4,
;; "f_2_r" has default latency 2.
;; NOTE(review): neither form shows its unit-reservation string or closing
;; parenthesis (the embedded numbering skips 248 and 253), so those lines
;; appear to be missing -- restore them from the upstream file.
245 (define_insn_reservation "cortex_a53_r2f" 4
246 (and (eq_attr "tune" "cortexa53")
247 (eq_attr "type" "r_2_f"))
250 (define_insn_reservation "cortex_a53_f2r" 2
251 (and (eq_attr "tune" "cortexa53")
252 (eq_attr "type" "f_2_r"))
255 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
256 ;; VFP flag transfer.
257 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; "f_flag" instructions: default latency 4.
;; NOTE(review): the unit-reservation string and closing parenthesis are
;; not shown (the embedded numbering skips 262), so that line appears to be
;; missing -- restore it from the upstream file.
259 (define_insn_reservation "cortex_a53_f_flags" 4
260 (and (eq_attr "tune" "cortexa53")
261 (eq_attr "type" "f_flag"))
264 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
266 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Floating-point loads and stores.  Default latencies: "f_loads" 4,
;; "f_loadd" 5, "f_stores" 0, "f_stored" 0.
;; NOTE(review): none of the four forms below shows its unit-reservation
;; string or closing parenthesis (the embedded numbering skips 271, 276, 281
;; and 286), so those lines appear to be missing -- restore them from the
;; upstream file.
268 (define_insn_reservation "cortex_a53_f_loads" 4
269 (and (eq_attr "tune" "cortexa53")
270 (eq_attr "type" "f_loads"))
273 (define_insn_reservation "cortex_a53_f_loadd" 5
274 (and (eq_attr "tune" "cortexa53")
275 (eq_attr "type" "f_loadd"))
278 (define_insn_reservation "cortex_a53_f_stores" 0
279 (and (eq_attr "tune" "cortexa53")
280 (eq_attr "type" "f_stores"))
283 (define_insn_reservation "cortex_a53_f_stored" 0
284 (and (eq_attr "tune" "cortexa53")
285 (eq_attr "type" "f_stored"))
288 ;; Load-to-use for floating-point values has a penalty of one cycle,
289 ;; i.e. a latency of two.
;; Lowers the cortex_a53_f_loads latency from its default 4 to 2 for the
;; listed floating-point consumers.
;; NOTE(review): the consumer string ends with a line continuation and is
;; never terminated here (the embedded numbering skips 294), so the end of
;; the consumer list and the closing parenthesis appear to be missing --
;; restore them from the upstream file.
291 (define_bypass 2 "cortex_a53_f_loads"
292 "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
293 cortex_a53_fdivs, cortex_a53_fdivd,\
296 (define_bypass 2 "cortex_a53_f_loadd"
297 "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
298 cortex_a53_fdivs, cortex_a53_fdivd,\