gcc/config/aarch64/aarch64-builtins.cc
1 /* Builtins' description for AArch64 SIMD architecture.
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #define IN_TARGET_CODE 1
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "ssa.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "explow.h"
42 #include "expr.h"
43 #include "langhooks.h"
44 #include "gimple-iterator.h"
45 #include "case-cfn-macros.h"
46 #include "emit-rtl.h"
47 #include "stringpool.h"
48 #include "attribs.h"
49 #include "gimple-fold.h"
50 #include "builtins.h"
51 #include "aarch64-builtins.h"
53 #define v8qi_UP E_V8QImode
54 #define v8di_UP E_V8DImode
55 #define v4hi_UP E_V4HImode
56 #define v4hf_UP E_V4HFmode
57 #define v2si_UP E_V2SImode
58 #define v2sf_UP E_V2SFmode
59 #define v1df_UP E_V1DFmode
60 #define v1di_UP E_V1DImode
61 #define di_UP E_DImode
62 #define df_UP E_DFmode
63 #define v16qi_UP E_V16QImode
64 #define v8hi_UP E_V8HImode
65 #define v8hf_UP E_V8HFmode
66 #define v4si_UP E_V4SImode
67 #define v4sf_UP E_V4SFmode
68 #define v2di_UP E_V2DImode
69 #define v2df_UP E_V2DFmode
70 #define ti_UP E_TImode
71 #define oi_UP E_OImode
72 #define ci_UP E_CImode
73 #define xi_UP E_XImode
74 #define si_UP E_SImode
75 #define sf_UP E_SFmode
76 #define hi_UP E_HImode
77 #define hf_UP E_HFmode
78 #define qi_UP E_QImode
79 #define bf_UP E_BFmode
80 #define v4bf_UP E_V4BFmode
81 #define v8bf_UP E_V8BFmode
82 #define v2x8qi_UP E_V2x8QImode
83 #define v2x4hi_UP E_V2x4HImode
84 #define v2x4hf_UP E_V2x4HFmode
85 #define v2x4bf_UP E_V2x4BFmode
86 #define v2x2si_UP E_V2x2SImode
87 #define v2x2sf_UP E_V2x2SFmode
88 #define v2x1di_UP E_V2x1DImode
89 #define v2x1df_UP E_V2x1DFmode
90 #define v2x16qi_UP E_V2x16QImode
91 #define v2x8hi_UP E_V2x8HImode
92 #define v2x8hf_UP E_V2x8HFmode
93 #define v2x8bf_UP E_V2x8BFmode
94 #define v2x4si_UP E_V2x4SImode
95 #define v2x4sf_UP E_V2x4SFmode
96 #define v2x2di_UP E_V2x2DImode
97 #define v2x2df_UP E_V2x2DFmode
98 #define v3x8qi_UP E_V3x8QImode
99 #define v3x4hi_UP E_V3x4HImode
100 #define v3x4hf_UP E_V3x4HFmode
101 #define v3x4bf_UP E_V3x4BFmode
102 #define v3x2si_UP E_V3x2SImode
103 #define v3x2sf_UP E_V3x2SFmode
104 #define v3x1di_UP E_V3x1DImode
105 #define v3x1df_UP E_V3x1DFmode
106 #define v3x16qi_UP E_V3x16QImode
107 #define v3x8hi_UP E_V3x8HImode
108 #define v3x8hf_UP E_V3x8HFmode
109 #define v3x8bf_UP E_V3x8BFmode
110 #define v3x4si_UP E_V3x4SImode
111 #define v3x4sf_UP E_V3x4SFmode
112 #define v3x2di_UP E_V3x2DImode
113 #define v3x2df_UP E_V3x2DFmode
114 #define v4x8qi_UP E_V4x8QImode
115 #define v4x4hi_UP E_V4x4HImode
116 #define v4x4hf_UP E_V4x4HFmode
117 #define v4x4bf_UP E_V4x4BFmode
118 #define v4x2si_UP E_V4x2SImode
119 #define v4x2sf_UP E_V4x2SFmode
120 #define v4x1di_UP E_V4x1DImode
121 #define v4x1df_UP E_V4x1DFmode
122 #define v4x16qi_UP E_V4x16QImode
123 #define v4x8hi_UP E_V4x8HImode
124 #define v4x8hf_UP E_V4x8HFmode
125 #define v4x8bf_UP E_V4x8BFmode
126 #define v4x4si_UP E_V4x4SImode
127 #define v4x4sf_UP E_V4x4SFmode
128 #define v4x2di_UP E_V4x2DImode
129 #define v4x2df_UP E_V4x2DFmode
130 #define UP(X) X##_UP
132 #define MODE_d_bf16 E_V4BFmode
133 #define MODE_d_f16 E_V4HFmode
134 #define MODE_d_f32 E_V2SFmode
135 #define MODE_d_f64 E_V1DFmode
136 #define MODE_d_s8 E_V8QImode
137 #define MODE_d_s16 E_V4HImode
138 #define MODE_d_s32 E_V2SImode
139 #define MODE_d_s64 E_V1DImode
140 #define MODE_d_u8 E_V8QImode
141 #define MODE_d_u16 E_V4HImode
142 #define MODE_d_u32 E_V2SImode
143 #define MODE_d_u64 E_V1DImode
144 #define MODE_d_p8 E_V8QImode
145 #define MODE_d_p16 E_V4HImode
146 #define MODE_d_p64 E_V1DImode
147 #define MODE_q_bf16 E_V8BFmode
148 #define MODE_q_f16 E_V8HFmode
149 #define MODE_q_f32 E_V4SFmode
150 #define MODE_q_f64 E_V2DFmode
151 #define MODE_q_s8 E_V16QImode
152 #define MODE_q_s16 E_V8HImode
153 #define MODE_q_s32 E_V4SImode
154 #define MODE_q_s64 E_V2DImode
155 #define MODE_q_u8 E_V16QImode
156 #define MODE_q_u16 E_V8HImode
157 #define MODE_q_u32 E_V4SImode
158 #define MODE_q_u64 E_V2DImode
159 #define MODE_q_p8 E_V16QImode
160 #define MODE_q_p16 E_V8HImode
161 #define MODE_q_p64 E_V2DImode
162 #define MODE_q_p128 E_TImode
164 #define QUAL_bf16 qualifier_none
165 #define QUAL_f16 qualifier_none
166 #define QUAL_f32 qualifier_none
167 #define QUAL_f64 qualifier_none
168 #define QUAL_s8 qualifier_none
169 #define QUAL_s16 qualifier_none
170 #define QUAL_s32 qualifier_none
171 #define QUAL_s64 qualifier_none
172 #define QUAL_u8 qualifier_unsigned
173 #define QUAL_u16 qualifier_unsigned
174 #define QUAL_u32 qualifier_unsigned
175 #define QUAL_u64 qualifier_unsigned
176 #define QUAL_p8 qualifier_poly
177 #define QUAL_p16 qualifier_poly
178 #define QUAL_p64 qualifier_poly
179 #define QUAL_p128 qualifier_poly
181 #define LENGTH_d ""
182 #define LENGTH_q "q"
184 #define SIMD_INTR_MODE(suffix, length) MODE_##length##_##suffix
185 #define SIMD_INTR_QUAL(suffix) QUAL_##suffix
186 #define SIMD_INTR_LENGTH_CHAR(length) LENGTH_##length
188 #define SIMD_MAX_BUILTIN_ARGS 5
190 /* Flags that describe what a function might do. */
191 const unsigned int FLAG_NONE = 0U;
192 const unsigned int FLAG_READ_FPCR = 1U << 0;
193 const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 1U << 1;
194 const unsigned int FLAG_READ_MEMORY = 1U << 2;
195 const unsigned int FLAG_PREFETCH_MEMORY = 1U << 3;
196 const unsigned int FLAG_WRITE_MEMORY = 1U << 4;
198 /* Not all FP intrinsics raise FP exceptions or read the FPCR register;
199 use this flag to suppress that. */
200 const unsigned int FLAG_AUTO_FP = 1U << 5;
202 const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS;
203 const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS
204 | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
205 const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY | FLAG_AUTO_FP;
206 const unsigned int FLAG_LOAD = FLAG_READ_MEMORY | FLAG_AUTO_FP;
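/* Illustrative note (not in the original source): FLAG_AUTO_FP only
   suppresses the FP behaviour that aarch64_call_properties would otherwise
   infer from the result mode.  A builtin declared with FLAG_LOAD
   (FLAG_READ_MEMORY | FLAG_AUTO_FP) is therefore not assumed to read FPCR
   or raise FP exceptions even when it returns a floating-point vector,
   whereas one declared with FLAG_NONE and an FP result mode has FLAG_FP
   added automatically.  */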
208 typedef struct
210 const char *name;
211 machine_mode mode;
212 const enum insn_code code;
213 unsigned int fcode;
214 enum aarch64_type_qualifiers *qualifiers;
215 unsigned int flags;
216 } aarch64_simd_builtin_datum;
218 static enum aarch64_type_qualifiers
219 aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
220 = { qualifier_none, qualifier_none };
221 #define TYPES_UNOP (aarch64_types_unop_qualifiers)
222 static enum aarch64_type_qualifiers
223 aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
224 = { qualifier_unsigned, qualifier_unsigned };
225 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
226 static enum aarch64_type_qualifiers
227 aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
228 = { qualifier_unsigned, qualifier_none };
229 #define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
230 static enum aarch64_type_qualifiers
231 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
232 = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
233 #define TYPES_BINOP (aarch64_types_binop_qualifiers)
234 static enum aarch64_type_qualifiers
235 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
236 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
237 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
238 static enum aarch64_type_qualifiers
239 aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
240 = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
241 #define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
242 static enum aarch64_type_qualifiers
243 aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
244 = { qualifier_none, qualifier_none, qualifier_unsigned };
245 #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
246 static enum aarch64_type_qualifiers
247 aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
248 = { qualifier_unsigned, qualifier_none, qualifier_none };
249 #define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
250 static enum aarch64_type_qualifiers
251 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
252 = { qualifier_poly, qualifier_poly, qualifier_poly };
253 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
254 static enum aarch64_type_qualifiers
255 aarch64_types_binop_ppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
256 = { qualifier_poly, qualifier_poly, qualifier_unsigned };
257 #define TYPES_BINOP_PPU (aarch64_types_binop_ppu_qualifiers)
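/* Illustrative note (not in the original source): each TYPES_* array
   describes a builtin signature with the return type in element 0 and the
   argument types after it (stores use qualifier_void as the "return" slot,
   see below).  TYPES_BINOP_UUS = { unsigned, unsigned, none }, for
   instance, is the shape of an intrinsic that combines an unsigned and a
   signed vector operand into an unsigned result.  */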
259 static enum aarch64_type_qualifiers
260 aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
261 = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
262 #define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
263 static enum aarch64_type_qualifiers
264 aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
265 = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
266 #define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
267 static enum aarch64_type_qualifiers
268 aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
269 = { qualifier_unsigned, qualifier_unsigned,
270 qualifier_unsigned, qualifier_unsigned };
271 #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
272 static enum aarch64_type_qualifiers
273 aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
274 = { qualifier_unsigned, qualifier_unsigned,
275 qualifier_unsigned, qualifier_lane_index };
276 #define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
277 static enum aarch64_type_qualifiers
278 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
279 = { qualifier_unsigned, qualifier_unsigned,
280 qualifier_unsigned, qualifier_immediate };
281 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
282 static enum aarch64_type_qualifiers
283 aarch64_types_ternop_sssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
284 = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
285 #define TYPES_TERNOP_SSSU (aarch64_types_ternop_sssu_qualifiers)
286 static enum aarch64_type_qualifiers
287 aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
288 = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
289 #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
290 static enum aarch64_type_qualifiers
291 aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
292 = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
293 #define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
294 static enum aarch64_type_qualifiers
295 aarch64_types_binop_pppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
296 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_unsigned };
297 #define TYPES_TERNOP_PPPU (aarch64_types_binop_pppu_qualifiers)
299 static enum aarch64_type_qualifiers
300 aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
301 = { qualifier_none, qualifier_none, qualifier_none,
302 qualifier_none, qualifier_lane_pair_index };
303 #define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
304 static enum aarch64_type_qualifiers
305 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
306 = { qualifier_none, qualifier_none, qualifier_none,
307 qualifier_none, qualifier_lane_index };
308 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
309 static enum aarch64_type_qualifiers
310 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
311 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
312 qualifier_unsigned, qualifier_lane_index };
313 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
315 static enum aarch64_type_qualifiers
316 aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
317 = { qualifier_none, qualifier_none, qualifier_unsigned,
318 qualifier_none, qualifier_lane_quadtup_index };
319 #define TYPES_QUADOPSSUS_LANE_QUADTUP \
320 (aarch64_types_quadopssus_lane_quadtup_qualifiers)
321 static enum aarch64_type_qualifiers
322 aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
323 = { qualifier_none, qualifier_none, qualifier_none,
324 qualifier_unsigned, qualifier_lane_quadtup_index };
325 #define TYPES_QUADOPSSSU_LANE_QUADTUP \
326 (aarch64_types_quadopsssu_lane_quadtup_qualifiers)
328 static enum aarch64_type_qualifiers
329 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
330 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
331 qualifier_unsigned, qualifier_immediate };
332 #define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)
334 static enum aarch64_type_qualifiers
335 aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
336 = { qualifier_none, qualifier_none, qualifier_immediate };
337 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
338 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
339 static enum aarch64_type_qualifiers
340 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
341 = { qualifier_unsigned, qualifier_none, qualifier_immediate };
342 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
343 static enum aarch64_type_qualifiers
344 aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
345 = { qualifier_none, qualifier_unsigned, qualifier_immediate };
346 #define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
347 static enum aarch64_type_qualifiers
348 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
349 = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
350 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
351 #define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
352 static enum aarch64_type_qualifiers
353 aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
354 = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
355 #define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
357 static enum aarch64_type_qualifiers
358 aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
359 = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
360 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
361 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
362 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
363 #define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
365 static enum aarch64_type_qualifiers
366 aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
367 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
368 #define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
370 static enum aarch64_type_qualifiers
371 aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
372 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
373 qualifier_immediate };
374 #define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)
376 static enum aarch64_type_qualifiers
377 aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
378 = { qualifier_none, qualifier_const_pointer_map_mode };
379 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
380 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
381 static enum aarch64_type_qualifiers
382 aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
383 = { qualifier_unsigned, qualifier_const_pointer_map_mode };
384 #define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
385 #define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
386 static enum aarch64_type_qualifiers
387 aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
388 = { qualifier_poly, qualifier_const_pointer_map_mode };
389 #define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
390 #define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)
392 static enum aarch64_type_qualifiers
393 aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
394 = { qualifier_none, qualifier_const_pointer_map_mode,
395 qualifier_none, qualifier_struct_load_store_lane_index };
396 #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
397 static enum aarch64_type_qualifiers
398 aarch64_types_loadstruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
399 = { qualifier_unsigned, qualifier_const_pointer_map_mode,
400 qualifier_unsigned, qualifier_struct_load_store_lane_index };
401 #define TYPES_LOADSTRUCT_LANE_U (aarch64_types_loadstruct_lane_u_qualifiers)
402 static enum aarch64_type_qualifiers
403 aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
404 = { qualifier_poly, qualifier_const_pointer_map_mode,
405 qualifier_poly, qualifier_struct_load_store_lane_index };
406 #define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)
408 static enum aarch64_type_qualifiers
409 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
410 = { qualifier_poly, qualifier_unsigned,
411 qualifier_poly, qualifier_poly };
412 #define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
413 static enum aarch64_type_qualifiers
414 aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
415 = { qualifier_none, qualifier_unsigned,
416 qualifier_none, qualifier_none };
417 #define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
418 static enum aarch64_type_qualifiers
419 aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
420 = { qualifier_unsigned, qualifier_unsigned,
421 qualifier_unsigned, qualifier_unsigned };
422 #define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)
424 /* The first argument (return type) of a store should be void type,
425 which we represent with qualifier_void. Its first operand will be
426 a DImode pointer to the location to store to, so we must use
427 qualifier_map_mode | qualifier_pointer to build a pointer to the
428 element type of the vector. */
429 static enum aarch64_type_qualifiers
430 aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
431 = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
432 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
433 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
434 static enum aarch64_type_qualifiers
435 aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
436 = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
437 #define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
438 #define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
439 static enum aarch64_type_qualifiers
440 aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
441 = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
442 #define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
443 #define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)
445 static enum aarch64_type_qualifiers
446 aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
447 = { qualifier_void, qualifier_pointer_map_mode,
448 qualifier_none, qualifier_struct_load_store_lane_index };
449 #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
450 static enum aarch64_type_qualifiers
451 aarch64_types_storestruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
452 = { qualifier_void, qualifier_pointer_map_mode,
453 qualifier_unsigned, qualifier_struct_load_store_lane_index };
454 #define TYPES_STORESTRUCT_LANE_U (aarch64_types_storestruct_lane_u_qualifiers)
455 static enum aarch64_type_qualifiers
456 aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
457 = { qualifier_void, qualifier_pointer_map_mode,
458 qualifier_poly, qualifier_struct_load_store_lane_index };
459 #define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)
461 #define CF0(N, X) CODE_FOR_aarch64_##N##X
462 #define CF1(N, X) CODE_FOR_##N##X##1
463 #define CF2(N, X) CODE_FOR_##N##X##2
464 #define CF3(N, X) CODE_FOR_##N##X##3
465 #define CF4(N, X) CODE_FOR_##N##X##4
466 #define CF10(N, X) CODE_FOR_##N##X
468 /* Define cascading VAR<N> macros that are used from
469 aarch64-builtin-iterators.h to iterate over modes. These definitions
470 will end up generating a number of VAR1 expansions and code later on in the
471 file should redefine VAR1 to whatever it needs to process on a per-mode
472 basis. */
473 #define VAR2(T, N, MAP, FLAG, A, B) \
474 VAR1 (T, N, MAP, FLAG, A) \
475 VAR1 (T, N, MAP, FLAG, B)
476 #define VAR3(T, N, MAP, FLAG, A, B, C) \
477 VAR2 (T, N, MAP, FLAG, A, B) \
478 VAR1 (T, N, MAP, FLAG, C)
479 #define VAR4(T, N, MAP, FLAG, A, B, C, D) \
480 VAR3 (T, N, MAP, FLAG, A, B, C) \
481 VAR1 (T, N, MAP, FLAG, D)
482 #define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \
483 VAR4 (T, N, MAP, FLAG, A, B, C, D) \
484 VAR1 (T, N, MAP, FLAG, E)
485 #define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \
486 VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \
487 VAR1 (T, N, MAP, FLAG, F)
488 #define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \
489 VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \
490 VAR1 (T, N, MAP, FLAG, G)
491 #define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
492 VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \
493 VAR1 (T, N, MAP, FLAG, H)
494 #define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
495 VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
496 VAR1 (T, N, MAP, FLAG, I)
497 #define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
498 VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
499 VAR1 (T, N, MAP, FLAG, J)
500 #define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
501 VAR10 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
502 VAR1 (T, N, MAP, FLAG, K)
503 #define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
504 VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
505 VAR1 (T, N, MAP, FLAG, L)
506 #define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
507 VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
508 VAR1 (T, N, MAP, FLAG, M)
509 #define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
510 VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
511 VAR1 (T, X, MAP, FLAG, N)
512 #define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
513 VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
514 VAR1 (T, X, MAP, FLAG, O)
515 #define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
516 VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
517 VAR1 (T, X, MAP, FLAG, P)
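/* Illustrative note (not in the original source): each VARn simply peels
   off one mode and recurses, so for example
     VAR3 (BINOP, foo, 0, NONE, v8qi, v4hi, v2si)
   expands (via VAR2) into
     VAR1 (BINOP, foo, 0, NONE, v8qi)
     VAR1 (BINOP, foo, 0, NONE, v4hi)
     VAR1 (BINOP, foo, 0, NONE, v2si)
   with VAR1 itself redefined below for each use of the .def file.
   "foo" is a placeholder name, not a real builtin.  */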
519 #include "aarch64-builtin-iterators.h"
521 /* The builtins below should be expanded through the standard optabs
522 CODE_FOR_[u]avg<mode>3_[floor,ceil]. However the mapping scheme in
523 aarch64-simd-builtins.def does not easily allow us to have a pre-mode
524 ("uavg") and post-mode string ("_ceil") in the CODE_FOR_* construction.
525 So the builtins use a name that is natural for AArch64 instructions
526 e.g. "aarch64_srhadd<mode>" and we re-map these to the optab-related
527 CODE_FOR_ here. */
528 #undef VAR1
529 #define VAR1(F,T1,T2,I,M) \
530 constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T1##M##3##T2;
532 BUILTIN_VDQ_BHSI (srhadd, avg, _ceil, 0)
533 BUILTIN_VDQ_BHSI (urhadd, uavg, _ceil, 0)
534 BUILTIN_VDQ_BHSI (shadd, avg, _floor, 0)
535 BUILTIN_VDQ_BHSI (uhadd, uavg, _floor, 0)
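/* Illustrative note (not in the original source): with the VAR1 definition
   above, the BUILTIN_VDQ_BHSI iterator expands each of these lines once per
   mode, so for V8QImode the srhadd line becomes roughly
     constexpr insn_code CODE_FOR_aarch64_srhaddv8qi = CODE_FOR_avgv8qi3_ceil;
   i.e. the "aarch64_srhadd" builtin name is routed to the standard
   avg<mode>3_ceil optab pattern.  */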
537 /* The builtins below should be expanded through the standard optabs
538 CODE_FOR_extend<mode><Vwide>2. */
539 #undef VAR1
540 #define VAR1(F,T,N,M) \
541 constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T##N##M##2;
543 VAR1 (float_extend_lo_, extend, v2sf, v2df)
544 VAR1 (float_extend_lo_, extend, v4hf, v4sf)
546 /* __builtin_aarch64_float_truncate_lo_<mode> should be expanded through the
547 standard optabs CODE_FOR_trunc<Vwide><mode>2. */
548 constexpr insn_code CODE_FOR_aarch64_float_truncate_lo_v4hf
549 = CODE_FOR_truncv4sfv4hf2;
550 constexpr insn_code CODE_FOR_aarch64_float_truncate_lo_v2sf
551 = CODE_FOR_truncv2dfv2sf2;
553 #undef VAR1
554 #define VAR1(T, N, MAP, FLAG, A) \
555 {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
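/* Illustrative note (not in the original source): with this final VAR1
   definition, every entry in aarch64-simd-builtins.def contributes one
   initializer row below.  A hypothetical entry
     VAR1 (BINOP, addp, 0, NONE, v8qi)
   would yield
     {"addpv8qi", E_V8QImode, CODE_FOR_aarch64_addpv8qi, 0,
      TYPES_BINOP, FLAG_NONE},
   where the fcode field (0 here) is filled in later by
   aarch64_init_simd_builtin_functions.  */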
557 static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
558 #include "aarch64-simd-builtins.def"
561 /* There are only 8 CRC32 builtins. Probably not worth their own .def file. */
562 #define AARCH64_CRC32_BUILTINS \
563 CRC32_BUILTIN (crc32b, QI) \
564 CRC32_BUILTIN (crc32h, HI) \
565 CRC32_BUILTIN (crc32w, SI) \
566 CRC32_BUILTIN (crc32x, DI) \
567 CRC32_BUILTIN (crc32cb, QI) \
568 CRC32_BUILTIN (crc32ch, HI) \
569 CRC32_BUILTIN (crc32cw, SI) \
570 CRC32_BUILTIN (crc32cx, DI)
572 /* The next 8 FCMLA intrinsics require some special handling compared to the
573 normal simd intrinsics. */
574 #define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
575 FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
576 FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
577 FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
578 FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
579 FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
580 FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
581 FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
582 FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \
585 /* vreinterpret intrinsics are defined for any pair of element types.
586 { _bf16 } { _bf16 }
587 { _f16 _f32 _f64 } { _f16 _f32 _f64 }
588 { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
589 { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
590 { _p8 _p16 _p64 } { _p8 _p16 _p64 }. */
591 #define VREINTERPRET_BUILTIN2(A, B) \
592 VREINTERPRET_BUILTIN (A, B, d)
594 #define VREINTERPRET_BUILTINS1(A) \
595 VREINTERPRET_BUILTIN2 (A, bf16) \
596 VREINTERPRET_BUILTIN2 (A, f16) \
597 VREINTERPRET_BUILTIN2 (A, f32) \
598 VREINTERPRET_BUILTIN2 (A, f64) \
599 VREINTERPRET_BUILTIN2 (A, s8) \
600 VREINTERPRET_BUILTIN2 (A, s16) \
601 VREINTERPRET_BUILTIN2 (A, s32) \
602 VREINTERPRET_BUILTIN2 (A, s64) \
603 VREINTERPRET_BUILTIN2 (A, u8) \
604 VREINTERPRET_BUILTIN2 (A, u16) \
605 VREINTERPRET_BUILTIN2 (A, u32) \
606 VREINTERPRET_BUILTIN2 (A, u64) \
607 VREINTERPRET_BUILTIN2 (A, p8) \
608 VREINTERPRET_BUILTIN2 (A, p16) \
609 VREINTERPRET_BUILTIN2 (A, p64)
611 #define VREINTERPRET_BUILTINS \
612 VREINTERPRET_BUILTINS1 (bf16) \
613 VREINTERPRET_BUILTINS1 (f16) \
614 VREINTERPRET_BUILTINS1 (f32) \
615 VREINTERPRET_BUILTINS1 (f64) \
616 VREINTERPRET_BUILTINS1 (s8) \
617 VREINTERPRET_BUILTINS1 (s16) \
618 VREINTERPRET_BUILTINS1 (s32) \
619 VREINTERPRET_BUILTINS1 (s64) \
620 VREINTERPRET_BUILTINS1 (u8) \
621 VREINTERPRET_BUILTINS1 (u16) \
622 VREINTERPRET_BUILTINS1 (u32) \
623 VREINTERPRET_BUILTINS1 (u64) \
624 VREINTERPRET_BUILTINS1 (p8) \
625 VREINTERPRET_BUILTINS1 (p16) \
626 VREINTERPRET_BUILTINS1 (p64)
628 /* vreinterpretq intrinsics are additionally defined for p128.
629 { _bf16 } { _bf16 }
630 { _f16 _f32 _f64 } { _f16 _f32 _f64 }
631 { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
632 { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
633 { _p8 _p16 _p64 _p128 } { _p8 _p16 _p64 _p128 }. */
634 #define VREINTERPRETQ_BUILTIN2(A, B) \
635 VREINTERPRET_BUILTIN (A, B, q)
637 #define VREINTERPRETQ_BUILTINS1(A) \
638 VREINTERPRETQ_BUILTIN2 (A, bf16) \
639 VREINTERPRETQ_BUILTIN2 (A, f16) \
640 VREINTERPRETQ_BUILTIN2 (A, f32) \
641 VREINTERPRETQ_BUILTIN2 (A, f64) \
642 VREINTERPRETQ_BUILTIN2 (A, s8) \
643 VREINTERPRETQ_BUILTIN2 (A, s16) \
644 VREINTERPRETQ_BUILTIN2 (A, s32) \
645 VREINTERPRETQ_BUILTIN2 (A, s64) \
646 VREINTERPRETQ_BUILTIN2 (A, u8) \
647 VREINTERPRETQ_BUILTIN2 (A, u16) \
648 VREINTERPRETQ_BUILTIN2 (A, u32) \
649 VREINTERPRETQ_BUILTIN2 (A, u64) \
650 VREINTERPRETQ_BUILTIN2 (A, p8) \
651 VREINTERPRETQ_BUILTIN2 (A, p16) \
652 VREINTERPRETQ_BUILTIN2 (A, p64) \
653 VREINTERPRETQ_BUILTIN2 (A, p128)
655 #define VREINTERPRETQ_BUILTINS \
656 VREINTERPRETQ_BUILTINS1 (bf16) \
657 VREINTERPRETQ_BUILTINS1 (f16) \
658 VREINTERPRETQ_BUILTINS1 (f32) \
659 VREINTERPRETQ_BUILTINS1 (f64) \
660 VREINTERPRETQ_BUILTINS1 (s8) \
661 VREINTERPRETQ_BUILTINS1 (s16) \
662 VREINTERPRETQ_BUILTINS1 (s32) \
663 VREINTERPRETQ_BUILTINS1 (s64) \
664 VREINTERPRETQ_BUILTINS1 (u8) \
665 VREINTERPRETQ_BUILTINS1 (u16) \
666 VREINTERPRETQ_BUILTINS1 (u32) \
667 VREINTERPRETQ_BUILTINS1 (u64) \
668 VREINTERPRETQ_BUILTINS1 (p8) \
669 VREINTERPRETQ_BUILTINS1 (p16) \
670 VREINTERPRETQ_BUILTINS1 (p64) \
671 VREINTERPRETQ_BUILTINS1 (p128)
673 #define AARCH64_SIMD_VREINTERPRET_BUILTINS \
674 VREINTERPRET_BUILTINS \
675 VREINTERPRETQ_BUILTINS
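/* Illustrative note (not in the original source): these lists are expanded
   several times below with different definitions of VREINTERPRET_BUILTIN --
   once to add enum values to aarch64_builtins and once to build
   aarch64_simd_intrinsic_data entries -- so a single pair such as
   (f32, s8) with length "q" ultimately describes the user-visible
   vreinterpretq_f32_s8 intrinsic.  */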
677 #define AARCH64_SIMD_VGET_LOW_BUILTINS \
678 VGET_LOW_BUILTIN(f16) \
679 VGET_LOW_BUILTIN(f32) \
680 VGET_LOW_BUILTIN(f64) \
681 VGET_LOW_BUILTIN(p8) \
682 VGET_LOW_BUILTIN(p16) \
683 VGET_LOW_BUILTIN(p64) \
684 VGET_LOW_BUILTIN(s8) \
685 VGET_LOW_BUILTIN(s16) \
686 VGET_LOW_BUILTIN(s32) \
687 VGET_LOW_BUILTIN(s64) \
688 VGET_LOW_BUILTIN(u8) \
689 VGET_LOW_BUILTIN(u16) \
690 VGET_LOW_BUILTIN(u32) \
691 VGET_LOW_BUILTIN(u64) \
692 VGET_LOW_BUILTIN(bf16)
694 #define AARCH64_SIMD_VGET_HIGH_BUILTINS \
695 VGET_HIGH_BUILTIN(f16) \
696 VGET_HIGH_BUILTIN(f32) \
697 VGET_HIGH_BUILTIN(f64) \
698 VGET_HIGH_BUILTIN(p8) \
699 VGET_HIGH_BUILTIN(p16) \
700 VGET_HIGH_BUILTIN(p64) \
701 VGET_HIGH_BUILTIN(s8) \
702 VGET_HIGH_BUILTIN(s16) \
703 VGET_HIGH_BUILTIN(s32) \
704 VGET_HIGH_BUILTIN(s64) \
705 VGET_HIGH_BUILTIN(u8) \
706 VGET_HIGH_BUILTIN(u16) \
707 VGET_HIGH_BUILTIN(u32) \
708 VGET_HIGH_BUILTIN(u64) \
709 VGET_HIGH_BUILTIN(bf16)
711 typedef struct
713 const char *name;
714 machine_mode mode;
715 const enum insn_code icode;
716 unsigned int fcode;
717 } aarch64_crc_builtin_datum;
719 /* Hold information about how to expand the FCMLA_LANEQ builtins. */
720 typedef struct
722 const char *name;
723 machine_mode mode;
724 const enum insn_code icode;
725 unsigned int fcode;
726 bool lane;
727 } aarch64_fcmla_laneq_builtin_datum;
729 /* Hold information about how to declare SIMD intrinsics. */
730 typedef struct
732 const char *name;
733 unsigned int fcode;
734 unsigned int op_count;
735 machine_mode op_modes[SIMD_MAX_BUILTIN_ARGS];
736 enum aarch64_type_qualifiers qualifiers[SIMD_MAX_BUILTIN_ARGS];
737 unsigned int flags;
738 bool skip;
739 } aarch64_simd_intrinsic_datum;
741 #define CRC32_BUILTIN(N, M) \
742 AARCH64_BUILTIN_##N,
744 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
745 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,
747 #define VREINTERPRET_BUILTIN(A, B, L) \
748 AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B,
750 #define VGET_LOW_BUILTIN(A) \
751 AARCH64_SIMD_BUILTIN_VGET_LOW_##A,
753 #define VGET_HIGH_BUILTIN(A) \
754 AARCH64_SIMD_BUILTIN_VGET_HIGH_##A,
756 #undef VAR1
757 #define VAR1(T, N, MAP, FLAG, A) \
758 AARCH64_SIMD_BUILTIN_##T##_##N##A,
760 enum aarch64_builtins
762 AARCH64_BUILTIN_MIN,
764 AARCH64_BUILTIN_GET_FPCR,
765 AARCH64_BUILTIN_SET_FPCR,
766 AARCH64_BUILTIN_GET_FPSR,
767 AARCH64_BUILTIN_SET_FPSR,
769 AARCH64_BUILTIN_GET_FPCR64,
770 AARCH64_BUILTIN_SET_FPCR64,
771 AARCH64_BUILTIN_GET_FPSR64,
772 AARCH64_BUILTIN_SET_FPSR64,
774 AARCH64_BUILTIN_RSQRT_DF,
775 AARCH64_BUILTIN_RSQRT_SF,
776 AARCH64_BUILTIN_RSQRT_V2DF,
777 AARCH64_BUILTIN_RSQRT_V2SF,
778 AARCH64_BUILTIN_RSQRT_V4SF,
779 AARCH64_SIMD_BUILTIN_BASE,
780 AARCH64_SIMD_BUILTIN_LANE_CHECK,
781 #include "aarch64-simd-builtins.def"
782 /* The first enum element which is based on an insn_data pattern. */
783 AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
784 AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
785 + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
786 AARCH64_CRC32_BUILTIN_BASE,
787 AARCH64_CRC32_BUILTINS
788 AARCH64_CRC32_BUILTIN_MAX,
789 /* SIMD intrinsic builtins. */
790 AARCH64_SIMD_VREINTERPRET_BUILTINS
791 AARCH64_SIMD_VGET_LOW_BUILTINS
792 AARCH64_SIMD_VGET_HIGH_BUILTINS
793 /* ARMv8.3-A Pointer Authentication Builtins. */
794 AARCH64_PAUTH_BUILTIN_AUTIA1716,
795 AARCH64_PAUTH_BUILTIN_PACIA1716,
796 AARCH64_PAUTH_BUILTIN_AUTIB1716,
797 AARCH64_PAUTH_BUILTIN_PACIB1716,
798 AARCH64_PAUTH_BUILTIN_XPACLRI,
799 /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins. */
800 AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
801 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
802 /* Builtin for the Armv8.3-A JavaScript conversion instruction. */
803 AARCH64_JSCVT,
804 /* TME builtins. */
805 AARCH64_TME_BUILTIN_TSTART,
806 AARCH64_TME_BUILTIN_TCOMMIT,
807 AARCH64_TME_BUILTIN_TTEST,
808 AARCH64_TME_BUILTIN_TCANCEL,
809 /* Armv8.5-a RNG instruction builtins. */
810 AARCH64_BUILTIN_RNG_RNDR,
811 AARCH64_BUILTIN_RNG_RNDRRS,
812 /* MEMTAG builtins. */
813 AARCH64_MEMTAG_BUILTIN_START,
814 AARCH64_MEMTAG_BUILTIN_IRG,
815 AARCH64_MEMTAG_BUILTIN_GMI,
816 AARCH64_MEMTAG_BUILTIN_SUBP,
817 AARCH64_MEMTAG_BUILTIN_INC_TAG,
818 AARCH64_MEMTAG_BUILTIN_SET_TAG,
819 AARCH64_MEMTAG_BUILTIN_GET_TAG,
820 AARCH64_MEMTAG_BUILTIN_END,
821 /* LS64 builtins. */
822 AARCH64_LS64_BUILTIN_LD64B,
823 AARCH64_LS64_BUILTIN_ST64B,
824 AARCH64_LS64_BUILTIN_ST64BV,
825 AARCH64_LS64_BUILTIN_ST64BV0,
826 AARCH64_REV16,
827 AARCH64_REV16L,
828 AARCH64_REV16LL,
829 AARCH64_RBIT,
830 AARCH64_RBITL,
831 AARCH64_RBITLL,
832 /* System register builtins. */
833 AARCH64_RSR,
834 AARCH64_RSRP,
835 AARCH64_RSR64,
836 AARCH64_RSRF,
837 AARCH64_RSRF64,
838 AARCH64_RSR128,
839 AARCH64_WSR,
840 AARCH64_WSRP,
841 AARCH64_WSR64,
842 AARCH64_WSRF,
843 AARCH64_WSRF64,
844 AARCH64_WSR128,
845 AARCH64_PLD,
846 AARCH64_PLDX,
847 AARCH64_PLI,
848 AARCH64_PLIX,
849 AARCH64_BUILTIN_MAX
852 #undef CRC32_BUILTIN
853 #define CRC32_BUILTIN(N, M) \
854 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
856 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
857 AARCH64_CRC32_BUILTINS
861 #undef FCMLA_LANEQ_BUILTIN
862 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
863 {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
864 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
866 /* This structure describes how to map the builtin to the instruction to
867 generate in the backend and how to invoke that instruction. */
868 static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
869 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
872 #undef VREINTERPRET_BUILTIN
873 #define VREINTERPRET_BUILTIN(A, B, L) \
874 {"vreinterpret" SIMD_INTR_LENGTH_CHAR(L) "_" #A "_" #B, \
875 AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B, \
876 2, \
877 { SIMD_INTR_MODE(A, L), SIMD_INTR_MODE(B, L) }, \
878 { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(B) }, \
879 FLAG_AUTO_FP, \
880 SIMD_INTR_MODE(A, L) == SIMD_INTR_MODE(B, L) \
881 && SIMD_INTR_QUAL(A) == SIMD_INTR_QUAL(B) \
884 #undef VGET_LOW_BUILTIN
885 #define VGET_LOW_BUILTIN(A) \
886 {"vget_low_" #A, \
887 AARCH64_SIMD_BUILTIN_VGET_LOW_##A, \
888 2, \
889 { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
890 { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
891 FLAG_AUTO_FP, \
892 false \
895 #undef VGET_HIGH_BUILTIN
896 #define VGET_HIGH_BUILTIN(A) \
897 {"vget_high_" #A, \
898 AARCH64_SIMD_BUILTIN_VGET_HIGH_##A, \
899 2, \
900 { SIMD_INTR_MODE(A, d), SIMD_INTR_MODE(A, q) }, \
901 { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(A) }, \
902 FLAG_AUTO_FP, \
903 false \
906 static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
907 AARCH64_SIMD_VREINTERPRET_BUILTINS
908 AARCH64_SIMD_VGET_LOW_BUILTINS
909 AARCH64_SIMD_VGET_HIGH_BUILTINS
913 #undef CRC32_BUILTIN
915 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
917 #define NUM_DREG_TYPES 6
918 #define NUM_QREG_TYPES 6
920 /* Internal scalar builtin types. These types are used to support
921 neon intrinsic builtins. They are _not_ user-visible types. Therefore
922 the mangling for these types is implementation defined. */
923 const char *aarch64_scalar_builtin_types[] = {
924 "__builtin_aarch64_simd_qi",
925 "__builtin_aarch64_simd_hi",
926 "__builtin_aarch64_simd_si",
927 "__builtin_aarch64_simd_hf",
928 "__builtin_aarch64_simd_sf",
929 "__builtin_aarch64_simd_di",
930 "__builtin_aarch64_simd_df",
931 "__builtin_aarch64_simd_poly8",
932 "__builtin_aarch64_simd_poly16",
933 "__builtin_aarch64_simd_poly64",
934 "__builtin_aarch64_simd_poly128",
935 "__builtin_aarch64_simd_ti",
936 "__builtin_aarch64_simd_uqi",
937 "__builtin_aarch64_simd_uhi",
938 "__builtin_aarch64_simd_usi",
939 "__builtin_aarch64_simd_udi",
940 "__builtin_aarch64_simd_ei",
941 "__builtin_aarch64_simd_oi",
942 "__builtin_aarch64_simd_ci",
943 "__builtin_aarch64_simd_xi",
944 "__builtin_aarch64_simd_bf",
945 NULL
948 extern GTY(()) aarch64_simd_type_info aarch64_simd_types[];
950 #define ENTRY(E, M, Q, G) \
951 {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
952 struct aarch64_simd_type_info aarch64_simd_types [] = {
953 #include "aarch64-simd-builtin-types.def"
955 #undef ENTRY
957 static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
958 static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];
960 static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
961 static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
962 static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
964 /* The user-visible __fp16 type, and a pointer to that type. Used
965 across the back-end. */
966 tree aarch64_fp16_type_node = NULL_TREE;
967 tree aarch64_fp16_ptr_type_node = NULL_TREE;
969 /* Back-end node type for brain float (bfloat) types. */
970 tree aarch64_bf16_ptr_type_node = NULL_TREE;
972 /* Wrapper around add_builtin_function. NAME is the name of the built-in
973 function, TYPE is the function type, CODE is the function subcode
974 (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function
975 attributes. */
976 static tree
977 aarch64_general_add_builtin (const char *name, tree type, unsigned int code,
978 tree attrs = NULL_TREE)
980 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
981 return add_builtin_function (name, type, code, BUILT_IN_MD,
982 NULL, attrs);
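/* Illustrative note (not in the original source): the subcode passed in is
   packed as (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL, so
   the hooks that later fold or expand the builtin can recover both the
   builtin class and the original subcode from the single function code
   stored in the decl.  */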
985 static tree
986 aarch64_general_simulate_builtin (const char *name, tree fntype,
987 unsigned int code,
988 tree attrs = NULL_TREE)
990 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
991 return simulate_builtin_function_decl (input_location, name, fntype,
992 code, NULL, attrs);
995 static const char *
996 aarch64_mangle_builtin_scalar_type (const_tree type)
998 int i = 0;
1000 while (aarch64_scalar_builtin_types[i] != NULL)
1002 const char *name = aarch64_scalar_builtin_types[i];
1004 if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
1005 && DECL_NAME (TYPE_NAME (type))
1006 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
1007 return aarch64_scalar_builtin_types[i];
1008 i++;
1010 return NULL;
1013 static const char *
1014 aarch64_mangle_builtin_vector_type (const_tree type)
1016 tree attrs = TYPE_ATTRIBUTES (type);
1017 if (tree attr = lookup_attribute ("Advanced SIMD type", attrs))
1019 tree mangled_name = TREE_VALUE (TREE_VALUE (attr));
1020 return IDENTIFIER_POINTER (mangled_name);
1023 return NULL;
1026 const char *
1027 aarch64_general_mangle_builtin_type (const_tree type)
1029 const char *mangle;
1030 /* Walk through all the AArch64 builtins types tables to filter out the
1031 incoming type. */
1032 if ((mangle = aarch64_mangle_builtin_vector_type (type))
1033 || (mangle = aarch64_mangle_builtin_scalar_type (type)))
1034 return mangle;
1036 return NULL;
1039 /* Helper function for aarch64_simd_builtin_type. */
1040 static tree
1041 aarch64_int_or_fp_type (machine_mode mode,
1042 enum aarch64_type_qualifiers qualifiers)
1044 #define QUAL_TYPE(M) ((qualifiers & qualifier_unsigned) \
1045 ? unsigned_int##M##_type_node : int##M##_type_node);
1046 switch (mode)
1048 case E_QImode:
1049 return QUAL_TYPE (QI);
1050 case E_HImode:
1051 return QUAL_TYPE (HI);
1052 case E_SImode:
1053 return QUAL_TYPE (SI);
1054 case E_DImode:
1055 return QUAL_TYPE (DI);
1056 case E_TImode:
1057 return QUAL_TYPE (TI);
1058 case E_OImode:
1059 return aarch64_simd_intOI_type_node;
1060 case E_CImode:
1061 return aarch64_simd_intCI_type_node;
1062 case E_XImode:
1063 return aarch64_simd_intXI_type_node;
1064 case E_HFmode:
1065 return aarch64_fp16_type_node;
1066 case E_SFmode:
1067 return float_type_node;
1068 case E_DFmode:
1069 return double_type_node;
1070 case E_BFmode:
1071 return bfloat16_type_node;
1072 default:
1073 gcc_unreachable ();
1075 #undef QUAL_TYPE
1078 /* Helper function for aarch64_simd_builtin_type. */
1079 static tree
1080 aarch64_lookup_simd_type_in_table (machine_mode mode,
1081 enum aarch64_type_qualifiers qualifiers)
1083 int i;
1084 int nelts = ARRAY_SIZE (aarch64_simd_types);
1085 int q = qualifiers & (qualifier_poly | qualifier_unsigned);
1087 for (i = 0; i < nelts; i++)
1089 if (aarch64_simd_types[i].mode == mode
1090 && aarch64_simd_types[i].q == q)
1091 return aarch64_simd_types[i].itype;
1092 if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
1093 for (int j = 0; j < 3; j++)
1094 if (aarch64_simd_tuple_modes[i][j] == mode
1095 && aarch64_simd_types[i].q == q)
1096 return aarch64_simd_tuple_types[i][j];
1099 return NULL_TREE;
1102 /* Return a type for an operand with specified mode and qualifiers. */
1103 static tree
1104 aarch64_simd_builtin_type (machine_mode mode,
1105 enum aarch64_type_qualifiers qualifiers)
1107 tree type = NULL_TREE;
1109 /* For pointers, we want a pointer to the basic type of the vector. */
1110 if ((qualifiers & qualifier_pointer) && VECTOR_MODE_P (mode))
1111 mode = GET_MODE_INNER (mode);
1113 /* Non-poly scalar modes map to standard types not in the table. */
1114 if ((qualifiers & qualifier_poly) || VECTOR_MODE_P (mode))
1115 type = aarch64_lookup_simd_type_in_table (mode, qualifiers);
1116 else
1117 type = aarch64_int_or_fp_type (mode, qualifiers);
1119 gcc_assert (type != NULL_TREE);
1121 /* Add qualifiers. */
1122 if (qualifiers & qualifier_const)
1123 type = build_qualified_type (type, TYPE_QUAL_CONST);
1124 if (qualifiers & qualifier_pointer)
1125 type = build_pointer_type (type);
1127 return type;
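/* Illustrative note (not in the original source): for example,
   aarch64_simd_builtin_type (E_V4SImode, qualifier_unsigned) resolves via
   aarch64_lookup_simd_type_in_table to the internal __Uint32x4_t vector
   type, while a non-poly scalar request such as (E_SImode, qualifier_none)
   falls back to intSI_type_node through aarch64_int_or_fp_type.  */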
1130 static void
1131 aarch64_init_simd_builtin_types (void)
1133 int i;
1134 int nelts = ARRAY_SIZE (aarch64_simd_types);
1135 tree tdecl;
1137 /* Init all the element types built by the front-end. */
1138 aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
1139 aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
1140 aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
1141 aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
1142 aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
1143 aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
1144 aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
1145 aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
1146 aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
1147 aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
1148 aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
1149 aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
1150 aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
1151 aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
1152 aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
1153 aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;
1155 /* Poly types are a world of their own. */
1156 aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
1157 build_distinct_type_copy (unsigned_intQI_type_node);
1158 /* Prevent front-ends from transforming Poly8_t arrays into string
1159 literals. */
1160 TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;
1162 aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
1163 build_distinct_type_copy (unsigned_intHI_type_node);
1164 aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
1165 build_distinct_type_copy (unsigned_intDI_type_node);
1166 aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
1167 build_distinct_type_copy (unsigned_intTI_type_node);
1168 /* Init poly vector element types with scalar poly types. */
1169 aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
1170 aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
1171 aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
1172 aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
1173 aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
1174 aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;
1176 /* Continue with standard types. */
1177 aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
1178 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
1179 aarch64_simd_types[Float32x2_t].eltype = float_type_node;
1180 aarch64_simd_types[Float32x4_t].eltype = float_type_node;
1181 aarch64_simd_types[Float64x1_t].eltype = double_type_node;
1182 aarch64_simd_types[Float64x2_t].eltype = double_type_node;
1184 /* Init Bfloat vector types with underlying __bf16 type. */
1185 aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
1186 aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
1188 for (i = 0; i < nelts; i++)
1190 tree eltype = aarch64_simd_types[i].eltype;
1191 machine_mode mode = aarch64_simd_types[i].mode;
1193 if (aarch64_simd_types[i].itype == NULL)
1195 tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
1196 type = build_distinct_type_copy (type);
1197 SET_TYPE_STRUCTURAL_EQUALITY (type);
1199 tree mangled_name = get_identifier (aarch64_simd_types[i].mangle);
1200 tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE);
1201 TYPE_ATTRIBUTES (type)
1202 = tree_cons (get_identifier ("Advanced SIMD type"), value,
1203 TYPE_ATTRIBUTES (type));
1204 aarch64_simd_types[i].itype = type;
1207 tdecl = add_builtin_type (aarch64_simd_types[i].name,
1208 aarch64_simd_types[i].itype);
1209 TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
1212 #define AARCH64_BUILD_SIGNED_TYPE(mode) \
1213 make_signed_type (GET_MODE_PRECISION (mode));
1214 aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
1215 aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
1216 aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
1217 #undef AARCH64_BUILD_SIGNED_TYPE
1219 tdecl = add_builtin_type
1220 ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
1221 TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
1222 tdecl = add_builtin_type
1223 ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
1224 TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
1225 tdecl = add_builtin_type
1226 ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
1227 TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
1230 static void
1231 aarch64_init_simd_builtin_scalar_types (void)
1233 /* Define typedefs for all the standard scalar types. */
1234 (*lang_hooks.types.register_builtin_type) (intQI_type_node,
1235 "__builtin_aarch64_simd_qi");
1236 (*lang_hooks.types.register_builtin_type) (intHI_type_node,
1237 "__builtin_aarch64_simd_hi");
1238 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
1239 "__builtin_aarch64_simd_hf");
1240 (*lang_hooks.types.register_builtin_type) (intSI_type_node,
1241 "__builtin_aarch64_simd_si");
1242 (*lang_hooks.types.register_builtin_type) (float_type_node,
1243 "__builtin_aarch64_simd_sf");
1244 (*lang_hooks.types.register_builtin_type) (intDI_type_node,
1245 "__builtin_aarch64_simd_di");
1246 (*lang_hooks.types.register_builtin_type) (double_type_node,
1247 "__builtin_aarch64_simd_df");
1248 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1249 "__builtin_aarch64_simd_poly8");
1250 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1251 "__builtin_aarch64_simd_poly16");
1252 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1253 "__builtin_aarch64_simd_poly64");
1254 (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
1255 "__builtin_aarch64_simd_poly128");
1256 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
1257 "__builtin_aarch64_simd_ti");
1258 (*lang_hooks.types.register_builtin_type) (bfloat16_type_node,
1259 "__builtin_aarch64_simd_bf");
1260 /* Unsigned integer types for various mode sizes. */
1261 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1262 "__builtin_aarch64_simd_uqi");
1263 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1264 "__builtin_aarch64_simd_uhi");
1265 (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
1266 "__builtin_aarch64_simd_usi");
1267 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1268 "__builtin_aarch64_simd_udi");
1271 /* Return a set of FLAG_* flags derived from FLAGS
1272 that describe what a function with result MODE could do,
1273 taking the command-line flags into account. */
1274 static unsigned int
1275 aarch64_call_properties (unsigned int flags, machine_mode mode)
1277 if (!(flags & FLAG_AUTO_FP) && FLOAT_MODE_P (mode))
1278 flags |= FLAG_FP;
1280 /* -fno-trapping-math means that we can assume any FP exceptions
1281 are not user-visible. */
1282 if (!flag_trapping_math)
1283 flags &= ~FLAG_RAISE_FP_EXCEPTIONS;
1285 return flags;
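/* Illustrative note (not in the original source): a builtin declared with
   FLAG_NONE whose result mode is floating-point (V4SFmode, say) picks up
   FLAG_FP here, i.e. it is assumed to read FPCR and to raise FP exceptions;
   -fno-trapping-math then clears the exception bit again, which allows the
   stricter attributes computed below.  */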
1288 /* Return true if calls to a function with flags F and mode MODE
1289 could modify some form of global state. */
1290 static bool
1291 aarch64_modifies_global_state_p (unsigned int f, machine_mode mode)
1293 unsigned int flags = aarch64_call_properties (f, mode);
1295 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1296 return true;
1298 if (flags & FLAG_PREFETCH_MEMORY)
1299 return true;
1301 return flags & FLAG_WRITE_MEMORY;
1304 /* Return true if calls to a function with flags F and mode MODE
1305 could read some form of global state. */
1306 static bool
1307 aarch64_reads_global_state_p (unsigned int f, machine_mode mode)
1309 unsigned int flags = aarch64_call_properties (f, mode);
1311 if (flags & FLAG_READ_FPCR)
1312 return true;
1314 return flags & FLAG_READ_MEMORY;
1317 /* Return true if calls to a function with flags F and mode MODE
1318 could raise a signal. */
1319 static bool
1320 aarch64_could_trap_p (unsigned int f, machine_mode mode)
1322 unsigned int flags = aarch64_call_properties (f, mode);
1324 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1325 return true;
1327 if (flags & (FLAG_READ_MEMORY | FLAG_WRITE_MEMORY))
1328 return true;
1330 return false;
1333 /* Add attribute NAME to ATTRS. */
1334 static tree
1335 aarch64_add_attribute (const char *name, tree attrs)
1337 return tree_cons (get_identifier (name), NULL_TREE, attrs);
1340 /* Return the appropriate attributes for a function that has
1341 flags F and mode MODE. */
1342 static tree
1343 aarch64_get_attributes (unsigned int f, machine_mode mode)
1345 tree attrs = NULL_TREE;
1347 if (!aarch64_modifies_global_state_p (f, mode))
1349 if (aarch64_reads_global_state_p (f, mode))
1350 attrs = aarch64_add_attribute ("pure", attrs);
1351 else
1352 attrs = aarch64_add_attribute ("const", attrs);
1355 if (!flag_non_call_exceptions || !aarch64_could_trap_p (f, mode))
1356 attrs = aarch64_add_attribute ("nothrow", attrs);
1358 return aarch64_add_attribute ("leaf", attrs);
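/* Illustrative note (not in the original source): an integer-only builtin
   with FLAG_NONE neither modifies nor reads global state and cannot trap,
   so it is declared "const", "nothrow" and "leaf"; a FLAG_STORE builtin
   writes memory and so gets only "nothrow" (unless -fnon-call-exceptions
   is in effect) and "leaf".  */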
1361 /* Due to the architecture not providing a lane variant of the lane
1362 instructions for fcmla, we can't use the standard simd builtin expansion code,
1363 but we still want the majority of the validation that would normally be done. */
1365 void
1366 aarch64_init_fcmla_laneq_builtins (void)
1368 unsigned int i = 0;
1370 for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
1372 aarch64_fcmla_laneq_builtin_datum* d
1373 = &aarch64_fcmla_lane_builtin_data[i];
1374 tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_none);
1375 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
1376 tree quadtype = aarch64_simd_builtin_type (quadmode, qualifier_none);
1377 tree lanetype
1378 = aarch64_simd_builtin_type (SImode, qualifier_lane_pair_index);
1379 tree ftype = build_function_type_list (argtype, argtype, argtype,
1380 quadtype, lanetype, NULL_TREE);
1381 tree attrs = aarch64_get_attributes (FLAG_FP, d->mode);
1382 tree fndecl
1383 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1385 aarch64_builtin_decls[d->fcode] = fndecl;
1389 void
1390 aarch64_init_simd_intrinsics (void)
1392 unsigned int i = 0;
1394 for (i = 0; i < ARRAY_SIZE (aarch64_simd_intrinsic_data); ++i)
1396 auto d = &aarch64_simd_intrinsic_data[i];
1398 if (d->skip)
1399 continue;
1401 tree return_type = void_type_node;
1402 tree args = void_list_node;
1404 for (int op_num = d->op_count - 1; op_num >= 0; op_num--)
1406 machine_mode op_mode = d->op_modes[op_num];
1407 enum aarch64_type_qualifiers qualifiers = d->qualifiers[op_num];
1409 tree eltype = aarch64_simd_builtin_type (op_mode, qualifiers);
1411 if (op_num == 0)
1412 return_type = eltype;
1413 else
1414 args = tree_cons (NULL_TREE, eltype, args);
1417 tree ftype = build_function_type (return_type, args);
1418 tree attrs = aarch64_get_attributes (d->flags, d->op_modes[0]);
1419 unsigned int code
1420 = (d->fcode << AARCH64_BUILTIN_SHIFT | AARCH64_BUILTIN_GENERAL);
1421 tree fndecl = simulate_builtin_function_decl (input_location, d->name,
1422 ftype, code, NULL, attrs);
1423 aarch64_builtin_decls[d->fcode] = fndecl;
1427 void
1428 aarch64_init_simd_builtin_functions (bool called_from_pragma)
1430 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
1432 if (!called_from_pragma)
1434 tree lane_check_fpr = build_function_type_list (void_type_node,
1435 size_type_node,
1436 size_type_node,
1437 intSI_type_node,
1438 NULL);
1439 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
1440 = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
1441 lane_check_fpr,
1442 AARCH64_SIMD_BUILTIN_LANE_CHECK);
1445 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
1447 bool print_type_signature_p = false;
1448 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
1449 aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
1450 char namebuf[60];
1451 tree ftype = NULL;
1452 tree fndecl = NULL;
1454 d->fcode = fcode;
1456 /* We must track two variables here. op_num is
1457 the operand number as in the RTL pattern. This is
1458 required to access the mode (e.g. V4SF mode) of the
1459 argument, from which the base type can be derived.
1460 arg_num is an index in to the qualifiers data, which
1461 gives qualifiers to the type (e.g. const unsigned).
1462 The reason these two variables may differ by one is the
1463 void return type. While all return types take the 0th entry
1464 in the qualifiers array, there is no operand for them in the
1465 RTL pattern. */
1466 int op_num = insn_data[d->code].n_operands - 1;
1467 int arg_num = d->qualifiers[0] & qualifier_void
1468 ? op_num + 1
1469 : op_num;
1470 tree return_type = void_type_node, args = void_list_node;
1471 tree eltype;
1473 int struct_mode_args = 0;
1474 for (int j = op_num; j >= 0; j--)
1476 machine_mode op_mode = insn_data[d->code].operand[j].mode;
1477 if (aarch64_advsimd_struct_mode_p (op_mode))
1478 struct_mode_args++;
1481 if ((called_from_pragma && struct_mode_args == 0)
1482 || (!called_from_pragma && struct_mode_args > 0))
1483 continue;
1485 /* Build a function type directly from the insn_data for this
1486 builtin. The build_function_type () function takes care of
1487 removing duplicates for us. */
1488 for (; op_num >= 0; arg_num--, op_num--)
1490 machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
1491 enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];
1493 if (qualifiers & qualifier_unsigned)
1495 type_signature[op_num] = 'u';
1496 print_type_signature_p = true;
1498 else if (qualifiers & qualifier_poly)
1500 type_signature[op_num] = 'p';
1501 print_type_signature_p = true;
1503 else
1504 type_signature[op_num] = 's';
1506 /* Some builtins have different user-facing types
1507 for certain arguments, encoded in d->mode. */
1508 if (qualifiers & qualifier_map_mode)
1509 op_mode = d->mode;
1511 eltype = aarch64_simd_builtin_type (op_mode, qualifiers);
1513 /* If we have reached arg_num == 0, we are at a non-void
1514 return type. Otherwise, we are still processing
1515 arguments. */
1516 if (arg_num == 0)
1517 return_type = eltype;
1518 else
1519 args = tree_cons (NULL_TREE, eltype, args);
1522 ftype = build_function_type (return_type, args);
1524 gcc_assert (ftype != NULL);
1526 if (print_type_signature_p)
1527 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
1528 d->name, type_signature);
1529 else
1530 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
1531 d->name);
1533 tree attrs = aarch64_get_attributes (d->flags, d->mode);
1535 if (called_from_pragma)
1537 unsigned int raw_code
1538 = (fcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
1539 fndecl = simulate_builtin_function_decl (input_location, namebuf,
1540 ftype, raw_code, NULL,
1541 attrs);
1543 else
1544 fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);
1546 aarch64_builtin_decls[fcode] = fndecl;
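/* Illustrative sketch of the naming scheme above (the pattern name is
   hypothetical, not taken from aarch64-simd-builtins.def): a pattern whose
   d->name is "foov4si" and whose return and argument qualifiers are all
   qualifier_unsigned gets the type signature "uuu" and is registered as
   "__builtin_aarch64_foov4si_uuu"; an all-signed variant keeps
   print_type_signature_p false and is registered as plain
   "__builtin_aarch64_foov4si".  */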
1550 /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
1551 indexed by TYPE_INDEX. */
1552 static void
1553 register_tuple_type (unsigned int num_vectors, unsigned int type_index)
1555 aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
1557 /* Synthesize the name of the user-visible vector tuple type. */
1558 const char *vector_type_name = type->name;
1559 char tuple_type_name[sizeof ("bfloat16x4x2_t")];
1560 snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
1561 (int) strlen (vector_type_name) - 4, vector_type_name + 2,
1562 num_vectors);
1563 tuple_type_name[0] = TOLOWER (tuple_type_name[0]);
1565 tree vector_type = type->itype;
1566 tree array_type = build_array_type_nelts (vector_type, num_vectors);
1567 if (type->mode == DImode)
1569 if (num_vectors == 2)
1570 SET_TYPE_MODE (array_type, V2x1DImode);
1571 else if (num_vectors == 3)
1572 SET_TYPE_MODE (array_type, V3x1DImode);
1573 else if (num_vectors == 4)
1574 SET_TYPE_MODE (array_type, V4x1DImode);
1577 unsigned int alignment
1578 = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
1579 machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
1580 gcc_assert (VECTOR_MODE_P (tuple_mode)
1581 && TYPE_MODE (array_type) == tuple_mode
1582 && TYPE_ALIGN (array_type) == alignment);
1584 tree field = build_decl (input_location, FIELD_DECL,
1585 get_identifier ("val"), array_type);
1587 tree t = lang_hooks.types.simulate_record_decl (input_location,
1588 tuple_type_name,
1589 make_array_slice (&field,
1590 1));
1591 gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
1592 && (flag_pack_struct
1593 || maximum_field_alignment
1594 || (TYPE_MODE_RAW (t) == tuple_mode
1595 && TYPE_ALIGN (t) == alignment)));
1597 aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
1598 aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
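/* Worked example of the synthesis above (illustrative): for the AdvSIMD type
   named "__Int32x4_t" and NUM_VECTORS == 3, the snprintf produces
   "Int32x4x3_t", TOLOWER gives "int32x4x3_t", and the registered record is
   effectively:

       typedef struct int32x4x3_t { int32x4_t val[3]; } int32x4x3_t;

   with the assertions above checking that it is laid out like the
   corresponding V3x4SI tuple mode with 128-bit alignment.  */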
1601 static bool
1602 aarch64_scalar_builtin_type_p (aarch64_simd_type t)
1604 return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
1607 /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
1608 set. */
1609 aarch64_simd_switcher::aarch64_simd_switcher (aarch64_feature_flags extra_flags)
1610 : m_old_asm_isa_flags (aarch64_asm_isa_flags),
1611 m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
1613 /* Changing the ISA flags should be enough here. We shouldn't need to
1614 pay the compile-time cost of a full target switch. */
1615 global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
1616 aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
1619 aarch64_simd_switcher::~aarch64_simd_switcher ()
1621 if (m_old_general_regs_only)
1622 global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
1623 aarch64_set_asm_isa_flags (m_old_asm_isa_flags);
1626 /* Implement #pragma GCC aarch64 "arm_neon.h".
1628 The types and functions defined here need to be available internally
1629 during LTO as well. */
1630 void
1631 handle_arm_neon_h (void)
1633 aarch64_simd_switcher simd;
1635 /* Register the AdvSIMD vector tuple types. */
1636 for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
1637 for (unsigned int count = 2; count <= 4; ++count)
1638 if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
1639 register_tuple_type (count, i);
1641 aarch64_init_simd_builtin_functions (true);
1642 aarch64_init_simd_intrinsics ();
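/* For reference (illustrative): the installed arm_neon.h header contains
   #pragma GCC aarch64 "arm_neon.h", which the C-family pragma handling
   routes to the function above, so the vector tuple types and the
   pragma-registered intrinsics are only created when the header is actually
   included (or during LTO, as noted below).  */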
1645 static void
1646 aarch64_init_simd_builtins (void)
1648 aarch64_init_simd_builtin_types ();
1650 /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
1651 Therefore we need to preserve the old __builtin scalar types. These can be
1652 removed once all the intrinsics become strongly typed using the qualifier
1653 system. */
1654 aarch64_init_simd_builtin_scalar_types ();
1656 aarch64_init_simd_builtin_functions (false);
1657 if (in_lto_p)
1658 handle_arm_neon_h ();
1660 /* Initialize the remaining fcmla_laneq intrinsics. */
1661 aarch64_init_fcmla_laneq_builtins ();
1664 static void
1665 aarch64_init_crc32_builtins ()
1667 tree usi_type = aarch64_simd_builtin_type (SImode, qualifier_unsigned);
1668 unsigned int i = 0;
1670 for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
1672 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
1673 tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_unsigned);
1674 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
1675 tree attrs = aarch64_get_attributes (FLAG_NONE, d->mode);
1676 tree fndecl
1677 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1679 aarch64_builtin_decls[d->fcode] = fndecl;
1683 /* Add builtins for reciprocal square root. */
1685 void
1686 aarch64_init_builtin_rsqrt (void)
1688 tree fndecl = NULL;
1689 tree ftype = NULL;
1691 tree V2SF_type_node = build_vector_type (float_type_node, 2);
1692 tree V2DF_type_node = build_vector_type (double_type_node, 2);
1693 tree V4SF_type_node = build_vector_type (float_type_node, 4);
1695 struct builtin_decls_data
1697 tree type_node;
1698 const char *builtin_name;
1699 int function_code;
1702 builtin_decls_data bdda[] =
1704 { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
1705 { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
1706 { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
1707 { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
1708 { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
1711 builtin_decls_data *bdd = bdda;
1712 builtin_decls_data *bdd_end = bdd + (ARRAY_SIZE (bdda));
1714 for (; bdd < bdd_end; bdd++)
1716 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
1717 tree attrs = aarch64_get_attributes (FLAG_FP, TYPE_MODE (bdd->type_node));
1718 fndecl = aarch64_general_add_builtin (bdd->builtin_name,
1719 ftype, bdd->function_code, attrs);
1720 aarch64_builtin_decls[bdd->function_code] = fndecl;
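/* Example use of one of the declarations above (illustrative): each rsqrt
   variant takes and returns the same type, per the build_function_type_list
   call above, e.g.

       double approx = __builtin_aarch64_rsqrt_df (x);
*/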
1724 /* Initialize the backend types that support the user-visible __fp16
1725 type, and also initialize a pointer to that type, to be used when
1726 forming HFAs. */
1728 static void
1729 aarch64_init_fp16_types (void)
1731 aarch64_fp16_type_node = make_node (REAL_TYPE);
1732 TYPE_PRECISION (aarch64_fp16_type_node) = 16;
1733 layout_type (aarch64_fp16_type_node);
1735 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
1736 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
1739 /* Initialize the backend REAL_TYPE type supporting bfloat types. */
1740 static void
1741 aarch64_init_bf16_types (void)
1743 lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16");
1744 aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node);
1747 /* Pointer authentication builtins that will become NOPs on legacy platforms.
1748 Currently, these builtins are for internal use only (libgcc EH unwinder). */
1750 void
1751 aarch64_init_pauth_hint_builtins (void)
1753 /* Pointer Authentication builtins. */
1754 tree ftype_pointer_auth
1755 = build_function_type_list (ptr_type_node, ptr_type_node,
1756 unsigned_intDI_type_node, NULL_TREE);
1757 tree ftype_pointer_strip
1758 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
1760 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
1761 = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
1762 ftype_pointer_auth,
1763 AARCH64_PAUTH_BUILTIN_AUTIA1716);
1764 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
1765 = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
1766 ftype_pointer_auth,
1767 AARCH64_PAUTH_BUILTIN_PACIA1716);
1768 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
1769 = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
1770 ftype_pointer_auth,
1771 AARCH64_PAUTH_BUILTIN_AUTIB1716);
1772 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
1773 = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
1774 ftype_pointer_auth,
1775 AARCH64_PAUTH_BUILTIN_PACIB1716);
1776 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
1777 = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
1778 ftype_pointer_strip,
1779 AARCH64_PAUTH_BUILTIN_XPACLRI);
1782 /* Initialize the transactional memory extension (TME) builtins. */
1783 static void
1784 aarch64_init_tme_builtins (void)
1786 tree ftype_uint64_void
1787 = build_function_type_list (uint64_type_node, NULL);
1788 tree ftype_void_void
1789 = build_function_type_list (void_type_node, NULL);
1790 tree ftype_void_uint64
1791 = build_function_type_list (void_type_node, uint64_type_node, NULL);
1793 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
1794 = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
1795 ftype_uint64_void,
1796 AARCH64_TME_BUILTIN_TSTART);
1797 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
1798 = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
1799 ftype_uint64_void,
1800 AARCH64_TME_BUILTIN_TTEST);
1801 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
1802 = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
1803 ftype_void_void,
1804 AARCH64_TME_BUILTIN_TCOMMIT);
1805 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
1806 = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
1807 ftype_void_uint64,
1808 AARCH64_TME_BUILTIN_TCANCEL);
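/* Illustrative use of the TME builtins registered above (a hypothetical
   sketch, not a recommended pattern; handle_failure is a placeholder):

       uint64_t status = __builtin_aarch64_tstart ();
       if (status == 0)
	 __builtin_aarch64_tcommit ();
       else
	 handle_failure (status);

   __builtin_aarch64_tcancel additionally takes a 16-bit constant immediate
   reason code, as enforced in aarch64_expand_builtin_tme below.  */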
1811 /* Add builtins for Random Number instructions. */
1813 static void
1814 aarch64_init_rng_builtins (void)
1816 tree unsigned_ptr_type
1817 = build_pointer_type (get_typenode_from_name (UINT64_TYPE));
1818 tree ftype
1819 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
1820 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
1821 = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
1822 AARCH64_BUILTIN_RNG_RNDR);
1823 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
1824 = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
1825 AARCH64_BUILTIN_RNG_RNDRRS);
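/* Illustrative use of the RNG builtins above: both take a pointer to a
   uint64_t result and return an int status, e.g.

       uint64_t seed;
       int status = __builtin_aarch64_rndr (&seed);

   (see aarch64_expand_rng_builtin below for how the status flag is
   materialised).  */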
1828 /* Add builtins for reading and writing system registers. */
1829 static void
1830 aarch64_init_rwsr_builtins (void)
1832 tree fntype = NULL;
1833 tree const_char_ptr_type
1834 = build_pointer_type (build_type_variant (char_type_node, true, false));
1836 #define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
1837 aarch64_builtin_decls[AARCH64_##F] \
1838 = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
1840 fntype
1841 = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
1842 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
1844 fntype
1845 = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
1846 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
1848 fntype
1849 = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
1850 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
1852 fntype
1853 = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
1854 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
1856 fntype
1857 = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
1858 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
1860 fntype
1861 = build_function_type_list (uint128_type_node, const_char_ptr_type, NULL);
1862 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR128, rsr128, fntype);
1864 fntype
1865 = build_function_type_list (void_type_node, const_char_ptr_type,
1866 uint32_type_node, NULL);
1868 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
1870 fntype
1871 = build_function_type_list (void_type_node, const_char_ptr_type,
1872 const_ptr_type_node, NULL);
1873 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
1875 fntype
1876 = build_function_type_list (void_type_node, const_char_ptr_type,
1877 uint64_type_node, NULL);
1878 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
1880 fntype
1881 = build_function_type_list (void_type_node, const_char_ptr_type,
1882 float_type_node, NULL);
1883 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
1885 fntype
1886 = build_function_type_list (void_type_node, const_char_ptr_type,
1887 double_type_node, NULL);
1888 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
1890 fntype
1891 = build_function_type_list (void_type_node, const_char_ptr_type,
1892 uint128_type_node, NULL);
1893 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR128, wsr128, fntype);
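/* Illustrative use of the system register builtins above (the register name
   is only an example): the first argument must be a string literal, as
   checked in aarch64_general_check_builtin_call below.

       uint64_t tp = __builtin_aarch64_rsr64 ("tpidr_el0");
       __builtin_aarch64_wsr64 ("tpidr_el0", tp);
*/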
1896 /* Add builtins for data and instruction prefetch. */
1897 static void
1898 aarch64_init_prefetch_builtin (void)
1900 #define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \
1901 aarch64_builtin_decls[INDEX] = \
1902 aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX)
1904 tree ftype;
1905 tree cv_argtype;
1906 cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
1907 | TYPE_QUAL_VOLATILE);
1908 cv_argtype = build_pointer_type (cv_argtype);
1910 ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
1911 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
1912 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");
1914 ftype = build_function_type_list (void_type_node, unsigned_type_node,
1915 unsigned_type_node, unsigned_type_node,
1916 cv_argtype, NULL);
1917 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");
1919 ftype = build_function_type_list (void_type_node, unsigned_type_node,
1920 unsigned_type_node, cv_argtype, NULL);
1921 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
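/* Illustrative use of the prefetch builtins above: __builtin_aarch64_pld
   takes only an address, while __builtin_aarch64_pldx also takes the
   kind/level/retention selectors checked in aarch64_expand_prefetch_builtin
   below.  The selector values here are only examples:

       __builtin_aarch64_pld (ptr);
       __builtin_aarch64_pldx (0, 1, 0, ptr);

   where (0, 1, 0) selects "PLD", "L2", "KEEP", i.e. the PLDL2KEEP prfop.  */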
1924 /* Initialize the memory tagging extension (MTE) builtins. */
1925 static GTY(()) struct GTY(())
1927 tree ftype;
1928 enum insn_code icode;
1929 } aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
1930 AARCH64_MEMTAG_BUILTIN_START - 1];
1932 static void
1933 aarch64_init_memtag_builtins (void)
1935 tree fntype = NULL;
1937 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
1938 aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
1939 = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
1940 T, AARCH64_MEMTAG_BUILTIN_##F); \
1941 aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
1942 AARCH64_MEMTAG_BUILTIN_START - 1] = \
1943 {T, CODE_FOR_##I};
1945 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1946 uint64_type_node, NULL);
1947 AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
1949 fntype = build_function_type_list (uint64_type_node, ptr_type_node,
1950 uint64_type_node, NULL);
1951 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
1953 fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
1954 ptr_type_node, NULL);
1955 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
1957 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1958 unsigned_type_node, NULL);
1959 AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
1961 fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
1962 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
1964 fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
1965 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);
1967 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
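/* Illustrative use of the MTE builtins above (the pointer values are
   hypothetical), matching the ptr/uint64 and void/ptr signatures built
   above:

       void *tagged = __builtin_aarch64_memtag_irg (p, 0);
       __builtin_aarch64_memtag_set_tag (tagged);
*/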
1970 /* Add builtins for Load/store 64 Byte instructions. */
1972 typedef struct
1974 const char *name;
1975 unsigned int code;
1976 tree type;
1977 } ls64_builtins_data;
1979 static GTY(()) tree ls64_arm_data_t = NULL_TREE;
1981 static void
1982 aarch64_init_ls64_builtins_types (void)
1984 /* Synthesize:
1986 typedef struct {
1987 uint64_t val[8];
1988 } __arm_data512_t; */
1989 const char *tuple_type_name = "__arm_data512_t";
1990 tree node_type = get_typenode_from_name (UINT64_TYPE);
1991 tree array_type = build_array_type_nelts (node_type, 8);
1992 SET_TYPE_MODE (array_type, V8DImode);
1994 gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
1995 gcc_assert (TYPE_ALIGN (array_type) == 64);
1997 tree field = build_decl (input_location, FIELD_DECL,
1998 get_identifier ("val"), array_type);
2000 ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
2001 tuple_type_name,
2002 make_array_slice (&field, 1));
2004 gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
2005 gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
2006 gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
2009 static void
2010 aarch64_init_ls64_builtins (void)
2012 aarch64_init_ls64_builtins_types ();
2014 ls64_builtins_data data[4] = {
2015 {"__arm_ld64b", AARCH64_LS64_BUILTIN_LD64B,
2016 build_function_type_list (ls64_arm_data_t,
2017 const_ptr_type_node, NULL_TREE)},
2018 {"__arm_st64b", AARCH64_LS64_BUILTIN_ST64B,
2019 build_function_type_list (void_type_node, ptr_type_node,
2020 ls64_arm_data_t, NULL_TREE)},
2021 {"__arm_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
2022 build_function_type_list (uint64_type_node, ptr_type_node,
2023 ls64_arm_data_t, NULL_TREE)},
2024 {"__arm_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
2025 build_function_type_list (uint64_type_node, ptr_type_node,
2026 ls64_arm_data_t, NULL_TREE)},
2029 for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
2030 aarch64_builtin_decls[data[i].code]
2031 = aarch64_general_simulate_builtin (data[i].name, data[i].type,
2032 data[i].code);
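/* Illustrative use of the LS64 builtins above, which are registered with
   their user-facing (__arm_*) names rather than __builtin_aarch64_* ones:

       __arm_data512_t d = __arm_ld64b (src);
       __arm_st64b (dst, d);

   where src is a const void * and dst a void *, per the function types built
   in the initialiser above.  */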
2035 static void
2036 aarch64_init_data_intrinsics (void)
2038 tree uint32_fntype = build_function_type_list (uint32_type_node,
2039 uint32_type_node, NULL_TREE);
2040 tree ulong_fntype = build_function_type_list (long_unsigned_type_node,
2041 long_unsigned_type_node,
2042 NULL_TREE);
2043 tree uint64_fntype = build_function_type_list (uint64_type_node,
2044 uint64_type_node, NULL_TREE);
2045 aarch64_builtin_decls[AARCH64_REV16]
2046 = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype,
2047 AARCH64_REV16);
2048 aarch64_builtin_decls[AARCH64_REV16L]
2049 = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype,
2050 AARCH64_REV16L);
2051 aarch64_builtin_decls[AARCH64_REV16LL]
2052 = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype,
2053 AARCH64_REV16LL);
2054 aarch64_builtin_decls[AARCH64_RBIT]
2055 = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype,
2056 AARCH64_RBIT);
2057 aarch64_builtin_decls[AARCH64_RBITL]
2058 = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype,
2059 AARCH64_RBITL);
2060 aarch64_builtin_decls[AARCH64_RBITLL]
2061 = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype,
2062 AARCH64_RBITLL);
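/* Illustrative use of the data intrinsics above (the operand value is only
   an example): __builtin_aarch64_rev16 byte-swaps each 16-bit halfword and
   __builtin_aarch64_rbit reverses the bit order, e.g.

       unsigned int r = __builtin_aarch64_rev16 (0x11223344u);

   which yields 0x22114433.  */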
2065 /* Implement #pragma GCC aarch64 "arm_acle.h". */
2066 void
2067 handle_arm_acle_h (void)
2069 if (TARGET_LS64)
2070 aarch64_init_ls64_builtins ();
2073 /* Initialize the fpsr and fpcr getters and setters. */
2075 static void
2076 aarch64_init_fpsr_fpcr_builtins (void)
2078 tree ftype_set
2079 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
2080 tree ftype_get
2081 = build_function_type_list (unsigned_type_node, NULL);
2083 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
2084 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
2085 ftype_get,
2086 AARCH64_BUILTIN_GET_FPCR);
2087 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
2088 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
2089 ftype_set,
2090 AARCH64_BUILTIN_SET_FPCR);
2091 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
2092 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
2093 ftype_get,
2094 AARCH64_BUILTIN_GET_FPSR);
2095 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
2096 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
2097 ftype_set,
2098 AARCH64_BUILTIN_SET_FPSR);
2100 ftype_set
2101 = build_function_type_list (void_type_node, long_long_unsigned_type_node,
2102 NULL);
2103 ftype_get
2104 = build_function_type_list (long_long_unsigned_type_node, NULL);
2106 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64]
2107 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64",
2108 ftype_get,
2109 AARCH64_BUILTIN_GET_FPCR64);
2110 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64]
2111 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64",
2112 ftype_set,
2113 AARCH64_BUILTIN_SET_FPCR64);
2114 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64]
2115 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64",
2116 ftype_get,
2117 AARCH64_BUILTIN_GET_FPSR64);
2118 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64]
2119 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64",
2120 ftype_set,
2121 AARCH64_BUILTIN_SET_FPSR64);
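/* Illustrative use of the FPCR/FPSR accessors above: the plain variants use
   unsigned int and the *64 variants unsigned long long, e.g. (the bit mask
   below is only an example):

       unsigned int fpcr = __builtin_aarch64_get_fpcr ();
       __builtin_aarch64_set_fpcr (fpcr | (1u << 24));
*/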
2124 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
2126 void
2127 aarch64_general_init_builtins (void)
2129 aarch64_init_fpsr_fpcr_builtins ();
2131 aarch64_init_fp16_types ();
2133 aarch64_init_bf16_types ();
2136 aarch64_simd_switcher simd;
2137 aarch64_init_simd_builtins ();
2140 aarch64_init_crc32_builtins ();
2141 aarch64_init_builtin_rsqrt ();
2142 aarch64_init_rng_builtins ();
2143 aarch64_init_data_intrinsics ();
2145 aarch64_init_rwsr_builtins ();
2146 aarch64_init_prefetch_builtin ();
2148 tree ftype_jcvt
2149 = build_function_type_list (intSI_type_node, double_type_node, NULL);
2150 aarch64_builtin_decls[AARCH64_JSCVT]
2151 = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
2152 AARCH64_JSCVT);
2154 /* Initialize pointer authentication builtins which are backed by instructions
2155 in NOP encoding space.
2157 NOTE: these builtins are intended to be used by the libgcc unwinder only.
2158 Since return address signing is not supported under ILP32, we don't
2159 register them in that case.
2160 if (!TARGET_ILP32)
2161 aarch64_init_pauth_hint_builtins ();
2163 if (TARGET_TME)
2164 aarch64_init_tme_builtins ();
2166 if (TARGET_MEMTAG)
2167 aarch64_init_memtag_builtins ();
2169 if (in_lto_p)
2170 handle_arm_acle_h ();
2173 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */
2174 tree
2175 aarch64_general_builtin_decl (unsigned code, bool)
2177 if (code >= AARCH64_BUILTIN_MAX)
2178 return error_mark_node;
2180 return aarch64_builtin_decls[code];
2183 bool
2184 aarch64_general_check_builtin_call (location_t location, vec<location_t>,
2185 unsigned int code, tree fndecl,
2186 unsigned int nargs ATTRIBUTE_UNUSED, tree *args)
2188 switch (code)
2190 case AARCH64_RSR:
2191 case AARCH64_RSRP:
2192 case AARCH64_RSR64:
2193 case AARCH64_RSRF:
2194 case AARCH64_RSRF64:
2195 case AARCH64_WSR:
2196 case AARCH64_WSRP:
2197 case AARCH64_WSR64:
2198 case AARCH64_WSRF:
2199 case AARCH64_WSRF64:
2200 tree addr = STRIP_NOPS (args[0]);
2201 if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
2202 || TREE_CODE (addr) != ADDR_EXPR
2203 || TREE_CODE (TREE_OPERAND (addr, 0)) != STRING_CST)
2205 error_at (location, "first argument to %qD must be a string literal",
2206 fndecl);
2207 return false;
2210 /* Default behavior. */
2211 return true;
2214 typedef enum
2216 SIMD_ARG_COPY_TO_REG,
2217 SIMD_ARG_CONSTANT,
2218 SIMD_ARG_LANE_INDEX,
2219 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
2220 SIMD_ARG_LANE_PAIR_INDEX,
2221 SIMD_ARG_LANE_QUADTUP_INDEX,
2222 SIMD_ARG_STOP
2223 } builtin_simd_arg;
2226 static rtx
2227 aarch64_simd_expand_args (rtx target, int icode, int have_retval,
2228 tree exp, builtin_simd_arg *args,
2229 machine_mode builtin_mode)
2231 rtx pat;
2232 rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */
2233 int opc = 0;
2235 if (have_retval)
2237 machine_mode tmode = insn_data[icode].operand[0].mode;
2238 if (!target
2239 || GET_MODE (target) != tmode
2240 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
2241 target = gen_reg_rtx (tmode);
2242 op[opc++] = target;
2245 for (;;)
2247 builtin_simd_arg thisarg = args[opc - have_retval];
2249 if (thisarg == SIMD_ARG_STOP)
2250 break;
2251 else
2253 tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
2254 machine_mode mode = insn_data[icode].operand[opc].mode;
2255 op[opc] = expand_normal (arg);
2257 switch (thisarg)
2259 case SIMD_ARG_COPY_TO_REG:
2260 if (POINTER_TYPE_P (TREE_TYPE (arg)))
2261 op[opc] = convert_memory_address (Pmode, op[opc]);
2262 /*gcc_assert (GET_MODE (op[opc]) == mode); */
2263 if (!(*insn_data[icode].operand[opc].predicate)
2264 (op[opc], mode))
2265 op[opc] = copy_to_mode_reg (mode, op[opc]);
2266 break;
2268 case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
2269 gcc_assert (opc > 1);
2270 if (CONST_INT_P (op[opc]))
2272 unsigned int nunits
2273 = GET_MODE_NUNITS (builtin_mode).to_constant ();
2274 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
2275 /* Keep to GCC-vector-extension lane indices in the RTL. */
2276 op[opc] = aarch64_endian_lane_rtx (builtin_mode,
2277 INTVAL (op[opc]));
2279 goto constant_arg;
2281 case SIMD_ARG_LANE_INDEX:
2282 /* Must be a previous operand into which this is an index. */
2283 gcc_assert (opc > 0);
2284 if (CONST_INT_P (op[opc]))
2286 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2287 unsigned int nunits
2288 = GET_MODE_NUNITS (vmode).to_constant ();
2289 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
2290 /* Keep to GCC-vector-extension lane indices in the RTL. */
2291 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
2293 /* If the lane index isn't a constant then error out. */
2294 goto constant_arg;
2296 case SIMD_ARG_LANE_PAIR_INDEX:
2297 /* Must be a previous operand into which this is an index and
2298 the index is restricted to nunits / 2. */
2299 gcc_assert (opc > 0);
2300 if (CONST_INT_P (op[opc]))
2302 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2303 unsigned int nunits
2304 = GET_MODE_NUNITS (vmode).to_constant ();
2305 aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
2306 /* Keep to GCC-vector-extension lane indices in the RTL. */
2307 int lane = INTVAL (op[opc]);
2308 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
2309 SImode);
2311 /* If the lane index isn't a constant then error out. */
2312 goto constant_arg;
2313 case SIMD_ARG_LANE_QUADTUP_INDEX:
2314 /* Must be a previous operand into which this is an index and
2315 the index is restricted to nunits / 4. */
2316 gcc_assert (opc > 0);
2317 if (CONST_INT_P (op[opc]))
2319 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2320 unsigned int nunits
2321 = GET_MODE_NUNITS (vmode).to_constant ();
2322 aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
2323 /* Keep to GCC-vector-extension lane indices in the RTL. */
2324 int lane = INTVAL (op[opc]);
2325 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
2326 SImode);
2328 /* If the lane index isn't a constant then error out. */
2329 goto constant_arg;
2330 case SIMD_ARG_CONSTANT:
2331 constant_arg:
2332 if (!(*insn_data[icode].operand[opc].predicate)
2333 (op[opc], mode))
2335 error_at (EXPR_LOCATION (exp),
2336 "argument %d must be a constant immediate",
2337 opc + 1 - have_retval);
2338 return const0_rtx;
2340 break;
2342 case SIMD_ARG_STOP:
2343 gcc_unreachable ();
2346 opc++;
2350 switch (opc)
2352 case 1:
2353 pat = GEN_FCN (icode) (op[0]);
2354 break;
2356 case 2:
2357 pat = GEN_FCN (icode) (op[0], op[1]);
2358 break;
2360 case 3:
2361 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
2362 break;
2364 case 4:
2365 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
2366 break;
2368 case 5:
2369 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
2370 break;
2372 case 6:
2373 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
2374 break;
2376 default:
2377 gcc_unreachable ();
2380 if (!pat)
2381 return NULL_RTX;
2383 emit_insn (pat);
2385 return target;
2388 /* Expand an AArch64 AdvSIMD builtin (intrinsic). */
2390 aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
2392 if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
2394 rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
2395 rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
2396 if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
2397 && UINTVAL (elementsize) != 0
2398 && UINTVAL (totalsize) != 0)
2400 rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
2401 if (CONST_INT_P (lane_idx))
2402 aarch64_simd_lane_bounds (lane_idx, 0,
2403 UINTVAL (totalsize)
2404 / UINTVAL (elementsize),
2405 exp);
2406 else
2407 error_at (EXPR_LOCATION (exp),
2408 "lane index must be a constant immediate");
2410 else
2411 error_at (EXPR_LOCATION (exp),
2412 "total size and element size must be a nonzero "
2413 "constant immediate");
2414 /* Don't generate any RTL. */
2415 return const0_rtx;
2417 aarch64_simd_builtin_datum *d =
2418 &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
2419 enum insn_code icode = d->code;
2420 builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
2421 int num_args = insn_data[d->code].n_operands;
2422 int is_void = 0;
2423 int k;
2425 is_void = !!(d->qualifiers[0] & qualifier_void);
2427 num_args += is_void;
2429 for (k = 1; k < num_args; k++)
2431 /* We have four arrays of data, each indexed in a different fashion.
2432 qualifiers - element 0 always describes the function return type.
2433 operands - element 0 is either the operand for return value (if
2434 the function has a non-void return type) or the operand for the
2435 first argument.
2436 expr_args - element 0 always holds the first argument.
2437 args - element 0 is always used for the return type. */
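	/* Illustrative example of the indexing above (the operand counts are
	   hypothetical): for a void-returning builtin with two arguments,
	   n_operands == 2 and is_void == 1, so num_args == 3; at k == 1 we
	   have qualifiers_k == 1, operands_k == 0 and expr_args_k == 0,
	   i.e. qualifier 1 describes RTL operand 0, which is loaded from
	   call argument 0.  */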
2438 int qualifiers_k = k;
2439 int operands_k = k - is_void;
2440 int expr_args_k = k - 1;
2442 if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
2443 args[k] = SIMD_ARG_LANE_INDEX;
2444 else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
2445 args[k] = SIMD_ARG_LANE_PAIR_INDEX;
2446 else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
2447 args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
2448 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
2449 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
2450 else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
2451 args[k] = SIMD_ARG_CONSTANT;
2452 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
2454 rtx arg
2455 = expand_normal (CALL_EXPR_ARG (exp,
2456 (expr_args_k)));
2457 /* Handle constants only if the predicate allows it. */
2458 bool op_const_int_p =
2459 (CONST_INT_P (arg)
2460 && (*insn_data[icode].operand[operands_k].predicate)
2461 (arg, insn_data[icode].operand[operands_k].mode));
2462 args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
2464 else
2465 args[k] = SIMD_ARG_COPY_TO_REG;
2468 args[k] = SIMD_ARG_STOP;
2470 /* The interface to aarch64_simd_expand_args expects a 0 if
2471 the function is void, and a 1 if it is not. */
2472 return aarch64_simd_expand_args
2473 (target, icode, !is_void, exp, &args[1], d->mode);
2477 aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
2479 rtx pat;
2480 aarch64_crc_builtin_datum *d
2481 = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
2482 enum insn_code icode = d->icode;
2483 tree arg0 = CALL_EXPR_ARG (exp, 0);
2484 tree arg1 = CALL_EXPR_ARG (exp, 1);
2485 rtx op0 = expand_normal (arg0);
2486 rtx op1 = expand_normal (arg1);
2487 machine_mode tmode = insn_data[icode].operand[0].mode;
2488 machine_mode mode0 = insn_data[icode].operand[1].mode;
2489 machine_mode mode1 = insn_data[icode].operand[2].mode;
2491 if (! target
2492 || GET_MODE (target) != tmode
2493 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
2494 target = gen_reg_rtx (tmode);
2496 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
2497 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
2499 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
2500 op0 = copy_to_mode_reg (mode0, op0);
2501 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
2502 op1 = copy_to_mode_reg (mode1, op1);
2504 pat = GEN_FCN (icode) (target, op0, op1);
2505 if (!pat)
2506 return NULL_RTX;
2508 emit_insn (pat);
2509 return target;
2512 /* Function to expand reciprocal square root builtins. */
2514 static rtx
2515 aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
2517 tree arg0 = CALL_EXPR_ARG (exp, 0);
2518 rtx op0 = expand_normal (arg0);
2520 rtx (*gen) (rtx, rtx);
2522 switch (fcode)
2524 case AARCH64_BUILTIN_RSQRT_DF:
2525 gen = gen_rsqrtdf2;
2526 break;
2527 case AARCH64_BUILTIN_RSQRT_SF:
2528 gen = gen_rsqrtsf2;
2529 break;
2530 case AARCH64_BUILTIN_RSQRT_V2DF:
2531 gen = gen_rsqrtv2df2;
2532 break;
2533 case AARCH64_BUILTIN_RSQRT_V2SF:
2534 gen = gen_rsqrtv2sf2;
2535 break;
2536 case AARCH64_BUILTIN_RSQRT_V4SF:
2537 gen = gen_rsqrtv4sf2;
2538 break;
2539 default: gcc_unreachable ();
2542 if (!target)
2543 target = gen_reg_rtx (GET_MODE (op0));
2545 emit_insn (gen (target, op0));
2547 return target;
2550 /* Expand a FCMLA lane expression EXP with code FCODE and
2551 result going to TARGET if that is convenient. */
2554 aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
2556 int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
2557 aarch64_fcmla_laneq_builtin_datum* d
2558 = &aarch64_fcmla_lane_builtin_data[bcode];
2559 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
2560 rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
2561 rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
2562 rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
2563 tree tmp = CALL_EXPR_ARG (exp, 3);
2564 rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);
2566 /* Validate that the lane index is a constant. */
2567 if (!CONST_INT_P (lane_idx))
2569 error_at (EXPR_LOCATION (exp),
2570 "argument %d must be a constant immediate", 4);
2571 return const0_rtx;
2574 /* Validate that the index is within the expected range. */
2575 int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
2576 aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
2578 /* Generate the correct register and mode. */
2579 int lane = INTVAL (lane_idx);
2581 if (lane < nunits / 4)
2582 op2 = force_lowpart_subreg (d->mode, op2, quadmode);
2583 else
2585 /* Select the upper 64 bits, either a V2SF or V4HF. This is however
2586 quite messy: the operation required, even though simple, doesn't
2587 have a simple RTL pattern, and it seems quite hard to define it
2588 using a single RTL pattern. The target-generic version
2589 gen_highpart_mode generates code that isn't optimal. */
2590 rtx temp1 = gen_reg_rtx (d->mode);
2591 rtx temp2 = gen_reg_rtx (DImode);
2592 temp1 = force_lowpart_subreg (d->mode, op2, quadmode);
2593 temp1 = force_subreg (V2DImode, temp1, d->mode, 0);
2594 if (BYTES_BIG_ENDIAN)
2595 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
2596 else
2597 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
2598 op2 = force_subreg (d->mode, temp2, GET_MODE (temp2), 0);
2600 /* And recalculate the index. */
2601 lane -= nunits / 4;
2604 /* Keep to GCC-vector-extension lane indices in the RTL; only nunits / 4
2605 indices (the maximum from the range check above) are valid, which means
2606 only 0-1, so we only need to know the order in a V2 mode. */
2607 lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);
2609 if (!target
2610 || !REG_P (target)
2611 || GET_MODE (target) != d->mode)
2612 target = gen_reg_rtx (d->mode);
2614 rtx pat = NULL_RTX;
2616 if (d->lane)
2617 pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
2618 else
2619 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
2621 if (!pat)
2622 return NULL_RTX;
2624 emit_insn (pat);
2625 return target;
2628 /* Function to expand an expression EXP which calls one of the Transactional
2629 Memory Extension (TME) builtins FCODE with the result going to TARGET. */
2630 static rtx
2631 aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
2633 switch (fcode)
2635 case AARCH64_TME_BUILTIN_TSTART:
2636 target = gen_reg_rtx (DImode);
2637 emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
2638 break;
2640 case AARCH64_TME_BUILTIN_TTEST:
2641 target = gen_reg_rtx (DImode);
2642 emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
2643 break;
2645 case AARCH64_TME_BUILTIN_TCOMMIT:
2646 emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
2647 break;
2649 case AARCH64_TME_BUILTIN_TCANCEL:
2651 tree arg0 = CALL_EXPR_ARG (exp, 0);
2652 rtx op0 = expand_normal (arg0);
2653 if (CONST_INT_P (op0) && UINTVAL (op0) < 65536)
2654 emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
2655 else
2657 error_at (EXPR_LOCATION (exp),
2658 "argument must be a 16-bit constant immediate");
2659 return const0_rtx;
2662 break;
2664 default :
2665 gcc_unreachable ();
2667 return target;
2670 /* Function to expand an expression EXP which calls one of the Load/Store
2671 64 Byte extension (LS64) builtins FCODE with the result going to TARGET. */
2672 static rtx
2673 aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
2675 expand_operand ops[3];
2677 switch (fcode)
2679 case AARCH64_LS64_BUILTIN_LD64B:
2681 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2682 create_output_operand (&ops[0], target, V8DImode);
2683 create_input_operand (&ops[1], op0, DImode);
2684 expand_insn (CODE_FOR_ld64b, 2, ops);
2685 return ops[0].value;
2687 case AARCH64_LS64_BUILTIN_ST64B:
2689 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2690 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2691 create_input_operand (&ops[0], op0, DImode);
2692 create_input_operand (&ops[1], op1, V8DImode);
2693 expand_insn (CODE_FOR_st64b, 2, ops);
2694 return const0_rtx;
2696 case AARCH64_LS64_BUILTIN_ST64BV:
2698 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2699 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2700 create_output_operand (&ops[0], target, DImode);
2701 create_input_operand (&ops[1], op0, DImode);
2702 create_input_operand (&ops[2], op1, V8DImode);
2703 expand_insn (CODE_FOR_st64bv, 3, ops);
2704 return ops[0].value;
2706 case AARCH64_LS64_BUILTIN_ST64BV0:
2708 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2709 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2710 create_output_operand (&ops[0], target, DImode);
2711 create_input_operand (&ops[1], op0, DImode);
2712 create_input_operand (&ops[2], op1, V8DImode);
2713 expand_insn (CODE_FOR_st64bv0, 3, ops);
2714 return ops[0].value;
2718 gcc_unreachable ();
2721 /* Expand a random number builtin EXP with code FCODE, putting the result
2722 into TARGET. If IGNORE is true the return value is ignored. */
2725 aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
2727 rtx pat;
2728 enum insn_code icode;
2729 if (fcode == AARCH64_BUILTIN_RNG_RNDR)
2730 icode = CODE_FOR_aarch64_rndr;
2731 else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
2732 icode = CODE_FOR_aarch64_rndrrs;
2733 else
2734 gcc_unreachable ();
2736 rtx rand = gen_reg_rtx (DImode);
2737 pat = GEN_FCN (icode) (rand);
2738 if (!pat)
2739 return NULL_RTX;
2741 tree arg0 = CALL_EXPR_ARG (exp, 0);
2742 rtx res_addr = expand_normal (arg0);
2743 res_addr = convert_memory_address (Pmode, res_addr);
2744 rtx res_mem = gen_rtx_MEM (DImode, res_addr);
2745 emit_insn (pat);
2746 emit_move_insn (res_mem, rand);
2747 /* If the status result is unused don't generate the CSET code. */
2748 if (ignore)
2749 return target;
2751 rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
2752 rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
2753 emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
2754 return target;
2757 /* Expand the read/write system register builtin EXPs. */
2759 aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
2761 tree arg0, arg1;
2762 rtx const_str, input_val, subreg;
2763 enum machine_mode mode;
2764 enum insn_code icode;
2765 class expand_operand ops[2];
2767 arg0 = CALL_EXPR_ARG (exp, 0);
2769 bool write_op = (fcode == AARCH64_WSR
2770 || fcode == AARCH64_WSRP
2771 || fcode == AARCH64_WSR64
2772 || fcode == AARCH64_WSRF
2773 || fcode == AARCH64_WSRF64
2774 || fcode == AARCH64_WSR128);
2776 bool op128 = (fcode == AARCH64_RSR128 || fcode == AARCH64_WSR128);
2777 enum machine_mode sysreg_mode = op128 ? TImode : DImode;
2779 if (op128 && !TARGET_D128)
2781 error_at (EXPR_LOCATION (exp), "128-bit system register support requires"
2782 " the %<d128%> extension");
2783 return const0_rtx;
2786 /* Argument 0 (system register name) must be a string literal. */
2787 gcc_assert (TREE_CODE (arg0) == ADDR_EXPR
2788 && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
2789 && TREE_CODE (TREE_OPERAND (arg0, 0)) == STRING_CST);
2791 const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
2793 tree len_tree = c_strlen (arg0, 1);
2794 if (len_tree == NULL_TREE)
2796 error_at (EXPR_LOCATION (exp), "invalid system register name provided");
2797 return const0_rtx;
2800 size_t len = TREE_INT_CST_LOW (len_tree);
2801 char *sysreg_name = xstrdup (name_input);
2803 for (unsigned pos = 0; pos <= len; pos++)
2804 sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
2806 const char* name_output = aarch64_retrieve_sysreg ((const char *) sysreg_name,
2807 write_op, op128);
2808 if (name_output == NULL)
2810 error_at (EXPR_LOCATION (exp), "invalid system register name %qs",
2811 sysreg_name);
2812 return const0_rtx;
2815 /* Assign the string corresponding to the system register name to an RTX. */
2816 const_str = rtx_alloc (CONST_STRING);
2817 PUT_CODE (const_str, CONST_STRING);
2818 XSTR (const_str, 0) = ggc_strdup (name_output);
2820 /* Set up expander operands and call instruction expansion. */
2821 if (write_op)
2823 arg1 = CALL_EXPR_ARG (exp, 1);
2824 mode = TYPE_MODE (TREE_TYPE (arg1));
2825 input_val = copy_to_mode_reg (mode, expand_normal (arg1));
2827 icode = (op128 ? CODE_FOR_aarch64_write_sysregti
2828 : CODE_FOR_aarch64_write_sysregdi);
2830 switch (fcode)
2832 case AARCH64_WSR:
2833 case AARCH64_WSRP:
2834 case AARCH64_WSR64:
2835 case AARCH64_WSRF64:
2836 case AARCH64_WSR128:
2837 subreg = force_lowpart_subreg (sysreg_mode, input_val, mode);
2838 break;
2839 case AARCH64_WSRF:
2840 subreg = gen_lowpart_SUBREG (SImode, input_val);
2841 subreg = gen_lowpart_SUBREG (DImode, subreg);
2842 break;
2845 create_fixed_operand (&ops[0], const_str);
2846 create_input_operand (&ops[1], subreg, sysreg_mode);
2847 expand_insn (icode, 2, ops);
2849 return target;
2852 /* Read operations are implied by !write_op. */
2853 gcc_assert (call_expr_nargs (exp) == 1);
2855 icode = (op128 ? CODE_FOR_aarch64_read_sysregti
2856 : CODE_FOR_aarch64_read_sysregdi);
2858 /* Emit the initial read_sysregdi rtx. */
2859 create_output_operand (&ops[0], target, sysreg_mode);
2860 create_fixed_operand (&ops[1], const_str);
2861 expand_insn (icode, 2, ops);
2862 target = ops[0].value;
2864 /* Do any necessary post-processing on the result. */
2865 switch (fcode)
2867 case AARCH64_RSR:
2868 case AARCH64_RSRP:
2869 case AARCH64_RSR64:
2870 case AARCH64_RSRF64:
2871 case AARCH64_RSR128:
2872 return force_lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)),
2873 target, sysreg_mode);
2874 case AARCH64_RSRF:
2875 subreg = gen_lowpart_SUBREG (SImode, target);
2876 return gen_lowpart_SUBREG (SFmode, subreg);
2877 default:
2878 gcc_unreachable ();
2882 /* Ensure argument ARGNO in EXP represents a const-type argument in the range
2883 [MINVAL, MAXVAL). */
2884 static HOST_WIDE_INT
2885 require_const_argument (tree exp, unsigned int argno, HOST_WIDE_INT minval,
2886 HOST_WIDE_INT maxval)
2888 maxval--;
2889 tree arg = CALL_EXPR_ARG (exp, argno);
2890 if (TREE_CODE (arg) != INTEGER_CST)
2891 error_at (EXPR_LOCATION (exp), "Constant-type argument expected");
2893 auto argval = wi::to_widest (arg);
2895 if (argval < minval || argval > maxval)
2896 error_at (EXPR_LOCATION (exp),
2897 "argument %d must be a constant immediate "
2898 "in range [%wd,%wd]", argno + 1, minval, maxval);
2900 HOST_WIDE_INT retval = argval.to_shwi ();
2901 return retval;
2905 /* Expand a prefetch builtin EXP. */
2906 void
2907 aarch64_expand_prefetch_builtin (tree exp, int fcode)
2909 int kind_id = -1;
2910 int level_id = -1;
2911 int rettn_id = -1;
2912 char prfop[11];
2913 class expand_operand ops[2];
2915 static const char *kind_s[] = {"PLD", "PST", "PLI"};
2916 static const char *level_s[] = {"L1", "L2", "L3", "SLC"};
2917 static const char *rettn_s[] = {"KEEP", "STRM"};
2919 /* Each of the four prefetch builtins takes a different number of arguments,
2920 but proceeds to call the PRFM insn which requires 4 pieces of information
2921 to be fully defined. Where one of these takes fewer than 4 arguments, set
2922 sensible defaults. */
2923 switch (fcode)
2925 case AARCH64_PLDX:
2926 break;
2927 case AARCH64_PLIX:
2928 kind_id = 2;
2929 break;
2930 case AARCH64_PLI:
2931 case AARCH64_PLD:
2932 kind_id = (fcode == AARCH64_PLD) ? 0 : 2;
2933 level_id = 0;
2934 rettn_id = 0;
2935 break;
2936 default:
2937 gcc_unreachable ();
2940 /* Any -1 id variable is to be user-supplied. Here we fill these in and run
2941 bounds checks on them. "PLI" is used only implicitly by AARCH64_PLI &
2942 AARCH64_PLIX, never explicitly. */
2943 int argno = 0;
2944 if (kind_id < 0)
2945 kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (kind_s) - 1);
2946 if (level_id < 0)
2947 level_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (level_s));
2948 if (rettn_id < 0)
2949 rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (rettn_s));
2950 rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX, Pmode,
2951 EXPAND_NORMAL);
2953 if (seen_error ())
2954 return;
2956 sprintf (prfop, "%s%s%s", kind_s[kind_id],
2957 level_s[level_id],
2958 rettn_s[rettn_id]);
2960 rtx const_str = rtx_alloc (CONST_STRING);
2961 PUT_CODE (const_str, CONST_STRING);
2962 XSTR (const_str, 0) = ggc_strdup (prfop);
2964 create_fixed_operand (&ops[0], const_str);
2965 create_address_operand (&ops[1], address);
2966 maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
2969 /* Expand an expression EXP that calls a MEMTAG built-in FCODE
2970 with result going to TARGET. */
2971 static rtx
2972 aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
2974 if (TARGET_ILP32)
2976 error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
2977 return const0_rtx;
2980 rtx pat = NULL;
2981 enum insn_code icode = aarch64_memtag_builtin_data[fcode -
2982 AARCH64_MEMTAG_BUILTIN_START - 1].icode;
2984 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2985 machine_mode mode0 = GET_MODE (op0);
2986 op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
2987 op0 = convert_to_mode (DImode, op0, true);
2989 switch (fcode)
2991 case AARCH64_MEMTAG_BUILTIN_IRG:
2992 case AARCH64_MEMTAG_BUILTIN_GMI:
2993 case AARCH64_MEMTAG_BUILTIN_SUBP:
2994 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
2996 if (! target
2997 || GET_MODE (target) != DImode
2998 || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
2999 target = gen_reg_rtx (DImode);
3001 if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
3003 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3005 if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
3007 pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
3008 break;
3010 error_at (EXPR_LOCATION (exp),
3011 "argument %d must be a constant immediate "
3012 "in range [0,15]", 2);
3013 return const0_rtx;
3015 else
3017 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3018 machine_mode mode1 = GET_MODE (op1);
3019 op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
3020 op1 = convert_to_mode (DImode, op1, true);
3021 pat = GEN_FCN (icode) (target, op0, op1);
3023 break;
3025 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
3026 target = op0;
3027 pat = GEN_FCN (icode) (target, op0, const0_rtx);
3028 break;
3029 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
3030 pat = GEN_FCN (icode) (op0, op0, const0_rtx);
3031 break;
3032 default:
3033 gcc_unreachable();
3036 if (!pat)
3037 return NULL_RTX;
3039 emit_insn (pat);
3040 return target;
3043 /* Function to expand an expression EXP which calls one of the ACLE Data
3044 Intrinsic builtins FCODE with the result going to TARGET. */
3045 static rtx
3046 aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
3048 expand_operand ops[2];
3049 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3050 create_output_operand (&ops[0], target, mode);
3051 create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode);
3052 enum insn_code icode;
3054 switch (fcode)
3056 case AARCH64_REV16:
3057 case AARCH64_REV16L:
3058 case AARCH64_REV16LL:
3059 icode = code_for_aarch64_rev16 (mode);
3060 break;
3061 case AARCH64_RBIT:
3062 case AARCH64_RBITL:
3063 case AARCH64_RBITLL:
3064 icode = code_for_aarch64_rbit (mode);
3065 break;
3066 default:
3067 gcc_unreachable ();
3070 expand_insn (icode, 2, ops);
3071 return ops[0].value;
3074 /* Expand an expression EXP as fpsr or fpcr setter (depending on
3075 UNSPEC) using MODE. */
3076 static void
3077 aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
3079 tree arg = CALL_EXPR_ARG (exp, 0);
3080 rtx op = force_reg (mode, expand_normal (arg));
3081 emit_insn (gen_aarch64_set (unspec, mode, op));
3084 /* Expand a fpsr or fpcr getter (depending on ICODE) using MODE.
3085 Return the target. */
3086 static rtx
3087 aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
3088 rtx target)
3090 expand_operand op;
3091 create_output_operand (&op, target, mode);
3092 expand_insn (icode, 1, &op);
3093 return op.value;
3096 /* Expand an expression EXP that calls built-in function FCODE,
3097 with result going to TARGET if that's convenient. IGNORE is true
3098 if the result of the builtin is ignored. */
3100 aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
3101 int ignore)
3103 int icode;
3104 rtx op0;
3105 tree arg0;
3107 switch (fcode)
3109 case AARCH64_BUILTIN_GET_FPCR:
3110 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
3111 SImode, target);
3112 case AARCH64_BUILTIN_SET_FPCR:
3113 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
3114 return target;
3115 case AARCH64_BUILTIN_GET_FPSR:
3116 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
3117 SImode, target);
3118 case AARCH64_BUILTIN_SET_FPSR:
3119 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
3120 return target;
3121 case AARCH64_BUILTIN_GET_FPCR64:
3122 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
3123 DImode, target);
3124 case AARCH64_BUILTIN_SET_FPCR64:
3125 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
3126 return target;
3127 case AARCH64_BUILTIN_GET_FPSR64:
3128 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
3129 DImode, target);
3130 case AARCH64_BUILTIN_SET_FPSR64:
3131 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);
3132 return target;
3133 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
3134 case AARCH64_PAUTH_BUILTIN_PACIA1716:
3135 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
3136 case AARCH64_PAUTH_BUILTIN_PACIB1716:
3137 case AARCH64_PAUTH_BUILTIN_XPACLRI:
3138 arg0 = CALL_EXPR_ARG (exp, 0);
3139 op0 = force_reg (Pmode, expand_normal (arg0));
3141 if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
3143 rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
3144 icode = CODE_FOR_xpaclri;
3145 emit_move_insn (lr, op0);
3146 emit_insn (GEN_FCN (icode) ());
3147 return lr;
3149 else
3151 tree arg1 = CALL_EXPR_ARG (exp, 1);
3152 rtx op1 = force_reg (Pmode, expand_normal (arg1));
3153 switch (fcode)
3155 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
3156 icode = CODE_FOR_autia1716;
3157 break;
3158 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
3159 icode = CODE_FOR_autib1716;
3160 break;
3161 case AARCH64_PAUTH_BUILTIN_PACIA1716:
3162 icode = CODE_FOR_pacia1716;
3163 break;
3164 case AARCH64_PAUTH_BUILTIN_PACIB1716:
3165 icode = CODE_FOR_pacib1716;
3166 break;
3167 default:
3168 icode = 0;
3169 gcc_unreachable ();
3172 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
3173 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
3174 emit_move_insn (x17_reg, op0);
3175 emit_move_insn (x16_reg, op1);
3176 emit_insn (GEN_FCN (icode) ());
3177 return x17_reg;
3180 case AARCH64_JSCVT:
3182 expand_operand ops[2];
3183 create_output_operand (&ops[0], target, SImode);
3184 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
3185 create_input_operand (&ops[1], op0, DFmode);
3186 expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
3187 return ops[0].value;
3190 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
3191 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
3192 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
3193 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
3194 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
3195 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
3196 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
3197 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
3198 return aarch64_expand_fcmla_builtin (exp, target, fcode);
3199 case AARCH64_BUILTIN_RNG_RNDR:
3200 case AARCH64_BUILTIN_RNG_RNDRRS:
3201 return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
3202 case AARCH64_RSR:
3203 case AARCH64_RSRP:
3204 case AARCH64_RSR64:
3205 case AARCH64_RSRF:
3206 case AARCH64_RSRF64:
3207 case AARCH64_RSR128:
3208 case AARCH64_WSR:
3209 case AARCH64_WSRP:
3210 case AARCH64_WSR64:
3211 case AARCH64_WSRF:
3212 case AARCH64_WSRF64:
3213 case AARCH64_WSR128:
3214 return aarch64_expand_rwsr_builtin (exp, target, fcode);
3215 case AARCH64_PLD:
3216 case AARCH64_PLDX:
3217 case AARCH64_PLI:
3218 case AARCH64_PLIX:
3219 aarch64_expand_prefetch_builtin (exp, fcode);
3220 return target;
3223 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
3224 return aarch64_simd_expand_builtin (fcode, exp, target);
3225 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
3226 return aarch64_crc32_expand_builtin (fcode, exp, target);
3228 if (fcode == AARCH64_BUILTIN_RSQRT_DF
3229 || fcode == AARCH64_BUILTIN_RSQRT_SF
3230 || fcode == AARCH64_BUILTIN_RSQRT_V2DF
3231 || fcode == AARCH64_BUILTIN_RSQRT_V2SF
3232 || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
3233 return aarch64_expand_builtin_rsqrt (fcode, exp, target);
3235 if (fcode == AARCH64_TME_BUILTIN_TSTART
3236 || fcode == AARCH64_TME_BUILTIN_TCOMMIT
3237 || fcode == AARCH64_TME_BUILTIN_TTEST
3238 || fcode == AARCH64_TME_BUILTIN_TCANCEL)
3239 return aarch64_expand_builtin_tme (fcode, exp, target);
3241 if (fcode == AARCH64_LS64_BUILTIN_LD64B
3242 || fcode == AARCH64_LS64_BUILTIN_ST64B
3243 || fcode == AARCH64_LS64_BUILTIN_ST64BV
3244 || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
3245 return aarch64_expand_builtin_ls64 (fcode, exp, target);
3247 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
3248 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
3249 return aarch64_expand_builtin_memtag (fcode, exp, target);
3250 if (fcode >= AARCH64_REV16
3251 && fcode <= AARCH64_RBITLL)
3252 return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
3254 gcc_unreachable ();
3257 /* Return builtin for reciprocal square root. */
3259 tree
3260 aarch64_general_builtin_rsqrt (unsigned int fn)
3262 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
3263 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
3264 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
3265 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
3266 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
3267 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
3268 return NULL_TREE;
3271 /* Return true if the lane check can be removed as there is no
3272 error going to be emitted. */
3273 static bool
3274 aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
3276 if (TREE_CODE (arg0) != INTEGER_CST)
3277 return false;
3278 if (TREE_CODE (arg1) != INTEGER_CST)
3279 return false;
3280 if (TREE_CODE (arg2) != INTEGER_CST)
3281 return false;
3283 auto totalsize = wi::to_widest (arg0);
3284 auto elementsize = wi::to_widest (arg1);
3285 if (totalsize == 0 || elementsize == 0)
3286 return false;
3287 auto lane = wi::to_widest (arg2);
3288 auto high = wi::udiv_trunc (totalsize, elementsize);
3289 return wi::ltu_p (lane, high);
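/* Worked example (the operand values are hypothetical): a call
   __builtin_aarch64_im_lane_boundsi (16, 4, 3), i.e. a 16-byte vector with
   4-byte elements and lane index 3, satisfies 3 < 16 / 4, so the lane check
   can be folded away.  */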
3292 #undef VAR1
3293 #define VAR1(T, N, MAP, FLAG, A) \
3294 case AARCH64_SIMD_BUILTIN_##T##_##N##A:
3296 #undef VREINTERPRET_BUILTIN
3297 #define VREINTERPRET_BUILTIN(A, B, L) \
3298 case AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B:
3300 #undef VGET_LOW_BUILTIN
3301 #define VGET_LOW_BUILTIN(A) \
3302 case AARCH64_SIMD_BUILTIN_VGET_LOW_##A:
3304 #undef VGET_HIGH_BUILTIN
3305 #define VGET_HIGH_BUILTIN(A) \
3306 case AARCH64_SIMD_BUILTIN_VGET_HIGH_##A:
3308 /* Try to fold a call to the built-in function with subcode FCODE. The
3309 function is passed the N_ARGS arguments in ARGS and it returns a value
3310 of type TYPE. Return the new expression on success and NULL_TREE on
3311 failure. */
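/* For instance (assuming the usual arm_neon.h spelling), the call emitted
   for vabsq_f32 -- __builtin_aarch64_absv4sf -- folds here to a plain
   ABS_EXPR on its single vector argument.  */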
3312 tree
3313 aarch64_general_fold_builtin (unsigned int fcode, tree type,
3314 unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
3316 switch (fcode)
3318 BUILTIN_VDQF (UNOP, abs, 2, ALL)
3319 return fold_build1 (ABS_EXPR, type, args[0]);
3320 VAR1 (UNOP, floatv2si, 2, ALL, v2sf)
3321 VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
3322 VAR1 (UNOP, floatv2di, 2, ALL, v2df)
3323 return fold_build1 (FLOAT_EXPR, type, args[0]);
3324 AARCH64_SIMD_VREINTERPRET_BUILTINS
3325 return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
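/* vget_low/vget_high return one 64-bit half of a 128-bit vector, so they
   fold to a 64-bit BIT_FIELD_REF; the bit position of each half is swapped
   on big-endian targets.  */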
3326 AARCH64_SIMD_VGET_LOW_BUILTINS
3328 auto pos = BYTES_BIG_ENDIAN ? 64 : 0;
3330 return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
3331 bitsize_int (pos));
3333 AARCH64_SIMD_VGET_HIGH_BUILTINS
3335 auto pos = BYTES_BIG_ENDIAN ? 0 : 64;
3337 return fold_build3 (BIT_FIELD_REF, type, args[0], bitsize_int (64),
3338 bitsize_int (pos));
3340 case AARCH64_SIMD_BUILTIN_LANE_CHECK:
3341 gcc_assert (n_args == 3);
3342 if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
3343 return void_node;
3344 break;
3345 default:
3346 break;
3349 return NULL_TREE;
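/* Return the aarch64_simd_type describing the memory operand of the
   single-vector load/store builtin FCODE; used below when folding ld1/st1
   calls to plain memory accesses.  */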
3352 enum aarch64_simd_type
3353 get_mem_type_for_load_store (unsigned int fcode)
3355 switch (fcode)
3357 VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
3358 VAR1 (STORE1, st1, 0, STORE, v8qi)
3359 return Int8x8_t;
3360 VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
3361 VAR1 (STORE1, st1, 0, STORE, v16qi)
3362 return Int8x16_t;
3363 VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
3364 VAR1 (STORE1, st1, 0, STORE, v4hi)
3365 return Int16x4_t;
3366 VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
3367 VAR1 (STORE1, st1, 0, STORE, v8hi)
3368 return Int16x8_t;
3369 VAR1 (LOAD1, ld1, 0, LOAD, v2si)
3370 VAR1 (STORE1, st1, 0, STORE, v2si)
3371 return Int32x2_t;
3372 VAR1 (LOAD1, ld1, 0, LOAD, v4si)
3373 VAR1 (STORE1, st1, 0, STORE, v4si)
3374 return Int32x4_t;
3375 VAR1 (LOAD1, ld1, 0, LOAD, v2di)
3376 VAR1 (STORE1, st1, 0, STORE, v2di)
3377 return Int64x2_t;
3378 VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
3379 VAR1 (STORE1_U, st1, 0, STORE, v8qi)
3380 return Uint8x8_t;
3381 VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
3382 VAR1 (STORE1_U, st1, 0, STORE, v16qi)
3383 return Uint8x16_t;
3384 VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
3385 VAR1 (STORE1_U, st1, 0, STORE, v4hi)
3386 return Uint16x4_t;
3387 VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
3388 VAR1 (STORE1_U, st1, 0, STORE, v8hi)
3389 return Uint16x8_t;
3390 VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
3391 VAR1 (STORE1_U, st1, 0, STORE, v2si)
3392 return Uint32x2_t;
3393 VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
3394 VAR1 (STORE1_U, st1, 0, STORE, v4si)
3395 return Uint32x4_t;
3396 VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
3397 VAR1 (STORE1_U, st1, 0, STORE, v2di)
3398 return Uint64x2_t;
3399 VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
3400 VAR1 (STORE1_P, st1, 0, STORE, v8qi)
3401 return Poly8x8_t;
3402 VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
3403 VAR1 (STORE1_P, st1, 0, STORE, v16qi)
3404 return Poly8x16_t;
3405 VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
3406 VAR1 (STORE1_P, st1, 0, STORE, v4hi)
3407 return Poly16x4_t;
3408 VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
3409 VAR1 (STORE1_P, st1, 0, STORE, v8hi)
3410 return Poly16x8_t;
3411 VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
3412 VAR1 (STORE1_P, st1, 0, STORE, v2di)
3413 return Poly64x2_t;
3414 VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
3415 VAR1 (STORE1, st1, 0, STORE, v4hf)
3416 return Float16x4_t;
3417 VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
3418 VAR1 (STORE1, st1, 0, STORE, v8hf)
3419 return Float16x8_t;
3420 VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
3421 VAR1 (STORE1, st1, 0, STORE, v4bf)
3422 return Bfloat16x4_t;
3423 VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
3424 VAR1 (STORE1, st1, 0, STORE, v8bf)
3425 return Bfloat16x8_t;
3426 VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
3427 VAR1 (STORE1, st1, 0, STORE, v2sf)
3428 return Float32x2_t;
3429 VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
3430 VAR1 (STORE1, st1, 0, STORE, v4sf)
3431 return Float32x4_t;
3432 VAR1 (LOAD1, ld1, 0, LOAD, v2df)
3433 VAR1 (STORE1, st1, 0, STORE, v2df)
3434 return Float64x2_t;
3435 default:
3436 gcc_unreachable ();
3437 break;
3441 /* We've seen a vector load from address ADDR. Record it in
3442 vector_load_decls, if appropriate. */
3443 static void
3444 aarch64_record_vector_load_arg (tree addr)
3446 tree decl = aarch64_vector_load_decl (addr);
3447 if (!decl)
3448 return;
3449 if (!cfun->machine->vector_load_decls)
3450 cfun->machine->vector_load_decls = hash_set<tree>::create_ggc (31);
3451 cfun->machine->vector_load_decls->add (decl);
3454 /* Try to fold STMT, given that it's a call to the built-in function with
3455 subcode FCODE. Return the new statement on success and null on
3456 failure. */
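/* For example (assuming the usual arm_neon.h mapping), the call emitted for
   vaddvq_s32 -- __builtin_aarch64_reduc_plus_scal_v4si -- is replaced below
   by a call to the internal function IFN_REDUC_PLUS on the same operand.  */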
3457 gimple *
3458 aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
3459 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
3461 gimple *new_stmt = NULL;
3462 unsigned nargs = gimple_call_num_args (stmt);
3463 tree *args = (nargs > 0
3464 ? gimple_call_arg_ptr (stmt, 0)
3465 : &error_mark_node);
3467 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX) for float, signed int
3468 and unsigned int; the internal function distinguishes according to
3469 the types of the builtin's arguments.  */
3470 switch (fcode)
3472 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
3473 BUILTIN_VDQ_I (UNOPU, reduc_plus_scal_, 10, NONE)
3474 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
3475 1, args[0]);
3476 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3477 break;
3479 /* Lower sqrt builtins to gimple/internal function sqrt. */
3480 BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
3481 new_stmt = gimple_build_call_internal (IFN_SQRT,
3482 1, args[0]);
3483 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3484 break;
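/* vcombine folds to a two-element CONSTRUCTOR of the return vector type;
   the two 64-bit halves are swapped on big-endian targets so that each
   argument ends up in the correct half of the result.  */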
3486 BUILTIN_VDC (BINOP, combine, 0, AUTO_FP)
3487 BUILTIN_VD_I (BINOPU, combine, 0, NONE)
3488 BUILTIN_VDC_P (BINOPP, combine, 0, NONE)
3490 tree first_part, second_part;
3491 if (BYTES_BIG_ENDIAN)
3493 second_part = args[0];
3494 first_part = args[1];
3496 else
3498 first_part = args[0];
3499 second_part = args[1];
3501 tree ret_type = gimple_call_return_type (stmt);
3502 tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part,
3503 NULL_TREE, second_part);
3504 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ctor);
3506 break;
3508 /* Lower store and load Neon builtins to gimple.  */
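/* On little-endian these fold to a plain MEM_REF whose access type is the
   vector type with only the element's alignment, since ld1/st1 require no
   more than element alignment; on big-endian the call is kept and left to
   the expander.  */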
3509 BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
3510 BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
3511 BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
3512 /* Punt until after inlining, so that we stand a better chance of
3513 recording something meaningful in vector_load_decls.  */
3514 if (!cfun->after_inlining)
3515 break;
3516 aarch64_record_vector_load_arg (args[0]);
3517 if (!BYTES_BIG_ENDIAN)
3519 enum aarch64_simd_type mem_type
3520 = get_mem_type_for_load_store (fcode);
3521 aarch64_simd_type_info simd_type
3522 = aarch64_simd_types[mem_type];
3523 tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
3524 VOIDmode, true);
3525 tree zero = build_zero_cst (elt_ptr_type);
3526 /* Use element type alignment. */
3527 tree access_type
3528 = build_aligned_type (simd_type.itype,
3529 TYPE_ALIGN (simd_type.eltype));
3530 new_stmt
3531 = gimple_build_assign (gimple_get_lhs (stmt),
3532 fold_build2 (MEM_REF,
3533 access_type,
3534 args[0], zero));
3535 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3536 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3538 break;
3540 BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
3541 BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
3542 BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
3543 if (!BYTES_BIG_ENDIAN)
3545 enum aarch64_simd_type mem_type
3546 = get_mem_type_for_load_store (fcode);
3547 aarch64_simd_type_info simd_type
3548 = aarch64_simd_types[mem_type];
3549 tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
3550 VOIDmode, true);
3551 tree zero = build_zero_cst (elt_ptr_type);
3552 /* Use element type alignment. */
3553 tree access_type
3554 = build_aligned_type (simd_type.itype,
3555 TYPE_ALIGN (simd_type.eltype));
3556 new_stmt
3557 = gimple_build_assign (fold_build2 (MEM_REF, access_type,
3558 args[0], zero),
3559 args[1]);
3560 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3561 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3563 break;
3565 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
3566 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
3567 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
3568 1, args[0]);
3569 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3570 break;
3571 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL)
3572 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL)
3573 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
3574 1, args[0]);
3575 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3576 break;
3577 BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
3578 if (TREE_CODE (args[1]) == INTEGER_CST
3579 && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
3580 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3581 LSHIFT_EXPR, args[0], args[1]);
3582 break;
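/* sshl/ushl take a signed, possibly negative, shift amount.  When that
   amount is a uniform integer constant, an in-range non-negative value
   folds to LSHIFT_EXPR and a negative one to RSHIFT_EXPR by its absolute
   value; e.g. a vshlq_s32 whose second operand folds to a splat of -3
   becomes an arithmetic shift right by 3.  */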
3583 BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
3584 BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
3586 tree cst = args[1];
3587 tree ctype = TREE_TYPE (cst);
3588 /* Left shifts can be either scalar or vector, e.g. uint64x1_t is
3589 treated as a scalar type, not a vector one.  */
3590 if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
3592 wide_int wcst = wi::to_wide (cst);
3593 tree unit_ty = TREE_TYPE (cst);
3595 wide_int abs_cst = wi::abs (wcst);
3596 if (wi::geu_p (abs_cst, element_precision (args[0])))
3597 break;
3599 if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
3601 tree final_cst;
3602 final_cst = wide_int_to_tree (unit_ty, abs_cst);
3603 if (TREE_CODE (cst) != INTEGER_CST)
3604 final_cst = build_uniform_cst (ctype, final_cst);
3606 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3607 RSHIFT_EXPR, args[0],
3608 final_cst);
3610 else
3611 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3612 LSHIFT_EXPR, args[0], args[1]);
3615 break;
3616 BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
3617 VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
3618 BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
3619 VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
3620 if (TREE_CODE (args[1]) == INTEGER_CST
3621 && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
3622 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3623 RSHIFT_EXPR, args[0], args[1]);
3624 break;
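/* fmulx: when both operands are constants the special case (+/-0) * (+/-Inf)
   folds to 2.0 with the XOR of the two signs, as FMULX specifies; any other
   constant pair, or a single constant operand that is neither zero nor
   infinity, folds to a plain MULT_EXPR.  */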
3625 BUILTIN_GPF (BINOP, fmulx, 0, ALL)
3627 gcc_assert (nargs == 2);
3628 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
3629 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
3630 if (a0_cst_p || a1_cst_p)
3632 if (a0_cst_p && a1_cst_p)
3634 tree t0 = TREE_TYPE (args[0]);
3635 real_value a0 = (TREE_REAL_CST (args[0]));
3636 real_value a1 = (TREE_REAL_CST (args[1]));
3637 if (real_equal (&a1, &dconst0))
3638 std::swap (a0, a1);
3639 /* According to real_equal (), +0 equals -0. */
3640 if (real_equal (&a0, &dconst0) && real_isinf (&a1))
3642 real_value res = dconst2;
3643 res.sign = a0.sign ^ a1.sign;
3644 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3645 REAL_CST,
3646 build_real (t0, res));
3648 else
3649 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3650 MULT_EXPR,
3651 args[0], args[1]);
3653 else /* a0_cst_p ^ a1_cst_p. */
3655 real_value const_part = a0_cst_p
3656 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
3657 if (!real_equal (&const_part, &dconst0)
3658 && !real_isinf (&const_part))
3659 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3660 MULT_EXPR, args[0],
3661 args[1]);
3664 if (new_stmt)
3666 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3667 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3669 break;
3671 case AARCH64_SIMD_BUILTIN_LANE_CHECK:
3672 if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
3674 unlink_stmt_vdef (stmt);
3675 release_defs (stmt);
3676 new_stmt = gimple_build_nop ();
3678 break;
3679 default:
3680 break;
3683 /* GIMPLE assign statements (unlike calls) require a non-null lhs. If we
3684 created an assign statement with a null lhs, then fix this by assigning
3685 to a new (and subsequently unused) variable. */
3686 if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs (new_stmt))
3688 tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
3689 gimple_assign_set_lhs (new_stmt, new_lhs);
3692 return new_stmt;
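/* Build the expressions used for the floating-point environment handling
   around an atomic compound assignment: *HOLD saves FPCR/FPSR, sets all
   exceptions to non-stop and clears the flags; *CLEAR clears the exception
   flags again; *UPDATE restores the saved FPSR and raises any exceptions
   recorded since.  */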
3695 void
3696 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
3698 const unsigned AARCH64_FE_INVALID = 1;
3699 const unsigned AARCH64_FE_DIVBYZERO = 2;
3700 const unsigned AARCH64_FE_OVERFLOW = 4;
3701 const unsigned AARCH64_FE_UNDERFLOW = 8;
3702 const unsigned AARCH64_FE_INEXACT = 16;
3703 const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
3704 | AARCH64_FE_DIVBYZERO
3705 | AARCH64_FE_OVERFLOW
3706 | AARCH64_FE_UNDERFLOW
3707 | AARCH64_FE_INEXACT);
3708 const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
3709 tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
3710 tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
3711 tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
3712 tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
3714 /* Generate the equivalent of:
3715 unsigned int fenv_cr;
3716 fenv_cr = __builtin_aarch64_get_fpcr ();
3718 unsigned int fenv_sr;
3719 fenv_sr = __builtin_aarch64_get_fpsr ();
3721 Now set all exceptions to non-stop
3722 unsigned int mask_cr
3723 = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
3724 unsigned int masked_cr;
3725 masked_cr = fenv_cr & mask_cr;
3727 And clear all exception flags
3728 unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
3729 unsigned int masked_sr;
3730 masked_sr = fenv_sr & mask_sr;
3732 __builtin_aarch64_set_fpcr (masked_cr);
3733 __builtin_aarch64_set_fpsr (masked_sr); */
3735 fenv_cr = create_tmp_var_raw (unsigned_type_node);
3736 fenv_sr = create_tmp_var_raw (unsigned_type_node);
3738 get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
3739 set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
3740 get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
3741 set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
3743 mask_cr = build_int_cst (unsigned_type_node,
3744 ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
3745 mask_sr = build_int_cst (unsigned_type_node,
3746 ~(AARCH64_FE_ALL_EXCEPT));
3748 ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
3749 fenv_cr, build_call_expr (get_fpcr, 0),
3750 NULL_TREE, NULL_TREE);
3751 ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
3752 fenv_sr, build_call_expr (get_fpsr, 0),
3753 NULL_TREE, NULL_TREE);
3755 masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
3756 masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
3758 hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
3759 hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
3761 hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
3762 hold_fnclex_sr);
3763 masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
3764 masked_fenv_sr);
3765 ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
3767 *hold = build2 (COMPOUND_EXPR, void_type_node,
3768 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
3769 hold_fnclex);
3771 /* Store the value of masked_fenv to clear the exceptions:
3772 __builtin_aarch64_set_fpsr (masked_fenv_sr); */
3774 *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
3776 /* Generate the equivalent of:
3777 unsigned int new_fenv_var;
3778 new_fenv_var = __builtin_aarch64_get_fpsr ();
3780 __builtin_aarch64_set_fpsr (fenv_sr);
3782 __atomic_feraiseexcept (new_fenv_var); */
3784 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
3785 reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
3786 new_fenv_var, build_call_expr (get_fpsr, 0),
3787 NULL_TREE, NULL_TREE);
3788 restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
3789 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
3790 update_call = build_call_expr (atomic_feraiseexcept, 1,
3791 fold_convert (integer_type_node, new_fenv_var));
3792 *update = build2 (COMPOUND_EXPR, void_type_node,
3793 build2 (COMPOUND_EXPR, void_type_node,
3794 reload_fenv, restore_fnenv), update_call);
3797 /* Resolve overloaded MEMTAG built-in functions.  */
3798 #define AARCH64_BUILTIN_SUBCODE(F) \
3799 (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)
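/* For example, calling the GET_TAG builtin (__arm_mte_get_tag in arm_acle.h)
   on an `int *' rewrites the builtin's prototype below so that both the
   parameter and the return type become `int *' rather than the generic
   pointer type.  */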
3801 static tree
3802 aarch64_resolve_overloaded_memtag (location_t loc,
3803 tree fndecl, void *pass_params)
3805 vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
3806 unsigned param_num = params ? params->length () : 0;
3807 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
3808 tree inittype = aarch64_memtag_builtin_data[
3809 fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
3810 unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;
3812 if (param_num != arg_num)
3814 TREE_TYPE (fndecl) = inittype;
3815 return NULL_TREE;
3817 tree retype = NULL;
3819 if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
3821 tree t0 = TREE_TYPE ((*params)[0]);
3822 tree t1 = TREE_TYPE ((*params)[1]);
3824 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
3825 t0 = ptr_type_node;
3826 if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
3827 t1 = ptr_type_node;
3829 if (TYPE_MODE (t0) != DImode)
3830 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
3831 (int) tree_to_shwi (DECL_SIZE ((*params)[0])));
3833 if (TYPE_MODE (t1) != DImode)
3834 warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
3835 (int) tree_to_shwi (DECL_SIZE ((*params)[1])));
3837 retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
3839 else
3841 tree t0 = TREE_TYPE ((*params)[0]);
3843 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
3845 TREE_TYPE (fndecl) = inittype;
3846 return NULL_TREE;
3849 if (TYPE_MODE (t0) != DImode)
3850 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
3851 (int) tree_to_shwi (DECL_SIZE ((*params)[0])));
3853 switch (fcode)
3855 case AARCH64_MEMTAG_BUILTIN_IRG:
3856 retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
3857 break;
3858 case AARCH64_MEMTAG_BUILTIN_GMI:
3859 retype = build_function_type_list (uint64_type_node, t0,
3860 uint64_type_node, NULL);
3861 break;
3862 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
3863 retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
3864 break;
3865 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
3866 retype = build_function_type_list (void_type_node, t0, NULL);
3867 break;
3868 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
3869 retype = build_function_type_list (t0, t0, NULL);
3870 break;
3871 default:
3872 return NULL_TREE;
3876 if (!retype || retype == error_mark_node)
3877 TREE_TYPE (fndecl) = inittype;
3878 else
3879 TREE_TYPE (fndecl) = retype;
3881 return NULL_TREE;
3884 /* Called from aarch64_resolve_overloaded_builtin in aarch64-c.cc.  */
3885 tree
3886 aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
3887 void *pass_params)
3889 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);
3891 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
3892 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
3893 return aarch64_resolve_overloaded_memtag (loc, function, pass_params);
3895 return NULL_TREE;
3898 #undef AARCH64_CHECK_BUILTIN_MODE
3899 #undef AARCH64_FIND_FRINT_VARIANT
3900 #undef CF0
3901 #undef CF1
3902 #undef CF2
3903 #undef CF3
3904 #undef CF4
3905 #undef CF10
3906 #undef VAR1
3907 #undef VAR2
3908 #undef VAR3
3909 #undef VAR4
3910 #undef VAR5
3911 #undef VAR6
3912 #undef VAR7
3913 #undef VAR8
3914 #undef VAR9
3915 #undef VAR10
3916 #undef VAR11
3918 #include "gt-aarch64-builtins.h"