Missed part from my V1DI'fication. This makes something around 50
[llvm-gcc-4.2.git] / gcc / config / arm / neon.ml
blob6c10c409883fca86d5062dad54a6f125a3f5b750
1 (* APPLE LOCAL file v7 support. Merge from Codesourcery *)
2 (* Common code for ARM NEON header file, documentation and test case
3 generators.
5 Copyright (C) 2006 Free Software Foundation, Inc.
6 Contributed by CodeSourcery.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to the Free
22 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 02110-1301, USA. *)
(* Shorthand types for vector elements. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts

(* Broad classification of the element types above. *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType

(* These vector types correspond directly to C types. *)
type vectype = T_int8x8 | T_int8x16
             | T_int16x4 | T_int16x8
             | T_int32x2 | T_int32x4
             | T_int64x1 | T_int64x2
             | T_uint8x8 | T_uint8x16
             | T_uint16x4 | T_uint16x8
             | T_uint32x2 | T_uint32x4
             | T_uint64x1 | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8 | T_poly8x16
             | T_poly16x4 | T_poly16x8
             | T_immediate of int * int
             | T_int8 | T_int16
             | T_int32 | T_int64
             | T_uint8 | T_uint16
             | T_uint32 | T_uint64
             | T_poly8 | T_poly16
             | T_float32 | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void | T_intQI
             | T_intHI | T_intSI
             | T_intDI

(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
*)

(* LLVM LOCAL begin Use a different type for each vector type. *)
type inttype = B_TId8mode | B_EId8mode | B_OId8mode
             | B_TId16mode | B_EId16mode | B_OId16mode
             | B_TId32mode | B_EId32mode | B_OId32mode
             | B_TId64mode | B_EId64mode | B_OId64mode
             | B_TIdSFmode | B_EIdSFmode | B_OIdSFmode
             | B_OIq8mode | B_CIq8mode | B_XIq8mode
             | B_OIq16mode | B_CIq16mode | B_XIq16mode
             | B_OIq32mode | B_CIq32mode | B_XIq32mode
             | B_OIq64mode | B_CIq64mode | B_XIq64mode
             | B_OIqSFmode | B_CIqSFmode | B_XIqSFmode
(* LLVM LOCAL end Use a different type for each vector type. *)

(* Register/operand shapes an instruction operand can take. *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator. *)
               | Element_of_dreg        (* Used for "lane" variants. *)
               | Element_of_qreg        (* Likewise. *)
               | All_elements_of_dreg   (* Used for "dup" variants. *)

(* Overall operand shape of an instruction. *)
type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt

(* Arity of an intrinsic: result type followed by argument types. *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype

(* LLVM LOCAL *)
type vecmode = V8QI | V4HI | V2SI | V2SF | V1DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF | DI
(* Opcodes for the NEON intrinsics handled by the generators. *)
type opcode =
  (* Binary ops. *)
    Vadd
  | Vmul
  | Vmla
  | Vmls
  | Vsub
  | Vceq
  | Vcge
  | Vcgt
  | Vcle
  | Vclt
  | Vcage
  | Vcagt
  | Vcale
  | Vcalt
  | Vtst
  | Vabd
  | Vaba
  | Vmax
  | Vmin
  | Vpadd
  | Vpada
  | Vpmax
  | Vpmin
  | Vrecps
  | Vrsqrts
  | Vshl
  | Vshr_n
  | Vshl_n
  | Vsra_n
  | Vsri
  | Vsli
  (* Logic binops. *)
  | Vand
  | Vorr
  | Veor
  | Vbic
  | Vorn
  | Vbsl
  (* Ops with scalar. *)
  | Vmul_lane
  | Vmla_lane
  | Vmls_lane
  | Vmul_n
  | Vmla_n
  | Vmls_n
  | Vmull_n
  | Vmull_lane
  | Vqdmull_n
  | Vqdmull_lane
  | Vqdmulh_n
  | Vqdmulh_lane
  (* Unary ops. *)
  | Vabs
  | Vneg
  | Vcls
  | Vclz
  | Vcnt
  | Vrecpe
  | Vrsqrte
  | Vmvn
  (* Vector extract. *)
  | Vext
  (* Reverse elements. *)
  | Vrev64
  | Vrev32
  | Vrev16
  (* Transposition ops. *)
  | Vtrn
  | Vzip
  | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector. *)
  | Vget_lane
  | Vset_lane
  (* Initialise vector from bit pattern. *)
  | Vcreate
  (* Set all lanes to same value. *)
  | Vdup_n
  | Vmov_n  (* Is this the same? *)
  (* Duplicate scalar to all lanes of vector. *)
  | Vdup_lane
  (* Combine vectors. *)
  | Vcombine
  (* Get quadword high/low parts. *)
  | Vget_high
  | Vget_low
  (* Convert vectors. *)
  | Vcvt
  | Vcvt_n
  (* Narrow/lengthen vectors. *)
  | Vmovn
  | Vmovl
  (* Table lookup. *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts. *)
  | Vreinterp
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments). *)

type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  | Flipped of string  (* Builtin name to use with flipped arguments. *)
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                 for All _, Long, Wide, Narrow shape_forms. *)
  | ReturnPtr  (* Pass explicit pointer to return value as first argument. *)
    (* A specification as to the shape of instruction expected upon
       disassembly, used if it differs from the shape used to build the
       intrinsic prototype.  Multiple entries in the constructor's argument
       indicate that the intrinsic expands to more than one assembly
       instruction, each with a corresponding shape specified here. *)
  | Disassembles_as of shape_form list
  | Builtin_name of string  (* Override the name of the builtin. *)
    (* Override the name of the instruction.  If more than one name
       is specified, it means that the instruction can have any of those
       names. *)
  | Instruction_name of string list
    (* Mark that the intrinsic yields no instructions, or expands to yield
       behaviour that the test generator cannot test. *)
  | No_op
    (* Mark that the intrinsic has constant arguments that cannot be set
       to the defaults (zero for pointers and one otherwise) in the test
       cases.  The function supplied must return the integer to be written
       into the testcase for the argument number (0-based) supplied to it. *)
  | Const_valuator of (int -> int)
(* Raised when an operation mixes two element types that cannot be
   reconciled (e.g. taking the width of a Cast). *)
exception MixedMode of elts * elts

(* Return the width, in bits, of an element type.
   Raises [MixedMode] for a Cast and [Failure] for NoElts or a Conv
   whose source and destination widths disagree. *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (a, b) ->
      (* A conversion only has a well-defined width when both sides agree. *)
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else failwith "element width?"
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"
(* Classify an element type into its eltclass. Conversions and casts are
   classified recursively on both sides. *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType
(* Build an element type from a class and a bit width.
   Raises [Failure] for unsupported combinations (e.g. Float at 8 bits,
   or Conv/NoType classes). *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"
(* Return unsigned integer element the same width as argument. *)
let unsigned_of_elt elt =
  elt_of_class_width Unsigned (elt_width elt)

(* Return signed integer element the same width as argument. *)
let signed_of_elt elt =
  elt_of_class_width Signed (elt_width elt)

(* Return untyped bits element the same width as argument. *)
let bits_of_elt elt =
  elt_of_class_width Bits (elt_width elt)

(* Map signed and unsigned element types to the type-neutral Int variant
   of the same width; all other element types pass through unchanged. *)
let non_signed_variant = function
    S8 -> I8
  | S16 -> I16
  | S32 -> I32
  | S64 -> I64
  | U8 -> I8
  | U16 -> I16
  | U32 -> I32
  | U64 -> I64
  | x -> x

(* Map polynomial element types to the unsigned variant of the same width;
   other classes are left unchanged. *)
let poly_unsigned_variant v =
  let elclass = match elt_class v with
    Poly -> Unsigned
  | x -> x in
  elt_of_class_width elclass (elt_width v)

(* Return the element of the same class but twice the width. *)
let widen_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w * 2)

(* Return the element of the same class but half the width. *)
let narrow_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w / 2)
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md. *)
let find_key_operand operands =
  (* Scan backwards from the last operand for the first D/Q register
     (possibly inside a VecArray). *)
  let rec scan opno =
    match operands.(opno) with
      Qreg -> Qreg
    | Dreg -> Dreg
    | VecArray (_, Qreg) -> Qreg
    | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno-1)
  in
  scan ((Array.length operands) - 1)
(* Map an element type and an instruction shape to the machine mode used to
   invoke the corresponding builtin.  Raises [Failure] for element widths
   other than 8/16/32/64 or for shapes with no associated mode. *)
let rec mode_of_elt elt shape =
  (* Floating-point (including conversions to float) selects the SF-based
     modes in the 32-bit slot. *)
  let flt = match elt_class elt with
    Float | ConvClass(_, Float) -> true | _ -> false in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      (* LLVM LOCAL *)
      [| V8QI; V4HI; if flt then V2SF else V2SI; V1DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      (* LLVM LOCAL *)
      [| V8QI; V4HI; V2SI; V1DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
   operand number.  Returns a function to apply to the element type:
   operand 0 is the result, operands 1.. are the arguments. *)

let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
(* Register type (D/Q) of an operand, based on shape and operand number.
   Operand 0 is the result; operands 1.. are the arguments. *)

let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
(* Return the C vector type for operand [no] of an instruction with the
   given shape and element type.  Raises [Failure] on element types with
   no corresponding C type for the register class. *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator. *)
    | _ -> assert false
  in
  type_for_reg_elt reg elt
(* Return size of a vector type, in bits.
   Raises [Not_found] for non-vector types. *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found
(* LLVM LOCAL begin Map vector types to modes. *)
(* Return the machine mode corresponding to a C vector type.
   Raises [Not_found] for non-vector types. *)
let vectype_mode = function
    T_int8x8 | T_uint8x8 | T_poly8x8 -> V8QI
  | T_int8x16 | T_uint8x16 | T_poly8x16 -> V16QI
  | T_int16x4 | T_uint16x4 | T_poly16x4 -> V4HI
  | T_int16x8 | T_uint16x8 | T_poly16x8 -> V8HI
  | T_int32x2 | T_uint32x2 -> V2SI
  | T_int32x4 | T_uint32x4 -> V4SI
  (* LLVM LOCAL *)
  | T_int64x1 | T_uint64x1 -> V1DI
  | T_int64x2 | T_uint64x2 -> V2DI
  | T_float32x2 -> V2SF
  | T_float32x4 -> V4SF
  | _ -> raise Not_found
(* LLVM LOCAL end Map vector types to modes. *)
(* Return the opaque integer type used to pass an array of [num] vectors of
   type [elttype] to a builtin.  Raises [Failure] for unsupported sizes. *)
let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  (* LLVM LOCAL begin Match vector type, too. *)
  let vecmode = vectype_mode elttype in
  match numwords, vecmode with
    4, V8QI -> B_TId8mode
  | 4, V4HI -> B_TId16mode
  | 4, V2SI -> B_TId32mode
  (* LLVM LOCAL *)
  | 4, V1DI -> B_TId64mode
  | 4, V2SF -> B_TIdSFmode
  | 6, V8QI -> B_EId8mode
  | 6, V4HI -> B_EId16mode
  | 6, V2SI -> B_EId32mode
  (* LLVM LOCAL *)
  | 6, V1DI -> B_EId64mode
  | 6, V2SF -> B_EIdSFmode
  | 8, V8QI -> B_OId8mode
  | 8, V4HI -> B_OId16mode
  | 8, V2SI -> B_OId32mode
  (* LLVM LOCAL *)
  | 8, V1DI -> B_OId64mode
  | 8, V2SF -> B_OIdSFmode
  | 8, V16QI -> B_OIq8mode
  | 8, V8HI -> B_OIq16mode
  | 8, V4SI -> B_OIq32mode
  | 8, V2DI -> B_OIq64mode
  | 8, V4SF -> B_OIqSFmode
  | 12, V16QI -> B_CIq8mode
  | 12, V8HI -> B_CIq16mode
  | 12, V4SI -> B_CIq32mode
  | 12, V2DI -> B_CIq64mode
  | 12, V4SF -> B_CIqSFmode
  | 16, V16QI -> B_XIq8mode
  | 16, V8HI -> B_XIq16mode
  | 16, V4SI -> B_XIq32mode
  | 16, V2DI -> B_XIq64mode
  | 16, V4SF -> B_XIqSFmode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
(* LLVM LOCAL end Match vector type, too. *)
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode. *)

(* Wrap a typing function so that its resulting element type is collapsed
   to the sign-neutral Int variant. *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'

(* Don't restrict any types. *)

let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt

(* As sign_invar_*, but when sign matters. *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))

(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data. *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: result is always unsigned, same width. *)

let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt

(* Signed/unsigned invariant comparison. *)

let cmp_sign_invar shape elt =
  let shape', elt' = cmp_sign_matters shape elt in
  (* Polynomial compares are also width-only: fold P8 into I8. *)
  let elt'' =
    match non_signed_variant elt' with
      P8 -> I8
    | x -> x
  in
  shape', elt''

(* Comparison (VTST) where only the element width matters. *)

let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only

(* Register-controlled shift: the shift-count operand is always signed. *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt
(* Genericised constant-shift type-generating function.
   [mkimm] builds the immediate type from the operand width; [?arity] overrides
   the default two-argument arity; [?result] restricts the result element. *)

let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts. *)

let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness. *)

let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed. *)

let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift. *)

let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result. *)

let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs. *)

let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert: only element size matters. *)

let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
(* Get/set lane. *)

let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
    (* Poly lanes are extracted as plain unsigned values. *)
    (match elt with P8 -> U8 | P16 -> U16 | x -> x)

let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts

(* Build a vector from a 64-bit bit pattern (always passed as uint64). *)
let create_vector shape elt =
  let vtype = type_for_elt shape U64 1
  and rtype = type_for_elt shape elt 0 in
  Arity1 (rtype, vtype), elt

(* Conversion/cast between element types; [elt] must be Conv or Cast. *)
let conv make_arity shape elt =
  let edest, esrc = match elt with
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
  | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
(* Operation has an unsigned result even if operands are signed. *)

let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))

(* Wrap a typing function so that its element type collapses to Bits. *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'

(* Extend operation. *)

let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops. *)

let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))

(* Operations where only bits matter. *)

let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Store insns: void result, element type collapsed to Bits. *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt

let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt

(* Wrap a typing function, discarding the element type entirely. *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3

(* Bit-select operations (first operand is unsigned int). *)

let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts

(* Common lists of supported element types. *)

let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
770 let ops =
772 (* Addition. *)
773 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64;
774 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
775 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
776 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
777 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
778 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
779 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
780 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
781 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
782 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
783 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
784 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
785 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
786 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
787 Narrow, "vRaddhn", sign_invar_2, su_16_64;
789 (* Multiplication. *)
790 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
791 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
792 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
793 elts_same_2, [S16; S32];
794 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
795 elts_same_2, [S16; S32];
796 Vmul,
797 [Saturating; Rounding; Doubling; High_half;
798 Instruction_name ["vqrdmulh"]],
799 All (3, Dreg), "vqRdmulh",
800 elts_same_2, [S16; S32];
801 Vmul,
802 [Saturating; Rounding; Doubling; High_half;
803 Instruction_name ["vqrdmulh"]],
804 All (3, Qreg), "vqRdmulhQ",
805 elts_same_2, [S16; S32];
806 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
807 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
809 (* Multiply-accumulate. *)
810 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
811 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
812 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
813 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
815 (* Multiply-subtract. *)
816 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
817 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
818 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
819 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
821 (* Subtraction. *)
822 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64;
823 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
824 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
825 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
826 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
827 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
828 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
829 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
830 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
831 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
832 Narrow, "vRsubhn", sign_invar_2, su_16_64;
834 (* Comparison, equal. *)
835 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
836 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
838 (* Comparison, greater-than or equal. *)
839 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
840 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
842 (* Comparison, less-than or equal. *)
843 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
844 F32 :: su_8_32;
845 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
846 All (3, Qreg), "vcleQ", cmp_sign_matters,
847 F32 :: su_8_32;
849 (* Comparison, greater-than. *)
850 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
851 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
853 (* Comparison, less-than. *)
854 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
855 F32 :: su_8_32;
856 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
857 All (3, Qreg), "vcltQ", cmp_sign_matters,
858 F32 :: su_8_32;
860 (* Compare absolute greater-than or equal. *)
861 Vcage, [Instruction_name ["vacge"]],
862 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
863 Vcage, [Instruction_name ["vacge"]],
864 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
866 (* Compare absolute less-than or equal. *)
867 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
868 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
869 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
870 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
872 (* Compare absolute greater-than or equal. *)
873 Vcagt, [Instruction_name ["vacgt"]],
874 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
875 Vcagt, [Instruction_name ["vacgt"]],
876 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
878 (* Compare absolute less-than or equal. *)
879 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
880 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
881 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
882 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
884 (* Test bits. *)
885 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
886 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
888 (* Absolute difference. *)
889 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
890 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
891 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
893 (* Absolute difference and accumulate. *)
894 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
895 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
896 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
898 (* Max. *)
899 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
900 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
902 (* Min. *)
903 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
904 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
906 (* Pairwise add. *)
907 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
908 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
909 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
911 (* Pairwise add, widen and accumulate. *)
912 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
913 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
915 (* Folding maximum, minimum. *)
916 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
917 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
919 (* Reciprocal step. *)
920 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
921 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
922 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
923 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
925 (* Vector shift left. *)
926 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
927 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
928 Vshl, [Instruction_name ["vrshl"]; Rounding],
929 All (3, Dreg), "vRshl", reg_shift, su_8_64;
930 Vshl, [Instruction_name ["vrshl"]; Rounding],
931 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
932 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
933 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
934 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
935 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
936 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
937 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
939 (* Vector shift right by constant. *)
940 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
941 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
942 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
943 "vRshr_n", shift_right, su_8_64;
944 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
945 "vRshrQ_n", shift_right, su_8_64;
946 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
947 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
948 shift_right_sign_invar, su_16_64;
949 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
950 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
951 "vqRshrn_n", shift_right, su_16_64;
952 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
953 shift_right_to_uns, [S16; S32; S64];
954 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
955 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
957 (* Vector shift left by constant. *)
958 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
959 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
960 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
961 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
962 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
963 shift_left_to_uns, [S8; S16; S32; S64];
964 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
965 shift_left_to_uns, [S8; S16; S32; S64];
966 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
968 (* Vector shift right by constant and accumulate. *)
969 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
970 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
971 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
972 "vRsra_n", shift_right_acc, su_8_64;
973 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
974 "vRsraQ_n", shift_right_acc, su_8_64;
976 (* Vector shift right and insert. *)
977 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
978 P8 :: P16 :: su_8_64;
979 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
980 P8 :: P16 :: su_8_64;
982 (* Vector shift left and insert. *)
983 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
984 P8 :: P16 :: su_8_64;
985 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
986 P8 :: P16 :: su_8_64;
988 (* Absolute value. *)
989 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
990 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
991 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
992 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
994 (* Negate. *)
995 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
996 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
997 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
998 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
1000 (* Bitwise not. *)
1001 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
1002 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
1004 (* Count leading sign bits. *)
1005 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
1006 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
1008 (* Count leading zeros. *)
1009 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
1010 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
1012 (* Count number of set bits. *)
1013 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
1014 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
1016 (* Reciprocal estimate. *)
1017 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
1018 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
1020 (* Reciprocal square-root estimate. *)
1021 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
1022 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
1024 (* Get lanes from a vector. *)
1025 Vget_lane,
1026 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
1027 Instruction_name ["vmov"]],
1028 Use_operands [| Corereg; Dreg; Immed |],
1029 "vget_lane", get_lane, pf_su_8_32;
1030 Vget_lane,
1031 [InfoWord;
1032 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1033 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1034 Use_operands [| Corereg; Dreg; Immed |],
1035 "vget_lane", notype_2, [S64; U64];
1036 Vget_lane,
1037 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
1038 Instruction_name ["vmov"]],
1039 Use_operands [| Corereg; Qreg; Immed |],
1040 "vgetQ_lane", get_lane, pf_su_8_32;
1041 Vget_lane,
1042 [InfoWord;
1043 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1044 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1045 Use_operands [| Corereg; Qreg; Immed |],
1046 "vgetQ_lane", notype_2, [S64; U64];
1048 (* Set lanes in a vector. *)
1049 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1050 Instruction_name ["vmov"]],
1051 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1052 set_lane, pf_su_8_32;
1053 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1054 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1055 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1056 set_lane_notype, [S64; U64];
1057 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1058 Instruction_name ["vmov"]],
1059 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1060 set_lane, pf_su_8_32;
1061 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1062 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1063 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1064 set_lane_notype, [S64; U64];
1066 (* Create vector from literal bit pattern. *)
1067 Vcreate,
1068 [No_op], (* Not really, but it can yield various things that are too
1069 hard for the test generator at this time. *)
1070 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1071 pf_su_8_64;
1073 (* Set all lanes to the same value. *)
1074 Vdup_n, [],
1075 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1076 pf_su_8_32;
1077 Vdup_n,
1078 [Instruction_name ["vmov"];
1079 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1080 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1081 [S64; U64];
1082 Vdup_n, [],
1083 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1084 pf_su_8_32;
1085 Vdup_n,
1086 [Instruction_name ["vmov"];
1087 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1088 Use_operands [| Dreg; Corereg; Corereg |]]],
1089 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1090 [S64; U64];
1092 (* These are just aliases for the above. *)
1093 Vmov_n,
1094 [Builtin_name "vdup_n"],
1095 Use_operands [| Dreg; Corereg |],
1096 "vmov_n", bits_1, pf_su_8_32;
1097 Vmov_n,
1098 [Builtin_name "vdup_n";
1099 Instruction_name ["vmov"];
1100 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1101 Use_operands [| Dreg; Corereg |],
1102 "vmov_n", notype_1, [S64; U64];
1103 Vmov_n,
1104 [Builtin_name "vdupQ_n"],
1105 Use_operands [| Qreg; Corereg |],
1106 "vmovQ_n", bits_1, pf_su_8_32;
1107 Vmov_n,
1108 [Builtin_name "vdupQ_n";
1109 Instruction_name ["vmov"];
1110 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1111 Use_operands [| Dreg; Corereg; Corereg |]]],
1112 Use_operands [| Qreg; Corereg |],
1113 "vmovQ_n", notype_1, [S64; U64];
1115 (* Duplicate, lane version. We can't use Use_operands here because the
1116 rightmost register (always Dreg) would be picked up by find_key_operand,
1117 when we want the leftmost register to be used in this case (otherwise
1118 the modes are indistinguishable in neon.md, etc. *)
1119 Vdup_lane,
1120 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1121 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1122 Vdup_lane,
1123 [No_op; Const_valuator (fun _ -> 0)],
1124 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1125 Vdup_lane,
1126 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1127 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1128 Vdup_lane,
1129 [No_op; Const_valuator (fun _ -> 0)],
1130 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1132 (* Combining vectors. *)
1133 Vcombine, [No_op],
1134 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1135 pf_su_8_64;
1137 (* Splitting vectors. *)
1138 Vget_high, [No_op],
1139 Use_operands [| Dreg; Qreg |], "vget_high",
1140 notype_1, pf_su_8_64;
1141 Vget_low, [Instruction_name ["vmov"];
1142 Disassembles_as [Use_operands [| Dreg; Dreg |]]],
1143 Use_operands [| Dreg; Qreg |], "vget_low",
1144 notype_1, pf_su_8_64;
1146 (* Conversions. *)
1147 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1148 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1149 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1150 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1151 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1152 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1153 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1154 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1156 (* Move, narrowing. *)
1157 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1158 Narrow, "vmovn", sign_invar_1, su_16_64;
1159 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1160 Narrow, "vqmovn", elts_same_1, su_16_64;
1161 Vmovn,
1162 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1163 Narrow, "vqmovun", dst_unsign_1,
1164 [S16; S32; S64];
1166 (* Move, long. *)
1167 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1168 Long, "vmovl", elts_same_1, su_8_32;
1170 (* Table lookup. *)
1171 Vtbl 1,
1172 [Instruction_name ["vtbl"];
1173 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1174 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1175 Vtbl 2, [Instruction_name ["vtbl"]],
1176 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1177 [U8; S8; P8];
1178 Vtbl 3, [Instruction_name ["vtbl"]],
1179 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1180 [U8; S8; P8];
1181 Vtbl 4, [Instruction_name ["vtbl"]],
1182 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1183 [U8; S8; P8];
1185 (* Extended table lookup. *)
1186 Vtbx 1,
1187 [Instruction_name ["vtbx"];
1188 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1189 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1190 Vtbx 2, [Instruction_name ["vtbx"]],
1191 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1192 [U8; S8; P8];
1193 Vtbx 3, [Instruction_name ["vtbx"]],
1194 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1195 [U8; S8; P8];
1196 Vtbx 4, [Instruction_name ["vtbx"]],
1197 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1198 [U8; S8; P8];
1200 (* Multiply, lane. (note: these were undocumented at the time of
1201 writing). *)
1202 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1203 [S16; S32; U16; U32; F32];
1204 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1205 [S16; S32; U16; U32; F32];
1207 (* Multiply-accumulate, lane. *)
1208 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1209 [S16; S32; U16; U32; F32];
1210 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1211 [S16; S32; U16; U32; F32];
1212 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1213 [S16; S32; U16; U32];
1214 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1215 elts_same_io_lane, [S16; S32];
1217 (* Multiply-subtract, lane. *)
1218 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1219 [S16; S32; U16; U32; F32];
1220 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1221 [S16; S32; U16; U32; F32];
1222 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1223 [S16; S32; U16; U32];
1224 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1225 elts_same_io_lane, [S16; S32];
1227 (* Long multiply, lane. *)
1228 Vmull_lane, [],
1229 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1231 (* Saturating doubling long multiply, lane. *)
1232 Vqdmull_lane, [Saturating; Doubling],
1233 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1235 (* Saturating doubling long multiply high, lane. *)
1236 Vqdmulh_lane, [Saturating; Halving],
1237 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1238 Vqdmulh_lane, [Saturating; Halving],
1239 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1240 Vqdmulh_lane, [Saturating; Halving; Rounding;
1241 Instruction_name ["vqrdmulh"]],
1242 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1243 Vqdmulh_lane, [Saturating; Halving; Rounding;
1244 Instruction_name ["vqrdmulh"]],
1245 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1247 (* Vector multiply by scalar. *)
1248 Vmul_n, [InfoWord;
1249 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1250 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1251 sign_invar_2, [S16; S32; U16; U32; F32];
1252 Vmul_n, [InfoWord;
1253 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1254 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1255 sign_invar_2, [S16; S32; U16; U32; F32];
1257 (* Vector long multiply by scalar. *)
1258 Vmull_n, [Instruction_name ["vmull"];
1259 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1260 Wide_scalar, "vmull_n",
1261 elts_same_2, [S16; S32; U16; U32];
1263 (* Vector saturating doubling long multiply by scalar. *)
1264 Vqdmull_n, [Saturating; Doubling;
1265 Disassembles_as [Use_operands [| Qreg; Dreg;
1266 Element_of_dreg |]]],
1267 Wide_scalar, "vqdmull_n",
1268 elts_same_2, [S16; S32];
1270 (* Vector saturating doubling long multiply high by scalar. *)
1271 Vqdmulh_n,
1272 [Saturating; Halving; InfoWord;
1273 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1274 Use_operands [| Qreg; Qreg; Corereg |],
1275 "vqdmulhQ_n", elts_same_2, [S16; S32];
1276 Vqdmulh_n,
1277 [Saturating; Halving; InfoWord;
1278 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1279 Use_operands [| Dreg; Dreg; Corereg |],
1280 "vqdmulh_n", elts_same_2, [S16; S32];
1281 Vqdmulh_n,
1282 [Saturating; Halving; Rounding; InfoWord;
1283 Instruction_name ["vqrdmulh"];
1284 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1285 Use_operands [| Qreg; Qreg; Corereg |],
1286 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1287 Vqdmulh_n,
1288 [Saturating; Halving; Rounding; InfoWord;
1289 Instruction_name ["vqrdmulh"];
1290 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1291 Use_operands [| Dreg; Dreg; Corereg |],
1292 "vqRdmulh_n", elts_same_2, [S16; S32];
1294 (* Vector multiply-accumulate by scalar. *)
1295 Vmla_n, [InfoWord;
1296 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1297 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1298 sign_invar_io, [S16; S32; U16; U32; F32];
1299 Vmla_n, [InfoWord;
1300 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1301 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1302 sign_invar_io, [S16; S32; U16; U32; F32];
1303 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1304 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1305 [S16; S32];
1307 (* Vector multiply subtract by scalar. *)
1308 Vmls_n, [InfoWord;
1309 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1310 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1311 sign_invar_io, [S16; S32; U16; U32; F32];
1312 Vmls_n, [InfoWord;
1313 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1314 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1315 sign_invar_io, [S16; S32; U16; U32; F32];
1316 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1317 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1318 [S16; S32];
1320 (* Vector extract. *)
1321 Vext, [Const_valuator (fun _ -> 0)],
1322 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1323 pf_su_8_64;
1324 Vext, [Const_valuator (fun _ -> 0)],
1325 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1326 pf_su_8_64;
1328 (* Reverse elements. *)
1329 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1330 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1331 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1332 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1333 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1334 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1336 (* Bit selection. *)
1337 Vbsl,
1338 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1339 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1340 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1341 pf_su_8_64;
1342 Vbsl,
1343 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1344 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1345 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1346 pf_su_8_64;
1348 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1349 generating good code for intrinsics which return structure types --
1350 builtins work well by themselves (and understand that the values being
1351 stored on e.g. the stack also reside in registers, so can optimise the
1352 stores away entirely if the results are used immediately), but
1353 intrinsics are very much less efficient. Maybe something can be improved
1354 re: inlining, or tweaking the ABI used for intrinsics (a special call
attribute?). *)
1357 (* LLVM LOCAL begin Use return by value instead of ReturnPtr. *)
1358 Vtrn, [], Use_operands [| VecArray (2, Dreg); Dreg; Dreg |],
1359 "vtrn", bits_2, pf_su_8_32;
1360 Vtrn, [], Use_operands [| VecArray (2, Qreg); Qreg; Qreg |],
1361 "vtrnQ", bits_2, pf_su_8_32;
1363 (* Zip elements. *)
1364 Vzip, [], Use_operands [| VecArray (2, Dreg); Dreg; Dreg |],
1365 "vzip", bits_2, pf_su_8_32;
1366 Vzip, [], Use_operands [| VecArray (2, Qreg); Qreg; Qreg |],
1367 "vzipQ", bits_2, pf_su_8_32;
1369 (* Unzip elements. *)
1370 Vuzp, [], Use_operands [| VecArray (2, Dreg); Dreg; Dreg |],
1371 "vuzp", bits_2, pf_su_8_32;
1372 Vuzp, [], Use_operands [| VecArray (2, Qreg); Qreg; Qreg |],
1373 "vuzpQ", bits_2, pf_su_8_32;
1374 (* LLVM LOCAL end Use return by value instead of ReturnPtr. *)
1376 (* Element/structure loads. VLD1 variants. *)
1377 Vldx 1,
1378 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1379 CstPtrTo Corereg |]]],
1380 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1381 pf_su_8_64;
1382 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1383 CstPtrTo Corereg |]]],
1384 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1385 pf_su_8_64;
1387 Vldx_lane 1,
1388 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1389 CstPtrTo Corereg |]]],
1390 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1391 "vld1_lane", bits_3, pf_su_8_32;
1392 Vldx_lane 1,
1393 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1394 CstPtrTo Corereg |]];
1395 Const_valuator (fun _ -> 0)],
1396 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1397 "vld1_lane", bits_3, [S64; U64];
1398 Vldx_lane 1,
1399 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1400 CstPtrTo Corereg |]]],
1401 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1402 "vld1Q_lane", bits_3, pf_su_8_32;
1403 Vldx_lane 1,
1404 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1405 CstPtrTo Corereg |]]],
1406 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1407 "vld1Q_lane", bits_3, [S64; U64];
1409 Vldx_dup 1,
1410 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1411 CstPtrTo Corereg |]]],
1412 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1413 bits_1, pf_su_8_32;
1414 Vldx_dup 1,
1415 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1416 CstPtrTo Corereg |]]],
1417 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1418 bits_1, [S64; U64];
1419 Vldx_dup 1,
1420 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1421 CstPtrTo Corereg |]]],
1422 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1423 bits_1, pf_su_8_32;
1424 Vldx_dup 1,
1425 [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1426 CstPtrTo Corereg |]]],
1427 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1428 bits_1, [S64; U64];
1430 (* VST1 variants. *)
1431 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1432 PtrTo Corereg |]]],
1433 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1434 store_1, pf_su_8_64;
1435 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1436 PtrTo Corereg |]]],
1437 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1438 store_1, pf_su_8_64;
1440 Vstx_lane 1,
1441 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1442 CstPtrTo Corereg |]]],
1443 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1444 "vst1_lane", store_3, pf_su_8_32;
1445 Vstx_lane 1,
1446 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1447 CstPtrTo Corereg |]];
1448 Const_valuator (fun _ -> 0)],
1449 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1450 "vst1_lane", store_3, [U64; S64];
1451 Vstx_lane 1,
1452 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1453 CstPtrTo Corereg |]]],
1454 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1455 "vst1Q_lane", store_3, pf_su_8_32;
1456 Vstx_lane 1,
1457 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1458 CstPtrTo Corereg |]]],
1459 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1460 "vst1Q_lane", store_3, [U64; S64];
1462 (* VLD2 variants. *)
1463 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1464 "vld2", bits_1, pf_su_8_32;
1465 Vldx 2, [Instruction_name ["vld1"]],
1466 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1467 "vld2", bits_1, [S64; U64];
1468 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1469 CstPtrTo Corereg |];
1470 Use_operands [| VecArray (2, Dreg);
1471 CstPtrTo Corereg |]]],
1472 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1473 "vld2Q", bits_1, pf_su_8_32;
1475 Vldx_lane 2,
1476 [Disassembles_as [Use_operands
1477 [| VecArray (2, Element_of_dreg);
1478 CstPtrTo Corereg |]]],
1479 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1480 VecArray (2, Dreg); Immed |],
1481 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1482 Vldx_lane 2,
1483 [Disassembles_as [Use_operands
1484 [| VecArray (2, Element_of_dreg);
1485 CstPtrTo Corereg |]]],
1486 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1487 VecArray (2, Qreg); Immed |],
1488 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1490 Vldx_dup 2,
1491 [Disassembles_as [Use_operands
1492 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1493 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1494 "vld2_dup", bits_1, pf_su_8_32;
1495 Vldx_dup 2,
1496 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1497 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1498 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1499 "vld2_dup", bits_1, [S64; U64];
1501 (* VST2 variants. *)
1502 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1503 PtrTo Corereg |]]],
1504 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1505 store_1, pf_su_8_32;
1506 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1507 PtrTo Corereg |]];
1508 Instruction_name ["vst1"]],
1509 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1510 store_1, [S64; U64];
1511 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1512 PtrTo Corereg |];
1513 Use_operands [| VecArray (2, Dreg);
1514 PtrTo Corereg |]]],
1515 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1516 store_1, pf_su_8_32;
1518 Vstx_lane 2,
1519 [Disassembles_as [Use_operands
1520 [| VecArray (2, Element_of_dreg);
1521 CstPtrTo Corereg |]]],
1522 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1523 store_3, P8 :: P16 :: F32 :: su_8_32;
1524 Vstx_lane 2,
1525 [Disassembles_as [Use_operands
1526 [| VecArray (2, Element_of_dreg);
1527 CstPtrTo Corereg |]]],
1528 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1529 store_3, [P16; F32; U16; U32; S16; S32];
1531 (* VLD3 variants. *)
1532 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1533 "vld3", bits_1, pf_su_8_32;
1534 Vldx 3, [Instruction_name ["vld1"]],
1535 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1536 "vld3", bits_1, [S64; U64];
1537 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1538 CstPtrTo Corereg |];
1539 Use_operands [| VecArray (3, Dreg);
1540 CstPtrTo Corereg |]]],
1541 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1542 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1544 Vldx_lane 3,
1545 [Disassembles_as [Use_operands
1546 [| VecArray (3, Element_of_dreg);
1547 CstPtrTo Corereg |]]],
1548 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1549 VecArray (3, Dreg); Immed |],
1550 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1551 Vldx_lane 3,
1552 [Disassembles_as [Use_operands
1553 [| VecArray (3, Element_of_dreg);
1554 CstPtrTo Corereg |]]],
1555 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1556 VecArray (3, Qreg); Immed |],
1557 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1559 Vldx_dup 3,
1560 [Disassembles_as [Use_operands
1561 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1562 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1563 "vld3_dup", bits_1, pf_su_8_32;
1564 Vldx_dup 3,
1565 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1566 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1567 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1568 "vld3_dup", bits_1, [S64; U64];
1570 (* VST3 variants. *)
1571 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1572 PtrTo Corereg |]]],
1573 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1574 store_1, pf_su_8_32;
1575 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1576 PtrTo Corereg |]];
1577 Instruction_name ["vst1"]],
1578 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1579 store_1, [S64; U64];
1580 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1581 PtrTo Corereg |];
1582 Use_operands [| VecArray (3, Dreg);
1583 PtrTo Corereg |]]],
1584 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1585 store_1, pf_su_8_32;
1587 Vstx_lane 3,
1588 [Disassembles_as [Use_operands
1589 [| VecArray (3, Element_of_dreg);
1590 CstPtrTo Corereg |]]],
1591 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1592 store_3, P8 :: P16 :: F32 :: su_8_32;
1593 Vstx_lane 3,
1594 [Disassembles_as [Use_operands
1595 [| VecArray (3, Element_of_dreg);
1596 CstPtrTo Corereg |]]],
1597 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1598 store_3, [P16; F32; U16; U32; S16; S32];
1600 (* VLD4/VST4 variants. *)
1601 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1602 "vld4", bits_1, pf_su_8_32;
1603 Vldx 4, [Instruction_name ["vld1"]],
1604 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1605 "vld4", bits_1, [S64; U64];
1606 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1607 CstPtrTo Corereg |];
1608 Use_operands [| VecArray (4, Dreg);
1609 CstPtrTo Corereg |]]],
1610 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1611 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1613 Vldx_lane 4,
1614 [Disassembles_as [Use_operands
1615 [| VecArray (4, Element_of_dreg);
1616 CstPtrTo Corereg |]]],
1617 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1618 VecArray (4, Dreg); Immed |],
1619 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1620 Vldx_lane 4,
1621 [Disassembles_as [Use_operands
1622 [| VecArray (4, Element_of_dreg);
1623 CstPtrTo Corereg |]]],
1624 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1625 VecArray (4, Qreg); Immed |],
1626 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1628 Vldx_dup 4,
1629 [Disassembles_as [Use_operands
1630 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1631 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1632 "vld4_dup", bits_1, pf_su_8_32;
1633 Vldx_dup 4,
1634 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1635 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1636 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1637 "vld4_dup", bits_1, [S64; U64];
1639 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1640 PtrTo Corereg |]]],
1641 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1642 store_1, pf_su_8_32;
1643 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1644 PtrTo Corereg |]];
1645 Instruction_name ["vst1"]],
1646 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1647 store_1, [S64; U64];
1648 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1649 PtrTo Corereg |];
1650 Use_operands [| VecArray (4, Dreg);
1651 PtrTo Corereg |]]],
1652 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1653 store_1, pf_su_8_32;
1655 Vstx_lane 4,
1656 [Disassembles_as [Use_operands
1657 [| VecArray (4, Element_of_dreg);
1658 CstPtrTo Corereg |]]],
1659 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1660 store_3, P8 :: P16 :: F32 :: su_8_32;
1661 Vstx_lane 4,
1662 [Disassembles_as [Use_operands
1663 [| VecArray (4, Element_of_dreg);
1664 CstPtrTo Corereg |]]],
1665 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1666 store_3, [P16; F32; U16; U32; S16; S32];
1668 (* Logical operations. And. *)
1669 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64;
1670 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1672 (* Or. *)
1673 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64;
1674 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1676 (* Eor. *)
1677 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64;
1678 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1680 (* Bic (And-not). *)
1681 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64;
1682 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1684 (* Or-not. *)
1685 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64;
1686 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
(* Reinterpretation casts.  For every pair of distinct element types in
   [elems], emit a D-register and a Q-register "vreinterpret" variant.
   Each fold builds its result from an empty base list; the inner fold
   collects all Cast (convto, convfrom) pairs for one destination type.
   NOTE(review): the scraped view of this file had dropped the short
   lines holding the fold base accumulators ([]) and the `in' closing
   the [types] binding; they are restored here to match upstream GCC. *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  List.fold_right
    (fun convto acc ->
      (* All source types this destination type can be reinterpreted
         from -- i.e. every element type except [convto] itself.  *)
      let types =
        List.fold_right
          (fun convfrom acc ->
            if convfrom <> convto then
              Cast (convto, convfrom) :: acc
            else
              acc)
          elems
          []
      in
      let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
                  "vreinterpret", conv_1, types
      and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
                  "vreinterpretQ", conv_1, types in
      dconv :: qconv :: acc)
    elems
    []
1710 (* Output routines. *)
(* Render an element type as the textual suffix used in intrinsic and
   builtin names, e.g. S8 -> "s8", F32 -> "f32", B32 -> "32".
   A conversion or cast yields both element names joined by an
   underscore.  Raises Failure on NoElts.  *)
let rec string_of_elt elt =
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | P8 -> "p8" | P16 -> "p16" | F32 -> "f32"
  | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
  | NoElts -> failwith "No elts"
(* As string_of_elt, except that the two halves of a conversion or
   cast are separated by a dot instead of an underscore (used where
   the output wants e.g. "s32.f32").  *)
let string_of_elt_dots = function
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | elt -> string_of_elt elt
(* Render a vector type as its C type name.  Vector and scalar element
   types get the "_t" suffix applied by the top-level call (e.g.
   "int8x8_t"); immediates, void, builtin scalar stand-ins, pointers,
   const qualifiers and array aggregates are spelled out directly.
   NOTE(review): the scraped view of this file had dropped the short
   line holding the `in' between the inner definition and its use;
   it is restored here to match upstream GCC.  *)
let string_of_vectype vt =
  (* [affix] is applied to names that take the "_t"-style suffix; it is
     deliberately NOT applied inside pointer/const wrappers' output,
     and is reset for the base type of an array aggregate.  *)
  let rec name affix = function
    T_int8x8    -> affix "int8x8"
  | T_int8x16   -> affix "int8x16"
  | T_int16x4   -> affix "int16x4"
  | T_int16x8   -> affix "int16x8"
  | T_int32x2   -> affix "int32x2"
  | T_int32x4   -> affix "int32x4"
  | T_int64x1   -> affix "int64x1"
  | T_int64x2   -> affix "int64x2"
  | T_uint8x8   -> affix "uint8x8"
  | T_uint8x16  -> affix "uint8x16"
  | T_uint16x4  -> affix "uint16x4"
  | T_uint16x8  -> affix "uint16x8"
  | T_uint32x2  -> affix "uint32x2"
  | T_uint32x4  -> affix "uint32x4"
  | T_uint64x1  -> affix "uint64x1"
  | T_uint64x2  -> affix "uint64x2"
  | T_float32x2 -> affix "float32x2"
  | T_float32x4 -> affix "float32x4"
  | T_poly8x8   -> affix "poly8x8"
  | T_poly8x16  -> affix "poly8x16"
  | T_poly16x4  -> affix "poly16x4"
  | T_poly16x8  -> affix "poly16x8"
  | T_int8      -> affix "int8"
  | T_int16     -> affix "int16"
  | T_int32     -> affix "int32"
  | T_int64     -> affix "int64"
  | T_uint8     -> affix "uint8"
  | T_uint16    -> affix "uint16"
  | T_uint32    -> affix "uint32"
  | T_uint64    -> affix "uint64"
  | T_poly8     -> affix "poly8"
  | T_poly16    -> affix "poly16"
  | T_float32   -> affix "float32"
  | T_immediate _ -> "const int"
  | T_void      -> "void"
  | T_intQI     -> "__builtin_neon_qi"
  | T_intHI     -> "__builtin_neon_hi"
  | T_intSI     -> "__builtin_neon_si"
  | T_intDI     -> "__builtin_neon_di"
  | T_arrayof (num, base) ->
      (* Array aggregates are named e.g. "int8x8x2_t": the suffix is
         applied to the composed name, so the base is rendered with the
         identity affix first.  *)
      let basename = name (fun x -> x) base in
      affix (Printf.sprintf "%sx%d" basename num)
  | T_ptrto x ->
      let basename = name affix x in
      Printf.sprintf "%s *" basename
  | T_const x ->
      let basename = name affix x in
      Printf.sprintf "const %s" basename
  in
    name (fun x -> x ^ "_t") vt
(* LLVM LOCAL begin Print builtin type names that include the vector type. *)
(* Map a builtin argument-type tag to the name of the builtin C type used
   for it.  From the visible cases: "d" tags use 64-bit (D-register sized)
   vector modes (v8qi, v4hi, v2si, v1di, v2sf) and "q" tags use 128-bit
   (Q-register sized) ones (v16qi, v8hi, v4si, v2di, v4sf); the trailing
   digit in the printed name gives the number of vectors packed into the
   struct mode (TId=2, EId=3, OId=4; OIq=2, CIq=3, XIq=4).  *)
let string_of_inttype = function
    B_TId8mode -> "__builtin_neon_v8qi2"
  | B_TId16mode -> "__builtin_neon_v4hi2"
  | B_TId32mode -> "__builtin_neon_v2si2"
  | B_TId64mode -> "__builtin_neon_v1di2"
  | B_TIdSFmode -> "__builtin_neon_v2sf2"
  | B_EId8mode -> "__builtin_neon_v8qi3"
  | B_EId16mode -> "__builtin_neon_v4hi3"
  | B_EId32mode -> "__builtin_neon_v2si3"
  | B_EId64mode -> "__builtin_neon_v1di3"
  | B_EIdSFmode -> "__builtin_neon_v2sf3"
  | B_OId8mode -> "__builtin_neon_v8qi4"
  | B_OId16mode -> "__builtin_neon_v4hi4"
  | B_OId32mode -> "__builtin_neon_v2si4"
  | B_OId64mode -> "__builtin_neon_v1di4"
  | B_OIdSFmode -> "__builtin_neon_v2sf4"
  | B_OIq8mode -> "__builtin_neon_v16qi2"
  | B_OIq16mode -> "__builtin_neon_v8hi2"
  | B_OIq32mode -> "__builtin_neon_v4si2"
  | B_OIq64mode -> "__builtin_neon_v2di2"
  | B_OIqSFmode -> "__builtin_neon_v4sf2"
  | B_CIq8mode -> "__builtin_neon_v16qi3"
  | B_CIq16mode -> "__builtin_neon_v8hi3"
  | B_CIq32mode -> "__builtin_neon_v4si3"
  | B_CIq64mode -> "__builtin_neon_v2di3"
  | B_CIqSFmode -> "__builtin_neon_v4sf3"
  | B_XIq8mode -> "__builtin_neon_v16qi4"
  | B_XIq16mode -> "__builtin_neon_v8hi4"
  | B_XIq32mode -> "__builtin_neon_v4si4"
  | B_XIq64mode -> "__builtin_neon_v2di4"
  | B_XIqSFmode -> "__builtin_neon_v4sf4"
(* LLVM LOCAL end Print builtin type names that include the vector type. *)
(* Lower-case name of a machine mode, as used in builtin names.  *)
let string_of_mode mode =
  match mode with
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  (* LLVM LOCAL *)
  | SF -> "sf"
  | V1DI -> "v1di"
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).  The intrinsic name is the fully lower-cased form;
   builtin_name below instead deletes the uppercase characters.
   NOTE(review): String.lowercase is the legacy Latin-1-aware function,
   deprecated/removed in modern OCaml in favour of lowercase_ascii; kept
   as-is for compatibility with the OCaml this generator targets. *)
let intrinsic_name name = String.lowercase name
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list; the first Flipped/Builtin_name entry in the
   list wins.  The builtin name is then formed by deleting every character
   that is changed by lower-casing (i.e. the uppercase letters); digits,
   underscores and lowercase letters are kept.  *)
let builtin_name features name =
  let name =
    List.fold_right
      (fun el acc ->
        match el with
        | Flipped x | Builtin_name x -> x
        | _ -> acc)
      features name
  in
  (* True for any character unaffected by lower-casing.  *)
  let keep c =
    let s = String.make 1 c in
    (String.lowercase s) = s
  in
  let buf = Buffer.create (String.length name) in
  String.iter (fun c -> if keep c then Buffer.add_char buf c) name;
  Buffer.contents buf
(* Transform an arity into the list of C type names of its return type
   followed by its argument types.  *)
let strings_of_arity a =
  let vectypes =
    match a with
    | Arity0 vt -> [vt]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vectypes
(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore; e.g. "vshl_n" yields the
   instruction name "vshl" (see get_insn_names).  *)
let suffixes_to_strip = [ "n"; "lane"; "dup" ]
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  This is done by getting the equivalent builtin name and
   stripping any suffixes from the list at the top of this file, unless
   the features list presents with an Instruction_name entry, in which
   case that is used; or unless the features list presents with a Flipped
   entry, in which case that is used.  If both such entries are present,
   the first in the list will be chosen.
   NOTE(review): the source dump appears to have lost this definition's
   keyword-only lines ("end", "in", "try"); they are restored here.  *)
let get_insn_names features name =
  let names =
    try
      begin
        match List.find (fun feature -> match feature with
                                          Instruction_name _ -> true
                                        | Flipped _ -> true
                                        | _ -> false) features
        with
          Instruction_name names -> names
        | Flipped name -> [name]
        | _ -> assert false  (* List.find only returns the above two.  *)
      end
    with Not_found -> [builtin_name features name]
  in
  begin
    List.map (fun name' ->
      try
        (* Strip a known suffix only when it follows the last '_'.  *)
        let underscore = String.rindex name' '_' in
        let our_suffix = String.sub name' (underscore + 1)
                                   ((String.length name') - underscore - 1)
        in
        let rec strip remaining_suffixes =
          match remaining_suffixes with
            [] -> name'
          | s::ss when our_suffix = s -> String.sub name' 0 underscore
          | _::ss -> strip ss
        in
        strip suffixes_to_strip
      (* No underscore at all, or an empty tail: keep the name as-is.  *)
      with (Not_found | Invalid_argument _) -> name') names
  end
(* Apply [f] to each element of [elts] and append the results,
   comma-separated, to the string [acc].  *)
let commas f elts acc =
  acc ^ String.concat ", " (List.map f elts)
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.
   NOTE(review): the source dump appears to have lost the "try" line of
   this definition; it is restored here.  *)
let analyze_all_shapes features shape f =
  try
    match List.find (fun feature ->
                       match feature with Disassembles_as _ -> true
                                        | _ -> false)
                    features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false  (* List.find only returns Disassembles_as.  *)
  with Not_found -> [f shape]