(* Common code for ARM NEON header file, documentation and test case
   generators.

   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  *)
(* Shorthand types for vector elements.
   S* = signed, U* = unsigned, F32 = float, P* = polynomial,
   I* = sign-agnostic integer, B* = untyped bits (see [elt_class]).
   [Conv]/[Cast] pair a destination and a source element type; [Cast]
   of mismatched widths raises [MixedMode] in [elt_width].  *)
type elts = S8
          | S16
          | S32
          | S64
          | F32
          | U8
          | U16
          | U32
          | U64
          | P8
          | P16
          | I8
          | I16
          | I32
          | I64
          | B8
          | B16
          | B32
          | B64
          | Conv of elts * elts
          | Cast of elts * elts
          | NoElts
(* Element-type classes, parallel to the [elts] constructors above.
   [ConvClass] pairs the classes of a conversion's destination/source.  *)
type eltclass = Signed
              | Unsigned
              | Float
              | Poly
              | Int
              | Bits
              | ConvClass of eltclass * eltclass
              | NoType
(* These vector types correspond directly to C types.
   NOTE(review): the garbled source elided some scalar variants between
   [T_immediate] and [T_float32].  [T_void] is reconstructed here because
   [store_1]/[store_3] below use it; other elided variants could not be
   grounded and must be restored from the original file.  *)
type vectype = T_int8x8    | T_int8x16
             | T_int16x4   | T_int16x8
             | T_int32x2   | T_int32x4
             | T_int64x1   | T_int64x2
             | T_uint8x8   | T_uint8x16
             | T_uint16x4  | T_uint16x8
             | T_uint32x2  | T_uint32x4
             | T_uint64x1  | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8   | T_poly8x16
             | T_poly16x4  | T_poly16x8
             | T_immediate of int * int   (* Immediate with (low, high) bounds. *)
             | T_void
             | T_float32
             | T_arrayof of int * vectype
             | T_ptrto of vectype
             | T_const of vectype
(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).  *)

type inttype = B_TImode
             | B_EImode
             | B_OImode
             | B_CImode
             | B_XImode
(* One operand position of an instruction shape: a D or Q vector register,
   a core (general-purpose) register, an immediate, an array of registers,
   or a (possibly const) pointer to one of these.  *)
type shape_elt = Dreg
               | Qreg
               | Corereg
               | Immed
               | VecArray of int * shape_elt
               | PtrTo of shape_elt
               | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator.  *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands *)
(* Overall shape of an instruction, in terms of its operands.
   NOTE(review): the nullary constructors (Long, Wide, Wide_lane,
   Wide_scalar, Narrow, Long_imm, Narrow_imm) were elided in the garbled
   source; they are reconstructed from their (argument-free) uses in
   [mode_of_elt], [shapemap] and [regmap] below — confirm against the
   original file.  *)
type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Wide_lane
                | Wide_scalar
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Pair_result of shape_elt
(* Arity of an intrinsic: the result type followed by zero to four
   argument types.  *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype
(* Machine modes: 64-bit (D) vectors, 128-bit (Q) vectors, and scalars.
   NOTE(review): the scalar modes QI/HI/SI/SF were elided in the garbled
   source; they are reconstructed from their use in the Corereg arm of
   [mode_of_elt] — confirm against the original file.  *)
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF
139 (* Ops with scalar. *)
161 (* Vector extract. *)
163 (* Reverse elements. *)
167 (* Transposition ops. *)
171 (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
177 (* Set/extract lanes from a vector. *)
180 (* Initialize vector from bit pattern. *)
182 (* Set all lanes to same value. *)
184 | Vmov_n
(* Is this the same? *)
185 (* Duplicate scalar to all lanes of vector. *)
187 (* Combine vectors. *)
189 (* Get quadword high/low parts. *)
192 (* Convert vectors. *)
195 (* Narrow/lengthen vectors. *)
201 (* Reinterpret casts. *)
204 (* Features used for documentation, to distinguish between some instruction
205 variants, and to signal special requirements (e.g. swapping arguments). *)
214 | Flipped
of string (* Builtin name to use with flipped arguments. *)
215 | InfoWord
(* Pass an extra word for signage/rounding etc. (always passed
216 for All _, Long, Wide, Narrow shape_forms. *)
217 | ReturnPtr
(* Pass explicit pointer to return value as first argument. *)
218 (* A specification as to the shape of instruction expected upon
219 disassembly, used if it differs from the shape used to build the
220 intrinsic prototype. Multiple entries in the constructor's argument
221 indicate that the intrinsic expands to more than one assembly
222 instruction, each with a corresponding shape specified here. *)
223 | Disassembles_as
of shape_form list
224 | Builtin_name
of string (* Override the name of the builtin. *)
225 (* Override the name of the instruction. If more than one name
226 is specified, it means that the instruction can have any of those
228 | Instruction_name
of string list
229 (* Mark that the intrinsic yields no instructions, or expands to yield
230 behavior that the test generator cannot test. *)
232 (* Mark that the intrinsic has constant arguments that cannot be set
233 to the defaults (zero for pointers and one otherwise) in the test
234 cases. The function supplied must return the integer to be written
235 into the testcase for the argument number (0-based) supplied to it. *)
236 | Const_valuator
of (int -> int)
(* Raised when a [Cast] pairs two element types whose widths differ.  *)
exception MixedMode of elts * elts

(* Width in bits of a single vector element.
   NOTE(review): the [| Conv (a, b) ->] arm header was elided in the
   garbled source; it is reconstructed from the surviving body (which
   binds [a] and [b]) and the separate [Cast] arm — confirm.  *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (a, b) ->
      (* A conversion element only has a well-defined width when both
         sides agree.  *)
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else failwith "element width?"
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"
(* Class of an element type.
   NOTE(review): the F32, P8/P16 and NoElts arms were elided in the
   garbled source; they are reconstructed from the [eltclass]
   constructors (Float, Poly, NoType) — confirm against the original.  *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | F32 -> Float
  | P8 | P16 -> Poly
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType

(* Inverse mapping: the element of class [c] with width [w] bits.
   NOTE(review): only the Unsigned 16/32/64 rows and the failure arm
   survived the garbling; the rest of the table is reconstructed
   systematically from the [elts] constructors — confirm.  *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"
(* Return unsigned integer element the same width as argument.  *)
let unsigned_of_elt elt = elt_of_class_width Unsigned (elt_width elt)

(* Return signed integer element the same width as argument.  *)
let signed_of_elt elt = elt_of_class_width Signed (elt_width elt)

(* Return untyped bits element the same width as argument.  *)
let bits_of_elt elt = elt_of_class_width Bits (elt_width elt)

(* Map signed/unsigned/poly elements to the sign-agnostic I* element of
   the same width; leave anything else unchanged.
   NOTE(review): this function's body was entirely elided in the garbled
   source; the reconstruction follows its name and its uses in
   [make_sign_invariant] and [cmp_sign_invar] — confirm.  *)
let non_signed_variant = function
    S8 -> I8 | S16 -> I16 | S32 -> I32 | S64 -> I64
  | U8 -> I8 | U16 -> I16 | U32 -> I32 | U64 -> I64
  | P8 -> I8 | P16 -> I16
  | x -> x

(* Treat polynomial elements as unsigned of the same width (table lookups
   use unsigned indices — see the comment on [table] below); other classes
   are kept.
   NOTE(review): the two inner match arms were elided; reconstructed —
   confirm.  *)
let poly_unsigned_variant v =
  let elclass = match elt_class v with
    Poly -> Unsigned
  | x -> x in
  elt_of_class_width elclass (elt_width v)

(* Double the element width, preserving its class.
   NOTE(review): the [let widen_elt elt =] header was elided; the name is
   reconstructed from uses in [shapemap] — confirm.  *)
let widen_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w * 2)

(* Halve the element width, preserving its class (header likewise
   reconstructed from uses in [shapemap]).  *)
let narrow_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w / 2)
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.
   NOTE(review): the [scan] helper's header, base case and the bare
   Dreg/Qreg arms were elided in the garbled source; this reconstruction
   is inferred from the surviving fragments ([match operands.(opno)],
   the VecArray arms, and the final [scan (Array.length operands - 1)])
   — confirm against the original file.  *)
let find_key_operand operands =
  (* Scan right-to-left for the last D/Q (or array-of-D/Q) operand.  *)
  let rec scan opno =
    if opno = 0 then operands.(0)
    else
      match operands.(opno) with
        Qreg -> Qreg
      | Dreg -> Dreg
      | VecArray (_, Qreg) -> Qreg
      | VecArray (_, Dreg) -> Dreg
      | _ -> scan (opno - 1)
  in
    scan ((Array.length operands) - 1)
(* Machine mode used for element type [elt] in an instruction of shape
   [shape].
   NOTE(review): the [let idx = ...] binding header, the [in match shape
   with] line and the [| Long_imm ->] arm were elided in the garbled
   source; they are reconstructed from the surviving fragments (the width
   match, the [.(idx)] indexing and the arm groupings) — confirm.  *)
let rec mode_of_elt elt shape =
  (* Float (or conversion involving float) elements select the *SF modes
     in the tables below.  *)
  let flt = match elt_class elt with
    Float | ConvClass (_, Float) -> true | _ -> false in
  (* Table index by element width: 8/16/32/64 -> 0/1/2/3.  *)
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
   operand number [no] (index 0 appears to be the result — see [regmap]).
   NOTE(review): the [match shape with] line was elided in the garbled
   source; reconstructed — confirm.  *)
let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
(* Register type (D/Q) of an operand, based on shape and operand number.
   Slot 0 is the result; slots 1.. are the arguments (e.g. [Pair_result]
   returns a two-register array in slot 0).
   NOTE(review): the [match shape with] line was elided in the garbled
   source; reconstructed — confirm.  *)
let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
(* Map shape + element type + operand number [no] to a C-level [vectype]:
   the element is first adjusted by [shapemap], the register class chosen
   by [regmap], then [type_for_reg_elt] translates (register, element) to
   a concrete type, recursing through VecArray/pointer wrappers.
   NOTE(review): this block is garbled — the large Dreg/Qreg/Corereg
   lookup tables inside [type_for_reg_elt] (original lines ~396-443) were
   elided, and stray original line numbers are fused into the text.  Left
   byte-identical; restore the missing tables from the original file
   before use.  *)
392 let type_for_elt shape elt no
=
393 let elt = (shapemap shape no
) elt in
394 let reg = regmap shape no
in
395 let rec type_for_reg_elt reg elt =
410 | _
-> failwith
"Bad elt type"
425 | _
-> failwith
"Bad elt type"
440 | _
-> failwith
"Bad elt type"
444 | VecArray
(num
, sub
) ->
445 T_arrayof
(num
, type_for_reg_elt sub
elt)
447 T_ptrto
(type_for_reg_elt x
elt)
449 T_ptrto
(T_const
(type_for_reg_elt x
elt))
450 (* Anything else is solely for the use of the test generator. *)
453 type_for_reg_elt reg elt
(* Return size of a vector type, in bits.  Non-vector types (immediates,
   scalars, pointers) raise [Not_found].  *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found

(* Opaque integer backing mode for an array of [num] vectors of type
   [elttype], chosen by total size in 32-bit words.
   NOTE(review): the match arms were elided in the garbled source; the
   word counts are reconstructed from the mode comments near [inttype]
   (TImode = 4 words, EImode = 6, OImode = 8, CImode = 12, XImode = 16)
   — confirm.  *)
let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  match numwords with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode.  *)

(* Wrap a typing function so its element result is replaced by the
   sign-agnostic variant.  *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'

(* Don't restrict any types. Every operand slot uses the same element
   type; [make_arity] decides which slots exist.  *)
let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt

(* As sign_invar_*, but when sign matters.  *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

(* NOTE(review): the [let elts_same_io =], [let elts_same_2 =] and
   [let elts_same_1 =] headers were elided in the garbled source; the
   names are reconstructed from their uses in the sign_invar_* bindings
   below — confirm.  *)
let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))

(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data.  *)
let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: the result element is the unsigned type of
   the same width as the (sign-significant) operands.  *)
let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt
525 (* Signed/unsigned invariant comparison. *)
(* Builds on [cmp_sign_matters] and then post-processes the element via
   [non_signed_variant].
   NOTE(review): this block is garbled — the result arms of the inner
   match (original lines ~529-534) were elided, and stray original line
   numbers are fused into the text.  Left byte-identical; restore the
   missing arms from the original file before use.  *)
527 let cmp_sign_invar shape
elt =
528 let shape'
, elt'
= cmp_sign_matters shape elt in
530 match non_signed_variant elt'
with
(* Comparison (VTST) where only the element width matters: the external
   element is reduced to untyped bits, and the result is unsigned.  *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only

(* Shift by a register: the shift-count operand (slot 2) is always the
   signed element of the same width.  *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt
(* Genericised constant-shift type-generating function.
   [mkimm] builds the immediate-operand type from the element width,
   [?arity] overrides the default two-argument arity, and [?result]
   restricts the result element type.
   NOTE(review): the [let r_elt = match result with None -> elt] lines
   and the [match arity with] header were elided in the garbled source;
   they are reconstructed from the surviving option arms — confirm.  *)
let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1 in
  let r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts.  *)
let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

(* Immediate right-shift and accumulate: the destination doubles as the
   first source operand.  *)
let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness.  *)
let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed.
   NOTE(review): the trailing [shape elt] application was elided in the
   garbled source; reconstructed — confirm.  *)
let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift.  *)
let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result (trailing application likewise
   reconstructed — confirm).  *)
let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs.  *)
let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert: only element size matters.
   NOTE(review): the [let arity, _ =] binding header was elided in the
   garbled source; reconstructed from the trailing
   [arity, bits_of_elt elt] — confirm.  *)
let shift_insert shape elt =
  let arity, _ =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
(* Extract a single lane: polynomial elements are retyped as unsigned and
   32-bit elements as plain bits for the scalar result.  *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
  (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

(* Set a single lane; only the element size matters.  *)
let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* As [set_lane], but with no element type recorded at all.  *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts

(* Initialise a vector from a 64-bit (U64) bit pattern.  *)
let create_vector shape elt =
  let vtype = type_for_elt shape U64 1
  and rtype = type_for_elt shape elt 0 in
  Arity1 (rtype, vtype), elt
(* Conversions: the element must be a [Conv] or [Cast] pairing the
   destination and source element types.  *)
let conv make_arity shape elt =
  let edest, esrc = match elt with
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
  | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))

(* Operation has an unsigned result even if operands are signed.  *)
let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))

(* Wrap a typing function so its element result is reduced to bare bits.  *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'

(* Extend operation.  *)
let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops.  *)
let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))

(* Operations where only bits matter.  *)
let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3

(* Store operations: void result, only element size matters.  *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt

let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt

(* Wrap a typing function, discarding its element type entirely.
   NOTE(review): the result line [arity, NoElts] was elided in the
   garbled source; it is reconstructed from the function name, the
   discarded binding and the [set_lane_notype] pattern — confirm.  *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3

(* Bit-select operations (first operand is unsigned int).  *)
let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
(* Common lists of supported element types.  *)
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
712 Vadd
, [], All
(3, Dreg
), "vadd", sign_invar_2, F32
:: su_8_64;
713 Vadd
, [], All
(3, Qreg
), "vaddQ", sign_invar_2, F32
:: su_8_64;
714 Vadd
, [], Long
, "vaddl", elts_same_2, su_8_32;
715 Vadd
, [], Wide
, "vaddw", elts_same_2, su_8_32;
716 Vadd
, [Halving
], All
(3, Dreg
), "vhadd", elts_same_2, su_8_32;
717 Vadd
, [Halving
], All
(3, Qreg
), "vhaddQ", elts_same_2, su_8_32;
718 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
719 All
(3, Dreg
), "vRhadd", elts_same_2, su_8_32;
720 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
721 All
(3, Qreg
), "vRhaddQ", elts_same_2, su_8_32;
722 Vadd
, [Saturating
], All
(3, Dreg
), "vqadd", elts_same_2, su_8_64;
723 Vadd
, [Saturating
], All
(3, Qreg
), "vqaddQ", elts_same_2, su_8_64;
724 Vadd
, [High_half
], Narrow
, "vaddhn", sign_invar_2, su_16_64;
725 Vadd
, [Instruction_name
["vraddhn"]; Rounding
; High_half
],
726 Narrow
, "vRaddhn", sign_invar_2, su_16_64;
728 (* Multiplication. *)
729 Vmul
, [], All
(3, Dreg
), "vmul", sign_invar_2, P8
:: F32
:: su_8_32;
730 Vmul
, [], All
(3, Qreg
), "vmulQ", sign_invar_2, P8
:: F32
:: su_8_32;
731 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Dreg
), "vqdmulh",
732 elts_same_2, [S16
; S32
];
733 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Qreg
), "vqdmulhQ",
734 elts_same_2, [S16
; S32
];
736 [Saturating
; Rounding
; Doubling
; High_half
;
737 Instruction_name
["vqrdmulh"]],
738 All
(3, Dreg
), "vqRdmulh",
739 elts_same_2, [S16
; S32
];
741 [Saturating
; Rounding
; Doubling
; High_half
;
742 Instruction_name
["vqrdmulh"]],
743 All
(3, Qreg
), "vqRdmulhQ",
744 elts_same_2, [S16
; S32
];
745 Vmul
, [], Long
, "vmull", elts_same_2, P8
:: su_8_32;
746 Vmul
, [Saturating
; Doubling
], Long
, "vqdmull", elts_same_2, [S16
; S32
];
748 (* Multiply-accumulate. *)
749 Vmla
, [], All
(3, Dreg
), "vmla", sign_invar_io, F32
:: su_8_32;
750 Vmla
, [], All
(3, Qreg
), "vmlaQ", sign_invar_io, F32
:: su_8_32;
751 Vmla
, [], Long
, "vmlal", elts_same_io, su_8_32;
752 Vmla
, [Saturating
; Doubling
], Long
, "vqdmlal", elts_same_io, [S16
; S32
];
754 (* Multiply-subtract. *)
755 Vmls
, [], All
(3, Dreg
), "vmls", sign_invar_io, F32
:: su_8_32;
756 Vmls
, [], All
(3, Qreg
), "vmlsQ", sign_invar_io, F32
:: su_8_32;
757 Vmls
, [], Long
, "vmlsl", elts_same_io, su_8_32;
758 Vmls
, [Saturating
; Doubling
], Long
, "vqdmlsl", elts_same_io, [S16
; S32
];
761 Vsub
, [], All
(3, Dreg
), "vsub", sign_invar_2, F32
:: su_8_64;
762 Vsub
, [], All
(3, Qreg
), "vsubQ", sign_invar_2, F32
:: su_8_64;
763 Vsub
, [], Long
, "vsubl", elts_same_2, su_8_32;
764 Vsub
, [], Wide
, "vsubw", elts_same_2, su_8_32;
765 Vsub
, [Halving
], All
(3, Dreg
), "vhsub", elts_same_2, su_8_32;
766 Vsub
, [Halving
], All
(3, Qreg
), "vhsubQ", elts_same_2, su_8_32;
767 Vsub
, [Saturating
], All
(3, Dreg
), "vqsub", elts_same_2, su_8_64;
768 Vsub
, [Saturating
], All
(3, Qreg
), "vqsubQ", elts_same_2, su_8_64;
769 Vsub
, [High_half
], Narrow
, "vsubhn", sign_invar_2, su_16_64;
770 Vsub
, [Instruction_name
["vrsubhn"]; Rounding
; High_half
],
771 Narrow
, "vRsubhn", sign_invar_2, su_16_64;
773 (* Comparison, equal. *)
774 Vceq
, [], All
(3, Dreg
), "vceq", cmp_sign_invar, P8
:: F32
:: su_8_32;
775 Vceq
, [], All
(3, Qreg
), "vceqQ", cmp_sign_invar, P8
:: F32
:: su_8_32;
777 (* Comparison, greater-than or equal. *)
778 Vcge
, [], All
(3, Dreg
), "vcge", cmp_sign_matters, F32
:: su_8_32;
779 Vcge
, [], All
(3, Qreg
), "vcgeQ", cmp_sign_matters, F32
:: su_8_32;
781 (* Comparison, less-than or equal. *)
782 Vcle
, [Flipped
"vcge"], All
(3, Dreg
), "vcle", cmp_sign_matters,
784 Vcle
, [Instruction_name
["vcge"]; Flipped
"vcgeQ"],
785 All
(3, Qreg
), "vcleQ", cmp_sign_matters,
788 (* Comparison, greater-than. *)
789 Vcgt
, [], All
(3, Dreg
), "vcgt", cmp_sign_matters, F32
:: su_8_32;
790 Vcgt
, [], All
(3, Qreg
), "vcgtQ", cmp_sign_matters, F32
:: su_8_32;
792 (* Comparison, less-than. *)
793 Vclt
, [Flipped
"vcgt"], All
(3, Dreg
), "vclt", cmp_sign_matters,
795 Vclt
, [Instruction_name
["vcgt"]; Flipped
"vcgtQ"],
796 All
(3, Qreg
), "vcltQ", cmp_sign_matters,
799 (* Compare absolute greater-than or equal. *)
800 Vcage
, [Instruction_name
["vacge"]],
801 All
(3, Dreg
), "vcage", cmp_sign_matters, [F32
];
802 Vcage
, [Instruction_name
["vacge"]],
803 All
(3, Qreg
), "vcageQ", cmp_sign_matters, [F32
];
805 (* Compare absolute less-than or equal. *)
806 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcage"],
807 All
(3, Dreg
), "vcale", cmp_sign_matters, [F32
];
808 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcageQ"],
809 All
(3, Qreg
), "vcaleQ", cmp_sign_matters, [F32
];
811 (* Compare absolute greater-than or equal. *)
812 Vcagt
, [Instruction_name
["vacgt"]],
813 All
(3, Dreg
), "vcagt", cmp_sign_matters, [F32
];
814 Vcagt
, [Instruction_name
["vacgt"]],
815 All
(3, Qreg
), "vcagtQ", cmp_sign_matters, [F32
];
817 (* Compare absolute less-than or equal. *)
818 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagt"],
819 All
(3, Dreg
), "vcalt", cmp_sign_matters, [F32
];
820 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagtQ"],
821 All
(3, Qreg
), "vcaltQ", cmp_sign_matters, [F32
];
824 Vtst
, [], All
(3, Dreg
), "vtst", cmp_bits, P8
:: su_8_32;
825 Vtst
, [], All
(3, Qreg
), "vtstQ", cmp_bits, P8
:: su_8_32;
827 (* Absolute difference. *)
828 Vabd
, [], All
(3, Dreg
), "vabd", elts_same_2, F32
:: su_8_32;
829 Vabd
, [], All
(3, Qreg
), "vabdQ", elts_same_2, F32
:: su_8_32;
830 Vabd
, [], Long
, "vabdl", elts_same_2, su_8_32;
832 (* Absolute difference and accumulate. *)
833 Vaba
, [], All
(3, Dreg
), "vaba", elts_same_io, su_8_32;
834 Vaba
, [], All
(3, Qreg
), "vabaQ", elts_same_io, su_8_32;
835 Vaba
, [], Long
, "vabal", elts_same_io, su_8_32;
838 Vmax
, [], All
(3, Dreg
), "vmax", elts_same_2, F32
:: su_8_32;
839 Vmax
, [], All
(3, Qreg
), "vmaxQ", elts_same_2, F32
:: su_8_32;
842 Vmin
, [], All
(3, Dreg
), "vmin", elts_same_2, F32
:: su_8_32;
843 Vmin
, [], All
(3, Qreg
), "vminQ", elts_same_2, F32
:: su_8_32;
846 Vpadd
, [], All
(3, Dreg
), "vpadd", sign_invar_2, F32
:: su_8_32;
847 Vpadd
, [], Long_noreg Dreg
, "vpaddl", elts_same_1, su_8_32;
848 Vpadd
, [], Long_noreg Qreg
, "vpaddlQ", elts_same_1, su_8_32;
850 (* Pairwise add, widen and accumulate. *)
851 Vpada
, [], Wide_noreg Dreg
, "vpadal", elts_same_2, su_8_32;
852 Vpada
, [], Wide_noreg Qreg
, "vpadalQ", elts_same_2, su_8_32;
854 (* Folding maximum, minimum. *)
855 Vpmax
, [], All
(3, Dreg
), "vpmax", elts_same_2, F32
:: su_8_32;
856 Vpmin
, [], All
(3, Dreg
), "vpmin", elts_same_2, F32
:: su_8_32;
858 (* Reciprocal step. *)
859 Vrecps
, [], All
(3, Dreg
), "vrecps", elts_same_2, [F32
];
860 Vrecps
, [], All
(3, Qreg
), "vrecpsQ", elts_same_2, [F32
];
861 Vrsqrts
, [], All
(3, Dreg
), "vrsqrts", elts_same_2, [F32
];
862 Vrsqrts
, [], All
(3, Qreg
), "vrsqrtsQ", elts_same_2, [F32
];
864 (* Vector shift left. *)
865 Vshl
, [], All
(3, Dreg
), "vshl", reg_shift, su_8_64;
866 Vshl
, [], All
(3, Qreg
), "vshlQ", reg_shift, su_8_64;
867 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
868 All
(3, Dreg
), "vRshl", reg_shift, su_8_64;
869 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
870 All
(3, Qreg
), "vRshlQ", reg_shift, su_8_64;
871 Vshl
, [Saturating
], All
(3, Dreg
), "vqshl", reg_shift, su_8_64;
872 Vshl
, [Saturating
], All
(3, Qreg
), "vqshlQ", reg_shift, su_8_64;
873 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
874 All
(3, Dreg
), "vqRshl", reg_shift, su_8_64;
875 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
876 All
(3, Qreg
), "vqRshlQ", reg_shift, su_8_64;
878 (* Vector shift right by constant. *)
879 Vshr_n
, [], Binary_imm Dreg
, "vshr_n", shift_right, su_8_64;
880 Vshr_n
, [], Binary_imm Qreg
, "vshrQ_n", shift_right, su_8_64;
881 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Dreg
,
882 "vRshr_n", shift_right, su_8_64;
883 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Qreg
,
884 "vRshrQ_n", shift_right, su_8_64;
885 Vshr_n
, [], Narrow_imm
, "vshrn_n", shift_right_sign_invar, su_16_64;
886 Vshr_n
, [Instruction_name
["vrshrn"]; Rounding
], Narrow_imm
, "vRshrn_n",
887 shift_right_sign_invar, su_16_64;
888 Vshr_n
, [Saturating
], Narrow_imm
, "vqshrn_n", shift_right, su_16_64;
889 Vshr_n
, [Instruction_name
["vqrshrn"]; Saturating
; Rounding
], Narrow_imm
,
890 "vqRshrn_n", shift_right, su_16_64;
891 Vshr_n
, [Saturating
; Dst_unsign
], Narrow_imm
, "vqshrun_n",
892 shift_right_to_uns, [S16
; S32
; S64
];
893 Vshr_n
, [Instruction_name
["vqrshrun"]; Saturating
; Dst_unsign
; Rounding
],
894 Narrow_imm
, "vqRshrun_n", shift_right_to_uns, [S16
; S32
; S64
];
896 (* Vector shift left by constant. *)
897 Vshl_n
, [], Binary_imm Dreg
, "vshl_n", shift_left_sign_invar, su_8_64;
898 Vshl_n
, [], Binary_imm Qreg
, "vshlQ_n", shift_left_sign_invar, su_8_64;
899 Vshl_n
, [Saturating
], Binary_imm Dreg
, "vqshl_n", shift_left, su_8_64;
900 Vshl_n
, [Saturating
], Binary_imm Qreg
, "vqshlQ_n", shift_left, su_8_64;
901 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Dreg
, "vqshlu_n",
902 shift_left_to_uns, [S8
; S16
; S32
; S64
];
903 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Qreg
, "vqshluQ_n",
904 shift_left_to_uns, [S8
; S16
; S32
; S64
];
905 Vshl_n
, [], Long_imm
, "vshll_n", shift_left, su_8_32;
907 (* Vector shift right by constant and accumulate. *)
908 Vsra_n
, [], Binary_imm Dreg
, "vsra_n", shift_right_acc, su_8_64;
909 Vsra_n
, [], Binary_imm Qreg
, "vsraQ_n", shift_right_acc, su_8_64;
910 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Dreg
,
911 "vRsra_n", shift_right_acc, su_8_64;
912 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Qreg
,
913 "vRsraQ_n", shift_right_acc, su_8_64;
915 (* Vector shift right and insert. *)
916 Vsri
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsri_n", shift_insert,
917 P8
:: P16
:: su_8_64;
918 Vsri
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsriQ_n", shift_insert,
919 P8
:: P16
:: su_8_64;
921 (* Vector shift left and insert. *)
922 Vsli
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsli_n", shift_insert,
923 P8
:: P16
:: su_8_64;
924 Vsli
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsliQ_n", shift_insert,
925 P8
:: P16
:: su_8_64;
927 (* Absolute value. *)
928 Vabs
, [], All
(2, Dreg
), "vabs", elts_same_1, [S8
; S16
; S32
; F32
];
929 Vabs
, [], All
(2, Qreg
), "vabsQ", elts_same_1, [S8
; S16
; S32
; F32
];
930 Vabs
, [Saturating
], All
(2, Dreg
), "vqabs", elts_same_1, [S8
; S16
; S32
];
931 Vabs
, [Saturating
], All
(2, Qreg
), "vqabsQ", elts_same_1, [S8
; S16
; S32
];
934 Vneg
, [], All
(2, Dreg
), "vneg", elts_same_1, [S8
; S16
; S32
; F32
];
935 Vneg
, [], All
(2, Qreg
), "vnegQ", elts_same_1, [S8
; S16
; S32
; F32
];
936 Vneg
, [Saturating
], All
(2, Dreg
), "vqneg", elts_same_1, [S8
; S16
; S32
];
937 Vneg
, [Saturating
], All
(2, Qreg
), "vqnegQ", elts_same_1, [S8
; S16
; S32
];
940 Vmvn
, [], All
(2, Dreg
), "vmvn", notype_1, P8
:: su_8_32;
941 Vmvn
, [], All
(2, Qreg
), "vmvnQ", notype_1, P8
:: su_8_32;
943 (* Count leading sign bits. *)
944 Vcls
, [], All
(2, Dreg
), "vcls", elts_same_1, [S8
; S16
; S32
];
945 Vcls
, [], All
(2, Qreg
), "vclsQ", elts_same_1, [S8
; S16
; S32
];
947 (* Count leading zeros. *)
948 Vclz
, [], All
(2, Dreg
), "vclz", sign_invar_1, su_8_32;
949 Vclz
, [], All
(2, Qreg
), "vclzQ", sign_invar_1, su_8_32;
951 (* Count number of set bits. *)
952 Vcnt
, [], All
(2, Dreg
), "vcnt", bits_1, [P8
; S8
; U8
];
953 Vcnt
, [], All
(2, Qreg
), "vcntQ", bits_1, [P8
; S8
; U8
];
955 (* Reciprocal estimate. *)
956 Vrecpe
, [], All
(2, Dreg
), "vrecpe", elts_same_1, [U32
; F32
];
957 Vrecpe
, [], All
(2, Qreg
), "vrecpeQ", elts_same_1, [U32
; F32
];
959 (* Reciprocal square-root estimate. *)
960 Vrsqrte
, [], All
(2, Dreg
), "vrsqrte", elts_same_1, [U32
; F32
];
961 Vrsqrte
, [], All
(2, Qreg
), "vrsqrteQ", elts_same_1, [U32
; F32
];
963 (* Get lanes from a vector. *)
965 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
966 Instruction_name
["vmov"]],
967 Use_operands
[| Corereg
; Dreg
; Immed
|],
968 "vget_lane", get_lane, pf_su_8_32;
971 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
972 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
973 Use_operands
[| Corereg
; Dreg
; Immed
|],
974 "vget_lane", notype_2, [S64
; U64
];
976 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
977 Instruction_name
["vmov"]],
978 Use_operands
[| Corereg
; Qreg
; Immed
|],
979 "vgetQ_lane", get_lane, pf_su_8_32;
982 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
983 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
984 Use_operands
[| Corereg
; Qreg
; Immed
|],
985 "vgetQ_lane", notype_2, [S64
; U64
];
987 (* Set lanes in a vector. *)
988 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
989 Instruction_name
["vmov"]],
990 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
991 set_lane, pf_su_8_32;
992 Vset_lane
, [Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
993 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
994 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
995 set_lane_notype, [S64
; U64
];
996 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
997 Instruction_name
["vmov"]],
998 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
999 set_lane, pf_su_8_32;
1000 Vset_lane
, [Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
1001 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1002 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
1003 set_lane_notype, [S64
; U64
];
1005 (* Create vector from literal bit pattern. *)
1007 [No_op
], (* Not really, but it can yield various things that are too
1008 hard for the test generator at this time. *)
1009 Use_operands
[| Dreg
; Corereg
|], "vcreate", create_vector,
1012 (* Set all lanes to the same value. *)
1014 [Disassembles_as
[Use_operands
[| Dreg
;
1015 Alternatives
[ Corereg
;
1016 Element_of_dreg
] |]]],
1017 Use_operands
[| Dreg
; Corereg
|], "vdup_n", bits_1,
1020 [Instruction_name
["vmov"];
1021 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1022 Use_operands
[| Dreg
; Corereg
|], "vdup_n", notype_1,
1025 [Disassembles_as
[Use_operands
[| Qreg
;
1026 Alternatives
[ Corereg
;
1027 Element_of_dreg
] |]]],
1028 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", bits_1,
1031 [Instruction_name
["vmov"];
1032 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1033 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1034 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", notype_1,
1037 (* These are just aliases for the above. *)
1039 [Builtin_name
"vdup_n";
1040 Disassembles_as
[Use_operands
[| Dreg
;
1041 Alternatives
[ Corereg
;
1042 Element_of_dreg
] |]]],
1043 Use_operands
[| Dreg
; Corereg
|],
1044 "vmov_n", bits_1, pf_su_8_32;
1046 [Builtin_name
"vdup_n";
1047 Instruction_name
["vmov"];
1048 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1049 Use_operands
[| Dreg
; Corereg
|],
1050 "vmov_n", notype_1, [S64
; U64
];
1052 [Builtin_name
"vdupQ_n";
1053 Disassembles_as
[Use_operands
[| Qreg
;
1054 Alternatives
[ Corereg
;
1055 Element_of_dreg
] |]]],
1056 Use_operands
[| Qreg
; Corereg
|],
1057 "vmovQ_n", bits_1, pf_su_8_32;
1059 [Builtin_name
"vdupQ_n";
1060 Instruction_name
["vmov"];
1061 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1062 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1063 Use_operands
[| Qreg
; Corereg
|],
1064 "vmovQ_n", notype_1, [S64
; U64
];
1066 (* Duplicate, lane version. We can't use Use_operands here because the
1067 rightmost register (always Dreg) would be picked up by find_key_operand,
1068 when we want the leftmost register to be used in this case (otherwise
1069 the modes are indistinguishable in neon.md, etc. *)
1071 [Disassembles_as
[Use_operands
[| Dreg
; Element_of_dreg
|]]],
1072 Unary_scalar Dreg
, "vdup_lane", bits_2, pf_su_8_32;
1074 [No_op
; Const_valuator
(fun _
-> 0)],
1075 Unary_scalar Dreg
, "vdup_lane", bits_2, [S64
; U64
];
1077 [Disassembles_as
[Use_operands
[| Qreg
; Element_of_dreg
|]]],
1078 Unary_scalar Qreg
, "vdupQ_lane", bits_2, pf_su_8_32;
1080 [No_op
; Const_valuator
(fun _
-> 0)],
1081 Unary_scalar Qreg
, "vdupQ_lane", bits_2, [S64
; U64
];
1083 (* Combining vectors. *)
1085 Use_operands
[| Qreg
; Dreg
; Dreg
|], "vcombine", notype_2,
1088 (* Splitting vectors. *)
1090 Use_operands
[| Dreg
; Qreg
|], "vget_high",
1091 notype_1, pf_su_8_64;
1092 Vget_low
, [Instruction_name
["vmov"];
1093 Disassembles_as
[Use_operands
[| Dreg
; Dreg
|]];
1095 Use_operands
[| Dreg
; Qreg
|], "vget_low",
1096 notype_1, pf_su_8_32;
1098 Use_operands
[| Dreg
; Qreg
|], "vget_low",
1099 notype_1, [S64
; U64
];
1102 Vcvt
, [InfoWord
], All
(2, Dreg
), "vcvt", conv_1,
1103 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1104 Vcvt
, [InfoWord
], All
(2, Qreg
), "vcvtQ", conv_1,
1105 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1106 Vcvt_n
, [InfoWord
], Use_operands
[| Dreg
; Dreg
; Immed
|], "vcvt_n", conv_2,
1107 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1108 Vcvt_n
, [InfoWord
], Use_operands
[| Qreg
; Qreg
; Immed
|], "vcvtQ_n", conv_2,
1109 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1111 (* Move, narrowing. *)
1112 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]],
1113 Narrow
, "vmovn", sign_invar_1, su_16_64;
1114 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
],
1115 Narrow
, "vqmovn", elts_same_1, su_16_64;
1117 [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
; Dst_unsign
],
1118 Narrow
, "vqmovun", dst_unsign_1,
1122 Vmovl
, [Disassembles_as
[Use_operands
[| Qreg
; Dreg
|]]],
1123 Long
, "vmovl", elts_same_1, su_8_32;
1127 [Instruction_name
["vtbl"];
1128 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1129 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbl1", table_2, [U8
; S8
; P8
];
1130 Vtbl
2, [Instruction_name
["vtbl"]],
1131 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbl2", table_2,
1133 Vtbl
3, [Instruction_name
["vtbl"]],
1134 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbl3", table_2,
1136 Vtbl
4, [Instruction_name
["vtbl"]],
1137 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbl4", table_2,
1140 (* Extended table lookup. *)
1142 [Instruction_name
["vtbx"];
1143 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1144 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbx1", table_io, [U8
; S8
; P8
];
1145 Vtbx
2, [Instruction_name
["vtbx"]],
1146 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbx2", table_io,
1148 Vtbx
3, [Instruction_name
["vtbx"]],
1149 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbx3", table_io,
1151 Vtbx
4, [Instruction_name
["vtbx"]],
1152 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbx4", table_io,
1155 (* Multiply, lane. (note: these were undocumented at the time of
1157 Vmul_lane
, [], By_scalar Dreg
, "vmul_lane", sign_invar_2_lane,
1158 [S16
; S32
; U16
; U32
; F32
];
1159 Vmul_lane
, [], By_scalar Qreg
, "vmulQ_lane", sign_invar_2_lane,
1160 [S16
; S32
; U16
; U32
; F32
];
1162 (* Multiply-accumulate, lane. *)
1163 Vmla_lane
, [], By_scalar Dreg
, "vmla_lane", sign_invar_io_lane,
1164 [S16
; S32
; U16
; U32
; F32
];
1165 Vmla_lane
, [], By_scalar Qreg
, "vmlaQ_lane", sign_invar_io_lane,
1166 [S16
; S32
; U16
; U32
; F32
];
1167 Vmla_lane
, [], Wide_lane
, "vmlal_lane", elts_same_io_lane,
1168 [S16
; S32
; U16
; U32
];
1169 Vmla_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlal_lane",
1170 elts_same_io_lane, [S16
; S32
];
1172 (* Multiply-subtract, lane. *)
1173 Vmls_lane
, [], By_scalar Dreg
, "vmls_lane", sign_invar_io_lane,
1174 [S16
; S32
; U16
; U32
; F32
];
1175 Vmls_lane
, [], By_scalar Qreg
, "vmlsQ_lane", sign_invar_io_lane,
1176 [S16
; S32
; U16
; U32
; F32
];
1177 Vmls_lane
, [], Wide_lane
, "vmlsl_lane", elts_same_io_lane,
1178 [S16
; S32
; U16
; U32
];
1179 Vmls_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlsl_lane",
1180 elts_same_io_lane, [S16
; S32
];
1182 (* Long multiply, lane. *)
1184 Wide_lane
, "vmull_lane", elts_same_2_lane, [S16
; S32
; U16
; U32
];
1186 (* Saturating doubling long multiply, lane. *)
1187 Vqdmull_lane
, [Saturating
; Doubling
],
1188 Wide_lane
, "vqdmull_lane", elts_same_2_lane, [S16
; S32
];
1190 (* Saturating doubling long multiply high, lane. *)
1191 Vqdmulh_lane
, [Saturating
; Halving
],
1192 By_scalar Qreg
, "vqdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1193 Vqdmulh_lane
, [Saturating
; Halving
],
1194 By_scalar Dreg
, "vqdmulh_lane", elts_same_2_lane, [S16
; S32
];
1195 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1196 Instruction_name
["vqrdmulh"]],
1197 By_scalar Qreg
, "vqRdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1198 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1199 Instruction_name
["vqrdmulh"]],
1200 By_scalar Dreg
, "vqRdmulh_lane", elts_same_2_lane, [S16
; S32
];
1202 (* Vector multiply by scalar. *)
1204 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1205 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmul_n",
1206 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1208 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1209 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmulQ_n",
1210 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1212 (* Vector long multiply by scalar. *)
1213 Vmull_n
, [Instruction_name
["vmull"];
1214 Disassembles_as
[Use_operands
[| Qreg
; Dreg
; Element_of_dreg
|]]],
1215 Wide_scalar
, "vmull_n",
1216 elts_same_2, [S16
; S32
; U16
; U32
];
1218 (* Vector saturating doubling long multiply by scalar. *)
1219 Vqdmull_n
, [Saturating
; Doubling
;
1220 Disassembles_as
[Use_operands
[| Qreg
; Dreg
;
1221 Element_of_dreg
|]]],
1222 Wide_scalar
, "vqdmull_n",
1223 elts_same_2, [S16
; S32
];
1225 (* Vector saturating doubling long multiply high by scalar. *)
1227 [Saturating
; Halving
; InfoWord
;
1228 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1229 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1230 "vqdmulhQ_n", elts_same_2, [S16
; S32
];
1232 [Saturating
; Halving
; InfoWord
;
1233 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1234 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1235 "vqdmulh_n", elts_same_2, [S16
; S32
];
1237 [Saturating
; Halving
; Rounding
; InfoWord
;
1238 Instruction_name
["vqrdmulh"];
1239 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1240 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1241 "vqRdmulhQ_n", elts_same_2, [S16
; S32
];
1243 [Saturating
; Halving
; Rounding
; InfoWord
;
1244 Instruction_name
["vqrdmulh"];
1245 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1246 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1247 "vqRdmulh_n", elts_same_2, [S16
; S32
];
1249 (* Vector multiply-accumulate by scalar. *)
1251 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1252 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmla_n",
1253 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1255 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1256 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlaQ_n",
1257 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1258 Vmla_n
, [], Wide_scalar
, "vmlal_n", elts_same_io, [S16
; S32
; U16
; U32
];
1259 Vmla_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlal_n", elts_same_io,
1262 (* Vector multiply subtract by scalar. *)
1264 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1265 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmls_n",
1266 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1268 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1269 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlsQ_n",
1270 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1271 Vmls_n
, [], Wide_scalar
, "vmlsl_n", elts_same_io, [S16
; S32
; U16
; U32
];
1272 Vmls_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlsl_n", elts_same_io,
1275 (* Vector extract. *)
1276 Vext
, [Const_valuator
(fun _
-> 0)],
1277 Use_operands
[| Dreg
; Dreg
; Dreg
; Immed
|], "vext", extend,
1279 Vext
, [Const_valuator
(fun _
-> 0)],
1280 Use_operands
[| Qreg
; Qreg
; Qreg
; Immed
|], "vextQ", extend,
1283 (* Reverse elements. *)
1284 Vrev64
, [], All
(2, Dreg
), "vrev64", bits_1, P8
:: P16
:: F32
:: su_8_32;
1285 Vrev64
, [], All
(2, Qreg
), "vrev64Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1286 Vrev32
, [], All
(2, Dreg
), "vrev32", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1287 Vrev32
, [], All
(2, Qreg
), "vrev32Q", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1288 Vrev16
, [], All
(2, Dreg
), "vrev16", bits_1, [P8
; S8
; U8
];
1289 Vrev16
, [], All
(2, Qreg
), "vrev16Q", bits_1, [P8
; S8
; U8
];
1291 (* Bit selection. *)
1293 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1294 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Dreg
|]]],
1295 Use_operands
[| Dreg
; Dreg
; Dreg
; Dreg
|], "vbsl", bit_select,
1298 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1299 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Qreg
|]]],
1300 Use_operands
[| Qreg
; Qreg
; Qreg
; Qreg
|], "vbslQ", bit_select,
1303 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1304 generating good code for intrinsics which return structure types --
1305 builtins work well by themselves (and understand that the values being
1306 stored on e.g. the stack also reside in registers, so can optimise the
1307 stores away entirely if the results are used immediately), but
1308 intrinsics are very much less efficient. Maybe something can be improved
1309 re: inlining, or tweaking the ABI used for intrinsics (a special call
1312 Vtrn
, [ReturnPtr
], Pair_result Dreg
, "vtrn", bits_2, pf_su_8_32;
1313 Vtrn
, [ReturnPtr
], Pair_result Qreg
, "vtrnQ", bits_2, pf_su_8_32;
1316 Vzip
, [ReturnPtr
], Pair_result Dreg
, "vzip", bits_2, pf_su_8_32;
1317 Vzip
, [ReturnPtr
], Pair_result Qreg
, "vzipQ", bits_2, pf_su_8_32;
1319 (* Unzip elements. *)
1320 Vuzp
, [ReturnPtr
], Pair_result Dreg
, "vuzp", bits_2, pf_su_8_32;
1321 Vuzp
, [ReturnPtr
], Pair_result Qreg
, "vuzpQ", bits_2, pf_su_8_32;
1323 (* Element/structure loads. VLD1 variants. *)
1325 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1326 CstPtrTo Corereg
|]]],
1327 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1", bits_1,
1329 Vldx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1330 CstPtrTo Corereg
|]]],
1331 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q", bits_1,
1335 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1336 CstPtrTo Corereg
|]]],
1337 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1338 "vld1_lane", bits_3, pf_su_8_32;
1340 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1341 CstPtrTo Corereg
|]];
1342 Const_valuator
(fun _
-> 0)],
1343 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1344 "vld1_lane", bits_3, [S64
; U64
];
1346 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1347 CstPtrTo Corereg
|]]],
1348 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1349 "vld1Q_lane", bits_3, pf_su_8_32;
1351 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1352 CstPtrTo Corereg
|]]],
1353 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1354 "vld1Q_lane", bits_3, [S64
; U64
];
1357 [Disassembles_as
[Use_operands
[| VecArray
(1, All_elements_of_dreg
);
1358 CstPtrTo Corereg
|]]],
1359 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1362 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1363 CstPtrTo Corereg
|]]],
1364 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1367 [Disassembles_as
[Use_operands
[| VecArray
(2, All_elements_of_dreg
);
1368 CstPtrTo Corereg
|]]],
1369 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1372 [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1373 CstPtrTo Corereg
|]]],
1374 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1377 (* VST1 variants. *)
1378 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1380 Use_operands
[| PtrTo Corereg
; Dreg
|], "vst1",
1381 store_1, pf_su_8_64;
1382 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1384 Use_operands
[| PtrTo Corereg
; Qreg
|], "vst1Q",
1385 store_1, pf_su_8_64;
1388 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1389 CstPtrTo Corereg
|]]],
1390 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1391 "vst1_lane", store_3, pf_su_8_32;
1393 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1394 CstPtrTo Corereg
|]];
1395 Const_valuator
(fun _
-> 0)],
1396 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1397 "vst1_lane", store_3, [U64
; S64
];
1399 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1400 CstPtrTo Corereg
|]]],
1401 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1402 "vst1Q_lane", store_3, pf_su_8_32;
1404 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1405 CstPtrTo Corereg
|]]],
1406 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1407 "vst1Q_lane", store_3, [U64
; S64
];
1409 (* VLD2 variants. *)
1410 Vldx
2, [], Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1411 "vld2", bits_1, pf_su_8_32;
1412 Vldx
2, [Instruction_name
["vld1"]],
1413 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1414 "vld2", bits_1, [S64
; U64
];
1415 Vldx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1416 CstPtrTo Corereg
|];
1417 Use_operands
[| VecArray
(2, Dreg
);
1418 CstPtrTo Corereg
|]]],
1419 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
|],
1420 "vld2Q", bits_1, pf_su_8_32;
1423 [Disassembles_as
[Use_operands
1424 [| VecArray
(2, Element_of_dreg
);
1425 CstPtrTo Corereg
|]]],
1426 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
;
1427 VecArray
(2, Dreg
); Immed
|],
1428 "vld2_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1430 [Disassembles_as
[Use_operands
1431 [| VecArray
(2, Element_of_dreg
);
1432 CstPtrTo Corereg
|]]],
1433 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
;
1434 VecArray
(2, Qreg
); Immed
|],
1435 "vld2Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1438 [Disassembles_as
[Use_operands
1439 [| VecArray
(2, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1440 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1441 "vld2_dup", bits_1, pf_su_8_32;
1443 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1444 [| VecArray
(2, Dreg
); CstPtrTo Corereg
|]]],
1445 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1446 "vld2_dup", bits_1, [S64
; U64
];
1448 (* VST2 variants. *)
1449 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1451 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1452 store_1, pf_su_8_32;
1453 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1455 Instruction_name
["vst1"]],
1456 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1457 store_1, [S64
; U64
];
1458 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1460 Use_operands
[| VecArray
(2, Dreg
);
1462 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
) |], "vst2Q",
1463 store_1, pf_su_8_32;
1466 [Disassembles_as
[Use_operands
1467 [| VecArray
(2, Element_of_dreg
);
1468 CstPtrTo Corereg
|]]],
1469 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
); Immed
|], "vst2_lane",
1470 store_3, P8
:: P16
:: F32
:: su_8_32;
1472 [Disassembles_as
[Use_operands
1473 [| VecArray
(2, Element_of_dreg
);
1474 CstPtrTo Corereg
|]]],
1475 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
); Immed
|], "vst2Q_lane",
1476 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1478 (* VLD3 variants. *)
1479 Vldx
3, [], Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1480 "vld3", bits_1, pf_su_8_32;
1481 Vldx
3, [Instruction_name
["vld1"]],
1482 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1483 "vld3", bits_1, [S64
; U64
];
1484 Vldx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1485 CstPtrTo Corereg
|];
1486 Use_operands
[| VecArray
(3, Dreg
);
1487 CstPtrTo Corereg
|]]],
1488 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
|],
1489 "vld3Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1492 [Disassembles_as
[Use_operands
1493 [| VecArray
(3, Element_of_dreg
);
1494 CstPtrTo Corereg
|]]],
1495 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
;
1496 VecArray
(3, Dreg
); Immed
|],
1497 "vld3_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1499 [Disassembles_as
[Use_operands
1500 [| VecArray
(3, Element_of_dreg
);
1501 CstPtrTo Corereg
|]]],
1502 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
;
1503 VecArray
(3, Qreg
); Immed
|],
1504 "vld3Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1507 [Disassembles_as
[Use_operands
1508 [| VecArray
(3, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1509 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1510 "vld3_dup", bits_1, pf_su_8_32;
1512 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1513 [| VecArray
(3, Dreg
); CstPtrTo Corereg
|]]],
1514 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1515 "vld3_dup", bits_1, [S64
; U64
];
1517 (* VST3 variants. *)
1518 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1520 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1521 store_1, pf_su_8_32;
1522 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1524 Instruction_name
["vst1"]],
1525 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1526 store_1, [S64
; U64
];
1527 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1529 Use_operands
[| VecArray
(3, Dreg
);
1531 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
) |], "vst3Q",
1532 store_1, pf_su_8_32;
1535 [Disassembles_as
[Use_operands
1536 [| VecArray
(3, Element_of_dreg
);
1537 CstPtrTo Corereg
|]]],
1538 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
); Immed
|], "vst3_lane",
1539 store_3, P8
:: P16
:: F32
:: su_8_32;
1541 [Disassembles_as
[Use_operands
1542 [| VecArray
(3, Element_of_dreg
);
1543 CstPtrTo Corereg
|]]],
1544 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
); Immed
|], "vst3Q_lane",
1545 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1547 (* VLD4/VST4 variants. *)
1548 Vldx
4, [], Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1549 "vld4", bits_1, pf_su_8_32;
1550 Vldx
4, [Instruction_name
["vld1"]],
1551 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1552 "vld4", bits_1, [S64
; U64
];
1553 Vldx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1554 CstPtrTo Corereg
|];
1555 Use_operands
[| VecArray
(4, Dreg
);
1556 CstPtrTo Corereg
|]]],
1557 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
|],
1558 "vld4Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1561 [Disassembles_as
[Use_operands
1562 [| VecArray
(4, Element_of_dreg
);
1563 CstPtrTo Corereg
|]]],
1564 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
;
1565 VecArray
(4, Dreg
); Immed
|],
1566 "vld4_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1568 [Disassembles_as
[Use_operands
1569 [| VecArray
(4, Element_of_dreg
);
1570 CstPtrTo Corereg
|]]],
1571 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
;
1572 VecArray
(4, Qreg
); Immed
|],
1573 "vld4Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1576 [Disassembles_as
[Use_operands
1577 [| VecArray
(4, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1578 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1579 "vld4_dup", bits_1, pf_su_8_32;
1581 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1582 [| VecArray
(4, Dreg
); CstPtrTo Corereg
|]]],
1583 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1584 "vld4_dup", bits_1, [S64
; U64
];
1586 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1588 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1589 store_1, pf_su_8_32;
1590 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1592 Instruction_name
["vst1"]],
1593 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1594 store_1, [S64
; U64
];
1595 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1597 Use_operands
[| VecArray
(4, Dreg
);
1599 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
) |], "vst4Q",
1600 store_1, pf_su_8_32;
1603 [Disassembles_as
[Use_operands
1604 [| VecArray
(4, Element_of_dreg
);
1605 CstPtrTo Corereg
|]]],
1606 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
); Immed
|], "vst4_lane",
1607 store_3, P8
:: P16
:: F32
:: su_8_32;
1609 [Disassembles_as
[Use_operands
1610 [| VecArray
(4, Element_of_dreg
);
1611 CstPtrTo Corereg
|]]],
1612 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
); Immed
|], "vst4Q_lane",
1613 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1615 (* Logical operations. And. *)
1616 Vand
, [], All
(3, Dreg
), "vand", notype_2, su_8_64;
1617 Vand
, [], All
(3, Qreg
), "vandQ", notype_2, su_8_64;
1620 Vorr
, [], All
(3, Dreg
), "vorr", notype_2, su_8_64;
1621 Vorr
, [], All
(3, Qreg
), "vorrQ", notype_2, su_8_64;
1624 Veor
, [], All
(3, Dreg
), "veor", notype_2, su_8_64;
1625 Veor
, [], All
(3, Qreg
), "veorQ", notype_2, su_8_64;
1627 (* Bic (And-not). *)
1628 Vbic
, [], All
(3, Dreg
), "vbic", notype_2, su_8_64;
1629 Vbic
, [], All
(3, Qreg
), "vbicQ", notype_2, su_8_64;
1632 Vorn
, [], All
(3, Dreg
), "vorn", notype_2, su_8_64;
1633 Vorn
, [], All
(3, Qreg
), "vornQ", notype_2, su_8_64;
1637 let elems = P8
:: P16
:: F32
:: su_8_64 in
1640 let types = List.fold_right
1641 (fun convfrom acc
->
1642 if convfrom
<> convto
then
1643 Cast
(convto
, convfrom
) :: acc
1649 let dconv = Vreinterp
, [No_op
], Use_operands
[| Dreg
; Dreg
|],
1650 "vreinterpret", conv_1, types
1651 and qconv
= Vreinterp
, [No_op
], Use_operands
[| Qreg
; Qreg
|],
1652 "vreinterpretQ", conv_1, types in
1653 dconv :: qconv
:: acc
)
1657 (* Output routines. *)
1659 let rec string_of_elt = function
1660 S8
-> "s8" | S16
-> "s16" | S32
-> "s32" | S64
-> "s64"
1661 | U8
-> "u8" | U16
-> "u16" | U32
-> "u32" | U64
-> "u64"
1662 | I8
-> "i8" | I16
-> "i16" | I32
-> "i32" | I64
-> "i64"
1663 | B8
-> "8" | B16
-> "16" | B32
-> "32" | B64
-> "64"
1664 | F32
-> "f32" | P8
-> "p8" | P16
-> "p16"
1665 | Conv
(a
, b
) | Cast
(a
, b
) -> string_of_elt a ^
"_" ^
string_of_elt b
1666 | NoElts
-> failwith
"No elts"
(* As string_of_elt, but separate the two halves of a conversion or cast
   with a dot instead of an underscore.  *)
let string_of_elt_dots elt =
  match elt with
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | _ -> string_of_elt elt
(* Render a vectype as a C type name.  Vector and scalar types receive a
   "_t" suffix via the outermost affix (e.g. "int8x8_t"); the opaque
   builtin scalar modes map directly to their __builtin_neon_* names;
   array, pointer and const types are built recursively from their base
   type.  *)
let string_of_vectype vt =
  let rec name affix = function
      T_int8x8 -> affix "int8x8"
    | T_int8x16 -> affix "int8x16"
    | T_int16x4 -> affix "int16x4"
    | T_int16x8 -> affix "int16x8"
    | T_int32x2 -> affix "int32x2"
    | T_int32x4 -> affix "int32x4"
    | T_int64x1 -> affix "int64x1"
    | T_int64x2 -> affix "int64x2"
    | T_uint8x8 -> affix "uint8x8"
    | T_uint8x16 -> affix "uint8x16"
    | T_uint16x4 -> affix "uint16x4"
    | T_uint16x8 -> affix "uint16x8"
    | T_uint32x2 -> affix "uint32x2"
    | T_uint32x4 -> affix "uint32x4"
    | T_uint64x1 -> affix "uint64x1"
    | T_uint64x2 -> affix "uint64x2"
    | T_float32x2 -> affix "float32x2"
    | T_float32x4 -> affix "float32x4"
    | T_poly8x8 -> affix "poly8x8"
    | T_poly8x16 -> affix "poly8x16"
    | T_poly16x4 -> affix "poly16x4"
    | T_poly16x8 -> affix "poly16x8"
    | T_int8 -> affix "int8"
    | T_int16 -> affix "int16"
    | T_int32 -> affix "int32"
    | T_int64 -> affix "int64"
    | T_uint8 -> affix "uint8"
    | T_uint16 -> affix "uint16"
    | T_uint32 -> affix "uint32"
    | T_uint64 -> affix "uint64"
    | T_poly8 -> affix "poly8"
    | T_poly16 -> affix "poly16"
    | T_float32 -> affix "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        (* Array types append an "x<count>" to the base name before the
           affix is applied, e.g. "int8x8x2_t".  The identity affix is
           used for the base so "_t" appears only once, at the end.  *)
        let basename = name (fun x -> x) base in
        affix (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        let basename = name affix x in
        Printf.sprintf "%s *" basename
    | T_const x ->
        let basename = name affix x in
        Printf.sprintf "const %s" basename
  in
    name (fun x -> x ^ "_t") vt
(* Builtin type names for the large opaque integer modes (see the
   inttype declaration near the top of this file for what each holds).  *)
let string_of_inttype = function
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Lower-case machine-mode names, as used by the backend (neon.md etc.).  *)
let string_of_mode = function
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).  The intrinsic name itself is the all-lowercase
   form.  *)
let intrinsic_name nm = String.lowercase nm
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list.  *)
let builtin_name features name =
  (* The first Flipped or Builtin_name entry in the features list, if
     any, supplies the name to use instead of [name].  *)
  let chosen =
    List.fold_right
      (fun feature acc ->
         match feature with
           Flipped x | Builtin_name x -> x
         | _ -> acc)
      features name in
  (* Keep only characters unchanged by lowercasing, i.e. drop the
     uppercase letters (which belong to the intrinsic name only).  *)
  let keep c =
    let str = String.make 1 c in
    String.lowercase str = str in
  let buf = Buffer.create (String.length chosen) in
  String.iter (fun c -> if keep c then Buffer.add_char buf c) chosen;
  Buffer.contents buf
(* Transform an arity into a list of strings, one per vectype it
   mentions, in declaration order.  *)
let strings_of_arity a =
  (* Collect the vectypes first, then render each one uniformly.  *)
  let vts = match a with
      Arity0 vt -> [vt]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5] in
  List.map string_of_vectype vts
(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore.  *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  This is done by getting the equivalent builtin name and
   stripping any suffixes from the list at the top of this file, unless
   the features list presents with an Instruction_name entry, in which
   case that is used; or unless the features list presents with a Flipped
   entry, in which case that is used.  If both such entries are present,
   the first in the list will be chosen.  *)
let get_insn_names features name =
  (* Candidate names: the first Instruction_name or Flipped feature wins;
     otherwise fall back to the builtin-derived name.  *)
  let names =
    try
      match List.find
              (fun feature ->
                 match feature with
                   Instruction_name _ | Flipped _ -> true
                 | _ -> false)
              features with
        Instruction_name names -> names
      | Flipped name -> [name]
      | _ -> assert false  (* List.find only returns the arms above.  *)
    with Not_found -> [builtin_name features name] in
  (* Strip a trailing "_<suffix>" for each known suffix.  Names without
     an underscore (String.rindex raises Not_found) or otherwise
     malformed (Invalid_argument from String.sub) pass through intact.  *)
  List.map
    (fun name' ->
       try
         let underscore = String.rindex name' '_' in
         let our_suffix =
           String.sub name' (underscore + 1)
             (String.length name' - underscore - 1) in
         let rec strip = function
             [] -> name'
           | s :: _ when our_suffix = s -> String.sub name' 0 underscore
           | _ :: rest -> strip rest in
         strip suffixes_to_strip
       with Not_found | Invalid_argument _ -> name')
    names
(* Apply a function to each element of a list and then comma-separate
   the resulting strings, accumulating into [acc].  *)
let rec commas f elts acc =
  match elts with
    [] -> acc
  | [last] -> acc ^ f last
  | elt :: rest -> commas f rest (acc ^ f elt ^ ", ")
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.  *)
let analyze_all_shapes features shape f =
  try
    let first_disas =
      List.find
        (fun feature ->
           match feature with
             Disassembles_as _ -> true
           | _ -> false)
        features in
    match first_disas with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false  (* Unreachable: List.find matched above.  *)
  with Not_found -> [f shape]