1 (* Common code for ARM NEON header file, documentation and test case
4 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements.  An element is either a concrete
   signed/unsigned/float/poly/int/bits lane type, a (dest, src) pair for
   conversions and casts, or the absence of an element type.  *)
type elts =
    S8 | S16 | S32 | S64          (* Signed integers.  *)
  | F32                           (* 32-bit float.  *)
  | U8 | U16 | U32 | U64          (* Unsigned integers.  *)
  | P8 | P16                      (* Polynomials.  *)
  | I8 | I16 | I32 | I64          (* Sign-ambiguous integers.  *)
  | B8 | B16 | B32 | B64          (* Untyped bits.  *)
  | Conv of elts * elts           (* Conversion: (dest, source).  *)
  | Cast of elts * elts           (* Bitcast: (dest, source).  *)
  | NoElts                        (* No element type at all.  *)
(* The class of an element type: used by elt_class/elt_of_class_width to
   move between an element and its signed/unsigned/bits variants.  *)
type eltclass =
    Signed | Unsigned | Float | Poly | Int | Bits
  | ConvClass of eltclass * eltclass  (* Class pair for Conv/Cast elements.  *)
  | NoType
31 (* These vector types correspond directly to C types. *)
(* NOTE(review): the embedded numbering jumps from 43 to 49 below, so several
   constructors appear lost in extraction (code later references T_void,
   which is not visible here) -- confirm against the original file.  *)
32 type vectype
= T_int8x8
| T_int8x16
33 | T_int16x4
| T_int16x8
34 | T_int32x2
| T_int32x4
35 | T_int64x1
| T_int64x2
36 | T_uint8x8
| T_uint8x16
37 | T_uint16x4
| T_uint16x8
38 | T_uint32x2
| T_uint32x4
39 | T_uint64x1
| T_uint64x2
40 | T_float32x2
| T_float32x4
41 | T_poly8x8
| T_poly8x16
42 | T_poly16x4
| T_poly16x8
43 | T_immediate
of int * int
49 | T_float32
| T_arrayof
of int * vectype
50 | T_ptrto
of vectype
| T_const
of vectype
(* Opaque-integer backing modes for multi-register values.  The meanings of
   the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).  *)
type inttype =
    B_TImode
  | B_EImode
  | B_OImode
  | B_CImode
  | B_XImode
(* One operand position in an instruction shape.  *)
type shape_elt =
    Dreg                            (* 64-bit vector register.  *)
  | Qreg                            (* 128-bit vector register.  *)
  | Corereg                         (* ARM core register.  *)
  | Immed                           (* Immediate operand.  *)
  | VecArray of int * shape_elt     (* Array of vector operands.  *)
  | PtrTo of shape_elt              (* Pointer to an operand.  *)
  | CstPtrTo of shape_elt           (* Const pointer to an operand.  *)
  (* These next ones are used only in the test generator.  *)
  | Element_of_dreg                 (* Used for "lane" variants.  *)
  | Element_of_qreg                 (* Likewise.  *)
  | All_elements_of_dreg            (* Used for "dup" variants.  *)
  | Alternatives of shape_elt list  (* Used for multiple valid operands.  *)
(* NOTE(review): constructors are missing from this type in the extract
   (embedded numbering skips 74, 76, 78-80, 85-86): code below pattern-matches
   on Long, Wide, Narrow, Wide_lane, Wide_scalar, Long_imm and Narrow_imm,
   none of which are visible here -- confirm against the original file.  *)
73 type shape_form
= All
of int * shape_elt
75 | Long_noreg
of shape_elt
77 | Wide_noreg
of shape_elt
81 | Binary_imm
of shape_elt
82 | Use_operands
of shape_elt array
83 | By_scalar
of shape_elt
84 | Unary_scalar
of shape_elt
87 | Pair_result
of shape_elt
(* Arity of an intrinsic: the result type followed by zero to four
   argument types.  *)
type arity =
    Arity0 of vectype
  | Arity1 of vectype * vectype
  | Arity2 of vectype * vectype * vectype
  | Arity3 of vectype * vectype * vectype * vectype
  | Arity4 of vectype * vectype * vectype * vectype * vectype
(* Machine modes for vectors.  NOTE(review): scalar modes (QI, HI, SI, SF)
   are referenced later (mode_of_elt, original line 353) but are not visible
   in this extract -- some constructors appear lost; confirm.  *)
95 type vecmode
= V8QI
| V4HI
| V2SI
| V2SF
| DI
96 | V16QI
| V8HI
| V4SI
| V4SF
| V2DI
(* NOTE(review): this region holds fragments of two variant-type definitions
   whose 'type ...' header lines were lost in extraction (embedded numbering
   jumps 96 -> 139 and 185 -> 204 with many gaps): first an opcode-like type
   (only Vmov_n and section comments survive), then a features type (Flipped,
   InfoWord, ReturnPtr, Disassembles_as, Builtin_name, Instruction_name,
   Const_valuator).  Code left untouched; restore from the original file.  *)
139 (* Ops with scalar. *)
161 (* Vector extract. *)
163 (* Reverse elements. *)
167 (* Transposition ops. *)
171 (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
177 (* Set/extract lanes from a vector. *)
180 (* Initialize vector from bit pattern. *)
182 (* Set all lanes to same value. *)
184 | Vmov_n
(* Is this the same? *)
185 (* Duplicate scalar to all lanes of vector. *)
187 (* Combine vectors. *)
189 (* Get quadword high/low parts. *)
192 (* Convert vectors. *)
195 (* Narrow/lengthen vectors. *)
201 (* Reinterpret casts. *)
204 (* Features used for documentation, to distinguish between some instruction
205 variants, and to signal special requirements (e.g. swapping arguments). *)
214 | Flipped
of string (* Builtin name to use with flipped arguments. *)
215 | InfoWord
(* Pass an extra word for signage/rounding etc. (always passed
216 for All _, Long, Wide, Narrow shape_forms. *)
217 | ReturnPtr
(* Pass explicit pointer to return value as first argument. *)
218 (* A specification as to the shape of instruction expected upon
219 disassembly, used if it differs from the shape used to build the
220 intrinsic prototype. Multiple entries in the constructor's argument
221 indicate that the intrinsic expands to more than one assembly
222 instruction, each with a corresponding shape specified here. *)
223 | Disassembles_as
of shape_form list
224 | Builtin_name
of string (* Override the name of the builtin. *)
225 (* Override the name of the instruction. If more than one name
226 is specified, it means that the instruction can have any of those
228 | Instruction_name
of string list
229 (* Mark that the intrinsic yields no instructions, or expands to yield
230 behavior that the test generator cannot test. *)
232 (* Mark that the intrinsic has constant arguments that cannot be set
233 to the defaults (zero for pointers and one otherwise) in the test
234 cases. The function supplied must return the integer to be written
235 into the testcase for the argument number (0-based) supplied to it. *)
236 | Const_valuator
of (int -> int)
(* Raised (see elt_width's Cast case) when two element types of differing
   kind are combined where that is not allowed.  *)
exception MixedMode of elts * elts
(* elt_width: bit width of an element type.
   NOTE(review): the '| Conv (a, b) ->' arm header (original line 247) was
   lost in extraction -- the 'let wa = ...' body below belongs to it.  *)
242 let rec elt_width = function
243 S8
| U8
| P8
| I8
| B8
-> 8
244 | S16
| U16
| P16
| I16
| B16
-> 16
245 | S32
| F32
| U32
| I32
| B32
-> 32
246 | S64
| U64
| I64
| B64
-> 64
(* Conv arm (header line missing): both halves must agree in width.  *)
248 let wa = elt_width a
and wb
= elt_width b
in
249 if wa = wb
then wa else failwith
"element width?"
250 | Cast
(a
, b
) -> raise
(MixedMode
(a
, b
))
251 | NoElts
-> failwith
"No elts"
(* elt_class: classify an element type.
   NOTE(review): arms for Float/Poly (original lines 256-257) and NoElts
   (line 261) are missing from this extract.  *)
253 let rec elt_class = function
254 S8
| S16
| S32
| S64
-> Signed
255 | U8
| U16
| U32
| U64
-> Unsigned
258 | I8
| I16
| I32
| I64
-> Int
259 | B8
| B16
| B32
| B64
-> Bits
260 | Conv
(a
, b
) | Cast
(a
, b
) -> ConvClass
(elt_class a
, elt_class b
)
(* elt_of_class_width: inverse of (elt_class, elt_width).
   NOTE(review): most match arms (original lines 264-283) are missing from
   this extract; only the Unsigned 16/32/64 cases and the failure arm
   survive.  *)
263 let elt_of_class_width c w
=
271 | Unsigned
, 16 -> U16
272 | Unsigned
, 32 -> U32
273 | Unsigned
, 64 -> U64
284 | _
-> failwith
"Bad element type"
(* Return unsigned integer element the same width as argument.  *)
let unsigned_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Unsigned width
(* Return signed integer element the same width as argument.  *)
let signed_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Signed width
(* Return untyped bits element the same width as argument.  *)
let bits_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Bits width
(* NOTE(review): the body of non_signed_variant is entirely missing from
   this extract (embedded numbering jumps 297 -> 308).  *)
297 let non_signed_variant = function
(* poly_unsigned_variant: map a poly element to its unsigned class before
   rebuilding at the same width.  NOTE(review): the match arms of the inner
   'match elt_class v with' (original lines 310-311) are missing.  *)
308 let poly_unsigned_variant v
=
309 let elclass = match elt_class v
with
312 elt_of_class_width elclass (elt_width v
)
(* NOTE(review): the next two bodies double and halve the element width;
   their 'let widen_elt elt =' / 'let narrow_elt elt =' header lines
   (original lines ~314 and ~319) were lost in extraction.  *)
315 let w = elt_width elt
316 and c
= elt_class elt
in
317 elt_of_class_width c
(w * 2)
320 let w = elt_width elt
321 and c
= elt_class elt
in
322 elt_of_class_width c
(w / 2)
324 (* If we're trying to find a mode from a "Use_operands" instruction, use the
325 last vector operand as the dominant mode used to invoke the correct builtin.
326 We must stick to this rule in neon.md. *)
(* find_key_operand: scan the operand array from the end for the first
   D/Q-register-like operand.  NOTE(review): the inner 'let rec scan opno ='
   header and several match arms (original lines 328, 330-331, 334-335) are
   missing from this extract.  *)
327 let find_key_operand operands
=
329 match operands
.(opno
) with
332 | VecArray
(_
, Qreg
) -> Qreg
333 | VecArray
(_
, Dreg
) -> Dreg
336 scan ((Array.length operands
) - 1)
(* mode_of_elt: map (element type, shape) to a machine mode, indexing mode
   tables by element width (8/16/32/64 -> 0..3) and choosing float modes
   when the element class involves Float.
   NOTE(review): the 'let idx = ...' binding header and the 'match shape
   with' line (original lines 341, 345) are missing, as is the '->' of the
   Long/Wide arm (line 355); scalar modes QI/HI/SI/SF used below are not in
   the visible vecmode type.  *)
338 let rec mode_of_elt elt shape
=
339 let flt = match elt_class elt
with
340 Float
| ConvClass
(_
, Float
) -> true | _
-> false in
342 match elt_width elt
with
343 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
344 | _
-> failwith
"Bad element width"
346 All
(_
, Dreg
) | By_scalar Dreg
| Pair_result Dreg
| Unary_scalar Dreg
347 | Binary_imm Dreg
| Long_noreg Dreg
| Wide_noreg Dreg
->
348 [| V8QI
; V4HI
; if flt then V2SF
else V2SI
; DI
|].(idx)
349 | All
(_
, Qreg
) | By_scalar Qreg
| Pair_result Qreg
| Unary_scalar Qreg
350 | Binary_imm Qreg
| Long_noreg Qreg
| Wide_noreg Qreg
->
351 [| V16QI
; V8HI
; if flt then V4SF
else V4SI
; V2DI
|].(idx)
352 | All
(_
, (Corereg
| PtrTo _
| CstPtrTo _
)) ->
353 [| QI
; HI
; if flt then SF
else SI
; DI
|].(idx)
354 | Long
| Wide
| Wide_lane
| Wide_scalar
356 [| V8QI
; V4HI
; V2SI
; DI
|].(idx)
357 | Narrow
| Narrow_imm
-> [| V16QI
; V8HI
; V4SI
; V2DI
|].(idx)
358 | Use_operands ops
-> mode_of_elt elt
(All
(0, (find_key_operand ops
)))
359 | _
-> failwith
"invalid shape"
361 (* Modify an element type dependent on the shape of the instruction and the
(* shapemap: per-operand element-type transformer for a shape; operand
   [no] selects ident/widen/narrow from a per-shape table.
   NOTE(review): the tail of the comment above and the 'match shape with'
   line (original lines 362-363, 366) are missing from this extract, and
   widen_elt/narrow_elt are the headless fragments documented earlier.  *)
364 let shapemap shape no
=
365 let ident = fun x
-> x
in
367 All _
| Use_operands _
| By_scalar _
| Pair_result _
| Unary_scalar _
368 | Binary_imm _
-> ident
369 | Long
| Long_noreg _
| Wide_scalar
| Long_imm
->
370 [| widen_elt; ident; ident |].(no
)
371 | Wide
| Wide_noreg _
-> [| widen_elt; widen_elt; ident |].(no
)
372 | Wide_lane
-> [| widen_elt; ident; ident; ident |].(no
)
373 | Narrow
| Narrow_imm
-> [| narrow_elt; ident; ident |].(no
)
375 (* Register type (D/Q) of an operand, based on shape and operand number. *)
(* NOTE(review): the 'match shape with' line (original line 378) is missing
   between the header and the first arm.  *)
377 let regmap shape no
=
379 All
(_
, reg
) | Long_noreg reg
| Wide_noreg reg
-> reg
380 | Long
-> [| Qreg
; Dreg
; Dreg
|].(no
)
381 | Wide
-> [| Qreg
; Qreg
; Dreg
|].(no
)
382 | Narrow
-> [| Dreg
; Qreg
; Qreg
|].(no
)
383 | Wide_lane
-> [| Qreg
; Dreg
; Dreg
; Immed
|].(no
)
384 | Wide_scalar
-> [| Qreg
; Dreg
; Corereg
|].(no
)
385 | By_scalar reg
-> [| reg
; reg
; Dreg
; Immed
|].(no
)
386 | Unary_scalar reg
-> [| reg
; Dreg
; Immed
|].(no
)
387 | Pair_result reg
-> [| VecArray
(2, reg
); reg
; reg
|].(no
)
388 | Binary_imm reg
-> [| reg
; reg
; Immed
|].(no
)
389 | Long_imm
-> [| Qreg
; Dreg
; Immed
|].(no
)
390 | Narrow_imm
-> [| Dreg
; Qreg
; Immed
|].(no
)
391 | Use_operands these
-> these
.(no
)
(* type_for_elt: C-level vectype for operand [no] of [shape] with element
   [elt]: the element is first remapped by shapemap, the register kind by
   regmap, then the inner type_for_reg_elt builds the concrete type.
   NOTE(review): most of the inner function's match arms (original lines
   397-444, 447, 449, 452-453) were lost in extraction; only the failure
   arms, the VecArray/pointer cases and the final application survive.  *)
393 let type_for_elt shape elt no
=
394 let elt = (shapemap shape no
) elt in
395 let reg = regmap shape no
in
396 let rec type_for_reg_elt reg elt =
411 | _
-> failwith
"Bad elt type"
426 | _
-> failwith
"Bad elt type"
441 | _
-> failwith
"Bad elt type"
445 | VecArray
(num
, sub
) ->
446 T_arrayof
(num
, type_for_reg_elt sub
elt)
448 T_ptrto
(type_for_reg_elt x
elt)
450 T_ptrto
(T_const
(type_for_reg_elt x
elt))
451 (* Anything else is solely for the use of the test generator. *)
454 type_for_reg_elt reg elt
(* Return size of a vector type, in bits.  Raises Not_found for any type
   that is not one of the fixed 64- or 128-bit vector types.  *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4
      -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8
      -> 128
  | _ -> raise Not_found
(* inttype_for_array: pick the opaque integer backing mode for [num]
   elements of [elttype], sized in 32-bit words.
   NOTE(review): the match arms mapping numwords to B_TImode..B_XImode
   (original lines 469-474) are missing from this extract; only the
   failure arm survives.  *)
466 let inttype_for_array num elttype
=
467 let eltsize = vectype_size elttype
in
468 let numwords = (num
* eltsize) / 32 in
475 | _
-> failwith
("no int type for size " ^ string_of_int
numwords)
(* These functions return pairs of (internal, external) types, where
   "internal" types are those seen by GCC, and "external" are those seen by
   the assembler.  These types aren't necessarily the same, since the
   intrinsics can munge more than one C type into each assembler opcode.  *)

(* Wrap a type-generating function, replacing the resulting element type
   with its non-signed variant.  *)
let make_sign_invariant f shape elt =
  match f shape elt with
  | arity, e -> arity, non_signed_variant e
(* Don't restrict any types.  [make_arity] receives the partially-applied
   type_for_elt (an operand-number -> vectype function) to build the
   arity.  *)
let elts_same make_arity shape elt =
  let vt = type_for_elt shape elt in
  make_arity vt, elt
492 (* As sign_invar_*, but when sign matters. *)
(* NOTE(review): the 'let elts_same_io =', 'let elts_same_2 =' and
   'let elts_same_1 =' header lines (original lines 496, 504, 507) are
   missing from this extract; the elts_same applications below belong to
   them.  *)
493 let elts_same_io_lane =
494 elts_same (fun vtype -> Arity4
(vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))
497 elts_same (fun vtype -> Arity3
(vtype 0, vtype 0, vtype 1, vtype 2))
499 let elts_same_2_lane =
500 elts_same (fun vtype -> Arity3
(vtype 0, vtype 1, vtype 2, vtype 3))
502 let elts_same_3 = elts_same_2_lane
505 elts_same (fun vtype -> Arity2
(vtype 0, vtype 1, vtype 2))
508 elts_same (fun vtype -> Arity1
(vtype 0, vtype 1))
510 (* Use for signed/unsigned invariant operations (i.e. where the operation
511 doesn't depend on the sign of the data. *)
513 let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
514 let sign_invar_io = make_sign_invariant elts_same_io
515 let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
516 let sign_invar_2 = make_sign_invariant elts_same_2
517 let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: the result type is the unsigned element of
   the same width at operand position 0.  *)
let cmp_sign_matters shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (result, operand 1, operand 2), elt
526 (* Signed/unsigned invariant comparison. *)
(* NOTE(review): the result-producing lines after the match header
   (original lines 530, 532 onward) are missing from this extract.  *)
528 let cmp_sign_invar shape
elt =
529 let shape'
, elt'
= cmp_sign_matters shape elt in
531 match non_signed_variant elt'
with
(* Comparison (VTST) where only the element width matters: result type is
   unsigned, reported element type is the Bits variant.  *)
let cmp_bits shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (result, operand 1, operand 2), bits_of_elt elt
(* Register shift: the shift-amount operand (position 2) uses the signed
   variant of the element type.  *)
let reg_shift shape elt =
  let vt = type_for_elt shape elt in
  let shift_arg = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vt 0, vt 1, shift_arg), elt
550 (* Genericised constant-shift type-generating function. *)
(* [mkimm] builds the immediate type from the width of operand 2's element;
   ?result optionally restricts the result element; ?arity optionally
   overrides the default Arity2.
   NOTE(review): the lines between op1 and the 'Some restriction' arm
   (original lines 557-559, including the binding of r_elt and the start of
   the 'match result with'), and the 'match arity with' header (line 562),
   are missing from this extract.  *)
552 let const_shift mkimm ?
arity ?result
shape elt =
553 let op2type = (shapemap shape 2) elt in
554 let op2width = elt_width op2type in
555 let op2 = mkimm
op2width
556 and op1
= type_for_elt shape elt 1
560 | Some restriction
-> restriction
elt in
561 let rtype = type_for_elt shape r_elt
0 in
563 None
-> Arity2
(rtype, op1
, op2), elt
564 | Some mkarity
-> mkarity
rtype op1
op2, elt
(* Use for immediate right-shifts.  *)
let shift_right shape elt =
  let mkimm n = T_immediate (1, n) in
  const_shift mkimm shape elt
(* Immediate right-shift and accumulate: the destination doubles as an
   input, so the arity repeats [dst].  *)
let shift_right_acc shape elt =
  let mkimm n = T_immediate (1, n) in
  let acc_arity dst op1 op2 = Arity3 (dst, dst, op1, op2) in
  const_shift mkimm ~arity:acc_arity shape elt
575 (* Use for immediate right-shifts when the operation doesn't care about
578 let shift_right_sign_invar =
579 make_sign_invariant shift_right
581 (* Immediate right-shift; result is unsigned even when operand is signed. *)
(* NOTE(review): the final application line '... shape elt' (original line
   585) is missing from this extract, leaving the const_shift call
   incomplete.  *)
583 let shift_right_to_uns shape elt =
584 const_shift (fun imm
-> T_immediate
(1, imm
)) ~result
:unsigned_of_elt
(* Immediate left-shift.  The immediate's lower bound is 0, upper bound is
   one less than the element width.  *)
let shift_left shape elt =
  let mkimm n = T_immediate (0, n - 1) in
  const_shift mkimm shape elt
592 (* Immediate left-shift, unsigned result. *)
(* NOTE(review): the final application line '... shape elt' (original line
   596) is missing from this extract, leaving the const_shift call
   incomplete.  *)
594 let shift_left_to_uns shape elt =
595 const_shift (fun imm
-> T_immediate
(0, imm
- 1)) ~result
:unsigned_of_elt
598 (* Immediate left-shift, don't care about signs. *)
600 let shift_left_sign_invar =
601 make_sign_invariant shift_left
603 (* Shift left/right and insert: only element size matters. *)
(* NOTE(review): the 'let arity =' binding header (original line 606) is
   missing from this extract; the const_shift application below belongs to
   it, and its result feeds the final 'arity, bits_of_elt elt'.  *)
605 let shift_insert shape elt =
607 const_shift (fun imm
-> T_immediate
(1, imm
))
608 ~
arity:(fun dst op1
op2 -> Arity3
(dst
, dst
, op1
, op2)) shape elt in
609 arity, bits_of_elt elt
(* Get a single lane from a vector.  The reported element type is remapped:
   polynomials become the unsigned type of the same width, and all 32-bit
   elements collapse to untyped B32.  *)
let get_lane shape elt =
  let vt = type_for_elt shape elt in
  let reported =
    match elt with
      P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | other -> other
  in
  Arity2 (vt 0, vt 1, vt 2), reported
(* Set a single lane in a vector; element type reported as untyped bits.  *)
let set_lane shape elt =
  let vt = type_for_elt shape elt in
  Arity3 (vt 0, vt 1, vt 2, vt 3), bits_of_elt elt
(* As set_lane, but with no reported element type at all.  *)
let set_lane_notype shape elt =
  let vt = type_for_elt shape elt in
  Arity3 (vt 0, vt 1, vt 2, vt 3), NoElts
(* Create a vector from a U64 bit pattern (operand 1); the result type
   follows the requested element type.  *)
let create_vector shape elt =
  let src = type_for_elt shape U64 1 in
  let dst = type_for_elt shape elt 0 in
  Arity1 (dst, src), elt
(* Conversion/cast type builder.  The element must be Conv or Cast,
   carrying (destination, source) element types; anything else fails.  *)
let conv make_arity shape elt =
  let edest, esrc =
    match elt with
      Conv (d, s) | Cast (d, s) -> d, s
    | _ -> failwith "Non-conversion element in conversion"
  in
  (* vtype stays partially applied: make_arity indexes it per operand.  *)
  let vtype = type_for_elt shape esrc in
  let rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt
(* Unary and binary conversion arities.  *)
let conv_1 = conv (fun r v -> Arity1 (r, v 1))
let conv_2 = conv (fun r v -> Arity2 (r, v 1, v 2))
(* Operation has an unsigned result even if operands are signed.  *)
let dst_unsign make_arity shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity result operand, elt

let dst_unsign_1 = dst_unsign (fun r v -> Arity1 (r, v 1))
(* Wrap [func], forcing the resulting element type to its Bits variant.  *)
let make_bits_only func shape elt =
  match func shape elt with
  | arity, e -> arity, bits_of_elt e
(* Extend operation: three vector inputs, element reported as bits.  *)
let extend shape elt =
  let vt = type_for_elt shape elt in
  Arity3 (vt 0, vt 1, vt 2, vt 3), bits_of_elt elt
(* Table look-up operations.  Operand 2 is signed/unsigned for
   signed/unsigned integer ops respectively, or unsigned for polynomial
   ops; the element is reported as untyped bits.  *)
let table mkarity shape elt =
  let vt = type_for_elt shape elt in
  let index_op = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vt index_op, bits_of_elt elt

let table_2 = table (fun v op2 -> Arity2 (v 0, v 1, op2))
let table_io = table (fun v op2 -> Arity3 (v 0, v 0, v 1, op2))
(* Operations where only bits matter.  *)
let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Store operations: the result slot of the arity is T_void.  *)
let store_1 shape elt =
  let vt = type_for_elt shape elt in
  Arity2 (T_void, vt 0, vt 1), bits_of_elt elt

let store_3 shape elt =
  let vt = type_for_elt shape elt in
  Arity3 (T_void, vt 0, vt 1, vt 2), bits_of_elt elt
(* make_notype: wrap [func], discarding its element type.
   NOTE(review): the result line of the body (original lines 689-690,
   presumably yielding the arity paired with NoElts) is missing from this
   extract.  *)
687 let make_notype func
shape elt =
688 let arity, _
= func
shape elt in
691 let notype_1 = make_notype elts_same_1
692 let notype_2 = make_notype elts_same_2
693 let notype_3 = make_notype elts_same_3
(* Bit-select operations (first operand is unsigned int).  *)
let bit_select shape elt =
  let vt = type_for_elt shape elt in
  (* Selector type left partially applied; indexed at position 1 below.  *)
  let sel = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vt 0, sel 1, vt 2, vt 3), NoElts
(* Common lists of supported element types.  Order is significant: it is
   the order in which variants are emitted.  *)
let s_8_32 = [S8; S16; S32]
let u_8_32 = [U8; U16; U32]
let su_8_32 = s_8_32 @ u_8_32
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
715 Vadd
, [], All
(3, Dreg
), "vadd", sign_invar_2, F32
:: su_8_32;
716 Vadd
, [No_op
], All
(3, Dreg
), "vadd", sign_invar_2, [S64
; U64
];
717 Vadd
, [], All
(3, Qreg
), "vaddQ", sign_invar_2, F32
:: su_8_64;
718 Vadd
, [], Long
, "vaddl", elts_same_2, su_8_32;
719 Vadd
, [], Wide
, "vaddw", elts_same_2, su_8_32;
720 Vadd
, [Halving
], All
(3, Dreg
), "vhadd", elts_same_2, su_8_32;
721 Vadd
, [Halving
], All
(3, Qreg
), "vhaddQ", elts_same_2, su_8_32;
722 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
723 All
(3, Dreg
), "vRhadd", elts_same_2, su_8_32;
724 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
725 All
(3, Qreg
), "vRhaddQ", elts_same_2, su_8_32;
726 Vadd
, [Saturating
], All
(3, Dreg
), "vqadd", elts_same_2, su_8_64;
727 Vadd
, [Saturating
], All
(3, Qreg
), "vqaddQ", elts_same_2, su_8_64;
728 Vadd
, [High_half
], Narrow
, "vaddhn", sign_invar_2, su_16_64;
729 Vadd
, [Instruction_name
["vraddhn"]; Rounding
; High_half
],
730 Narrow
, "vRaddhn", sign_invar_2, su_16_64;
732 (* Multiplication. *)
733 Vmul
, [], All
(3, Dreg
), "vmul", sign_invar_2, P8
:: F32
:: su_8_32;
734 Vmul
, [], All
(3, Qreg
), "vmulQ", sign_invar_2, P8
:: F32
:: su_8_32;
735 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Dreg
), "vqdmulh",
736 elts_same_2, [S16
; S32
];
737 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Qreg
), "vqdmulhQ",
738 elts_same_2, [S16
; S32
];
740 [Saturating
; Rounding
; Doubling
; High_half
;
741 Instruction_name
["vqrdmulh"]],
742 All
(3, Dreg
), "vqRdmulh",
743 elts_same_2, [S16
; S32
];
745 [Saturating
; Rounding
; Doubling
; High_half
;
746 Instruction_name
["vqrdmulh"]],
747 All
(3, Qreg
), "vqRdmulhQ",
748 elts_same_2, [S16
; S32
];
749 Vmul
, [], Long
, "vmull", elts_same_2, P8
:: su_8_32;
750 Vmul
, [Saturating
; Doubling
], Long
, "vqdmull", elts_same_2, [S16
; S32
];
752 (* Multiply-accumulate. *)
753 Vmla
, [], All
(3, Dreg
), "vmla", sign_invar_io, F32
:: su_8_32;
754 Vmla
, [], All
(3, Qreg
), "vmlaQ", sign_invar_io, F32
:: su_8_32;
755 Vmla
, [], Long
, "vmlal", elts_same_io, su_8_32;
756 Vmla
, [Saturating
; Doubling
], Long
, "vqdmlal", elts_same_io, [S16
; S32
];
758 (* Multiply-subtract. *)
759 Vmls
, [], All
(3, Dreg
), "vmls", sign_invar_io, F32
:: su_8_32;
760 Vmls
, [], All
(3, Qreg
), "vmlsQ", sign_invar_io, F32
:: su_8_32;
761 Vmls
, [], Long
, "vmlsl", elts_same_io, su_8_32;
762 Vmls
, [Saturating
; Doubling
], Long
, "vqdmlsl", elts_same_io, [S16
; S32
];
765 Vsub
, [], All
(3, Dreg
), "vsub", sign_invar_2, F32
:: su_8_32;
766 Vsub
, [No_op
], All
(3, Dreg
), "vsub", sign_invar_2, [S64
; U64
];
767 Vsub
, [], All
(3, Qreg
), "vsubQ", sign_invar_2, F32
:: su_8_64;
768 Vsub
, [], Long
, "vsubl", elts_same_2, su_8_32;
769 Vsub
, [], Wide
, "vsubw", elts_same_2, su_8_32;
770 Vsub
, [Halving
], All
(3, Dreg
), "vhsub", elts_same_2, su_8_32;
771 Vsub
, [Halving
], All
(3, Qreg
), "vhsubQ", elts_same_2, su_8_32;
772 Vsub
, [Saturating
], All
(3, Dreg
), "vqsub", elts_same_2, su_8_64;
773 Vsub
, [Saturating
], All
(3, Qreg
), "vqsubQ", elts_same_2, su_8_64;
774 Vsub
, [High_half
], Narrow
, "vsubhn", sign_invar_2, su_16_64;
775 Vsub
, [Instruction_name
["vrsubhn"]; Rounding
; High_half
],
776 Narrow
, "vRsubhn", sign_invar_2, su_16_64;
778 (* Comparison, equal. *)
779 Vceq
, [], All
(3, Dreg
), "vceq", cmp_sign_invar, P8
:: F32
:: su_8_32;
780 Vceq
, [], All
(3, Qreg
), "vceqQ", cmp_sign_invar, P8
:: F32
:: su_8_32;
782 (* Comparison, greater-than or equal. *)
783 Vcge
, [], All
(3, Dreg
), "vcge", cmp_sign_matters, F32
:: s_8_32;
784 Vcge
, [Instruction_name
["vcge"]; Builtin_name
"vcgeu"],
785 All
(3, Dreg
), "vcge", cmp_sign_matters,
787 Vcge
, [], All
(3, Qreg
), "vcgeQ", cmp_sign_matters, F32
:: s_8_32;
788 Vcge
, [Instruction_name
["vcge"]; Builtin_name
"vcgeu"],
789 All
(3, Qreg
), "vcgeQ", cmp_sign_matters,
792 (* Comparison, less-than or equal. *)
793 Vcle
, [Flipped
"vcge"], All
(3, Dreg
), "vcle", cmp_sign_matters,
795 Vcle
, [Instruction_name
["vcge"]; Flipped
"vcgeu"],
796 All
(3, Dreg
), "vcle", cmp_sign_matters,
798 Vcle
, [Instruction_name
["vcge"]; Flipped
"vcgeQ"],
799 All
(3, Qreg
), "vcleQ", cmp_sign_matters,
801 Vcle
, [Instruction_name
["vcge"]; Flipped
"vcgeuQ"],
802 All
(3, Qreg
), "vcleQ", cmp_sign_matters,
805 (* Comparison, greater-than. *)
806 Vcgt
, [], All
(3, Dreg
), "vcgt", cmp_sign_matters, F32
:: s_8_32;
807 Vcgt
, [Instruction_name
["vcgt"]; Builtin_name
"vcgtu"],
808 All
(3, Dreg
), "vcgt", cmp_sign_matters,
810 Vcgt
, [], All
(3, Qreg
), "vcgtQ", cmp_sign_matters, F32
:: s_8_32;
811 Vcgt
, [Instruction_name
["vcgt"]; Builtin_name
"vcgtu"],
812 All
(3, Qreg
), "vcgtQ", cmp_sign_matters,
815 (* Comparison, less-than. *)
816 Vclt
, [Flipped
"vcgt"], All
(3, Dreg
), "vclt", cmp_sign_matters,
818 Vclt
, [Instruction_name
["vcgt"]; Flipped
"vcgtu"],
819 All
(3, Dreg
), "vclt", cmp_sign_matters,
821 Vclt
, [Instruction_name
["vcgt"]; Flipped
"vcgtQ"],
822 All
(3, Qreg
), "vcltQ", cmp_sign_matters,
824 Vclt
, [Instruction_name
["vcgt"]; Flipped
"vcgtuQ"],
825 All
(3, Qreg
), "vcltQ", cmp_sign_matters,
828 (* Compare absolute greater-than or equal. *)
829 Vcage
, [Instruction_name
["vacge"]],
830 All
(3, Dreg
), "vcage", cmp_sign_matters, [F32
];
831 Vcage
, [Instruction_name
["vacge"]],
832 All
(3, Qreg
), "vcageQ", cmp_sign_matters, [F32
];
834 (* Compare absolute less-than or equal. *)
835 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcage"],
836 All
(3, Dreg
), "vcale", cmp_sign_matters, [F32
];
837 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcageQ"],
838 All
(3, Qreg
), "vcaleQ", cmp_sign_matters, [F32
];
840 (* Compare absolute greater-than or equal. *)
841 Vcagt
, [Instruction_name
["vacgt"]],
842 All
(3, Dreg
), "vcagt", cmp_sign_matters, [F32
];
843 Vcagt
, [Instruction_name
["vacgt"]],
844 All
(3, Qreg
), "vcagtQ", cmp_sign_matters, [F32
];
846 (* Compare absolute less-than or equal. *)
847 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagt"],
848 All
(3, Dreg
), "vcalt", cmp_sign_matters, [F32
];
849 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagtQ"],
850 All
(3, Qreg
), "vcaltQ", cmp_sign_matters, [F32
];
853 Vtst
, [], All
(3, Dreg
), "vtst", cmp_bits, P8
:: su_8_32;
854 Vtst
, [], All
(3, Qreg
), "vtstQ", cmp_bits, P8
:: su_8_32;
856 (* Absolute difference. *)
857 Vabd
, [], All
(3, Dreg
), "vabd", elts_same_2, F32
:: su_8_32;
858 Vabd
, [], All
(3, Qreg
), "vabdQ", elts_same_2, F32
:: su_8_32;
859 Vabd
, [], Long
, "vabdl", elts_same_2, su_8_32;
861 (* Absolute difference and accumulate. *)
862 Vaba
, [], All
(3, Dreg
), "vaba", elts_same_io, su_8_32;
863 Vaba
, [], All
(3, Qreg
), "vabaQ", elts_same_io, su_8_32;
864 Vaba
, [], Long
, "vabal", elts_same_io, su_8_32;
867 Vmax
, [], All
(3, Dreg
), "vmax", elts_same_2, F32
:: su_8_32;
868 Vmax
, [], All
(3, Qreg
), "vmaxQ", elts_same_2, F32
:: su_8_32;
871 Vmin
, [], All
(3, Dreg
), "vmin", elts_same_2, F32
:: su_8_32;
872 Vmin
, [], All
(3, Qreg
), "vminQ", elts_same_2, F32
:: su_8_32;
875 Vpadd
, [], All
(3, Dreg
), "vpadd", sign_invar_2, F32
:: su_8_32;
876 Vpadd
, [], Long_noreg Dreg
, "vpaddl", elts_same_1, su_8_32;
877 Vpadd
, [], Long_noreg Qreg
, "vpaddlQ", elts_same_1, su_8_32;
879 (* Pairwise add, widen and accumulate. *)
880 Vpada
, [], Wide_noreg Dreg
, "vpadal", elts_same_2, su_8_32;
881 Vpada
, [], Wide_noreg Qreg
, "vpadalQ", elts_same_2, su_8_32;
883 (* Folding maximum, minimum. *)
884 Vpmax
, [], All
(3, Dreg
), "vpmax", elts_same_2, F32
:: su_8_32;
885 Vpmin
, [], All
(3, Dreg
), "vpmin", elts_same_2, F32
:: su_8_32;
887 (* Reciprocal step. *)
888 Vrecps
, [], All
(3, Dreg
), "vrecps", elts_same_2, [F32
];
889 Vrecps
, [], All
(3, Qreg
), "vrecpsQ", elts_same_2, [F32
];
890 Vrsqrts
, [], All
(3, Dreg
), "vrsqrts", elts_same_2, [F32
];
891 Vrsqrts
, [], All
(3, Qreg
), "vrsqrtsQ", elts_same_2, [F32
];
893 (* Vector shift left. *)
894 Vshl
, [], All
(3, Dreg
), "vshl", reg_shift, su_8_64;
895 Vshl
, [], All
(3, Qreg
), "vshlQ", reg_shift, su_8_64;
896 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
897 All
(3, Dreg
), "vRshl", reg_shift, su_8_64;
898 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
899 All
(3, Qreg
), "vRshlQ", reg_shift, su_8_64;
900 Vshl
, [Saturating
], All
(3, Dreg
), "vqshl", reg_shift, su_8_64;
901 Vshl
, [Saturating
], All
(3, Qreg
), "vqshlQ", reg_shift, su_8_64;
902 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
903 All
(3, Dreg
), "vqRshl", reg_shift, su_8_64;
904 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
905 All
(3, Qreg
), "vqRshlQ", reg_shift, su_8_64;
907 (* Vector shift right by constant. *)
908 Vshr_n
, [], Binary_imm Dreg
, "vshr_n", shift_right, su_8_64;
909 Vshr_n
, [], Binary_imm Qreg
, "vshrQ_n", shift_right, su_8_64;
910 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Dreg
,
911 "vRshr_n", shift_right, su_8_64;
912 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Qreg
,
913 "vRshrQ_n", shift_right, su_8_64;
914 Vshr_n
, [], Narrow_imm
, "vshrn_n", shift_right_sign_invar, su_16_64;
915 Vshr_n
, [Instruction_name
["vrshrn"]; Rounding
], Narrow_imm
, "vRshrn_n",
916 shift_right_sign_invar, su_16_64;
917 Vshr_n
, [Saturating
], Narrow_imm
, "vqshrn_n", shift_right, su_16_64;
918 Vshr_n
, [Instruction_name
["vqrshrn"]; Saturating
; Rounding
], Narrow_imm
,
919 "vqRshrn_n", shift_right, su_16_64;
920 Vshr_n
, [Saturating
; Dst_unsign
], Narrow_imm
, "vqshrun_n",
921 shift_right_to_uns, [S16
; S32
; S64
];
922 Vshr_n
, [Instruction_name
["vqrshrun"]; Saturating
; Dst_unsign
; Rounding
],
923 Narrow_imm
, "vqRshrun_n", shift_right_to_uns, [S16
; S32
; S64
];
925 (* Vector shift left by constant. *)
926 Vshl_n
, [], Binary_imm Dreg
, "vshl_n", shift_left_sign_invar, su_8_64;
927 Vshl_n
, [], Binary_imm Qreg
, "vshlQ_n", shift_left_sign_invar, su_8_64;
928 Vshl_n
, [Saturating
], Binary_imm Dreg
, "vqshl_n", shift_left, su_8_64;
929 Vshl_n
, [Saturating
], Binary_imm Qreg
, "vqshlQ_n", shift_left, su_8_64;
930 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Dreg
, "vqshlu_n",
931 shift_left_to_uns, [S8
; S16
; S32
; S64
];
932 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Qreg
, "vqshluQ_n",
933 shift_left_to_uns, [S8
; S16
; S32
; S64
];
934 Vshl_n
, [], Long_imm
, "vshll_n", shift_left, su_8_32;
936 (* Vector shift right by constant and accumulate. *)
937 Vsra_n
, [], Binary_imm Dreg
, "vsra_n", shift_right_acc, su_8_64;
938 Vsra_n
, [], Binary_imm Qreg
, "vsraQ_n", shift_right_acc, su_8_64;
939 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Dreg
,
940 "vRsra_n", shift_right_acc, su_8_64;
941 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Qreg
,
942 "vRsraQ_n", shift_right_acc, su_8_64;
944 (* Vector shift right and insert. *)
945 Vsri
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsri_n", shift_insert,
946 P8
:: P16
:: su_8_64;
947 Vsri
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsriQ_n", shift_insert,
948 P8
:: P16
:: su_8_64;
950 (* Vector shift left and insert. *)
951 Vsli
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsli_n", shift_insert,
952 P8
:: P16
:: su_8_64;
953 Vsli
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsliQ_n", shift_insert,
954 P8
:: P16
:: su_8_64;
956 (* Absolute value. *)
957 Vabs
, [], All
(2, Dreg
), "vabs", elts_same_1, [S8
; S16
; S32
; F32
];
958 Vabs
, [], All
(2, Qreg
), "vabsQ", elts_same_1, [S8
; S16
; S32
; F32
];
959 Vabs
, [Saturating
], All
(2, Dreg
), "vqabs", elts_same_1, [S8
; S16
; S32
];
960 Vabs
, [Saturating
], All
(2, Qreg
), "vqabsQ", elts_same_1, [S8
; S16
; S32
];
963 Vneg
, [], All
(2, Dreg
), "vneg", elts_same_1, [S8
; S16
; S32
; F32
];
964 Vneg
, [], All
(2, Qreg
), "vnegQ", elts_same_1, [S8
; S16
; S32
; F32
];
965 Vneg
, [Saturating
], All
(2, Dreg
), "vqneg", elts_same_1, [S8
; S16
; S32
];
966 Vneg
, [Saturating
], All
(2, Qreg
), "vqnegQ", elts_same_1, [S8
; S16
; S32
];
969 Vmvn
, [], All
(2, Dreg
), "vmvn", notype_1, P8
:: su_8_32;
970 Vmvn
, [], All
(2, Qreg
), "vmvnQ", notype_1, P8
:: su_8_32;
972 (* Count leading sign bits. *)
973 Vcls
, [], All
(2, Dreg
), "vcls", elts_same_1, [S8
; S16
; S32
];
974 Vcls
, [], All
(2, Qreg
), "vclsQ", elts_same_1, [S8
; S16
; S32
];
976 (* Count leading zeros. *)
977 Vclz
, [], All
(2, Dreg
), "vclz", sign_invar_1, su_8_32;
978 Vclz
, [], All
(2, Qreg
), "vclzQ", sign_invar_1, su_8_32;
980 (* Count number of set bits. *)
981 Vcnt
, [], All
(2, Dreg
), "vcnt", bits_1, [P8
; S8
; U8
];
982 Vcnt
, [], All
(2, Qreg
), "vcntQ", bits_1, [P8
; S8
; U8
];
984 (* Reciprocal estimate. *)
985 Vrecpe
, [], All
(2, Dreg
), "vrecpe", elts_same_1, [U32
; F32
];
986 Vrecpe
, [], All
(2, Qreg
), "vrecpeQ", elts_same_1, [U32
; F32
];
988 (* Reciprocal square-root estimate. *)
989 Vrsqrte
, [], All
(2, Dreg
), "vrsqrte", elts_same_1, [U32
; F32
];
990 Vrsqrte
, [], All
(2, Qreg
), "vrsqrteQ", elts_same_1, [U32
; F32
];
992 (* Get lanes from a vector. *)
994 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
995 Instruction_name
["vmov"]],
996 Use_operands
[| Corereg
; Dreg
; Immed
|],
997 "vget_lane", get_lane, pf_su_8_32;
1001 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
1002 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1003 Use_operands
[| Corereg
; Dreg
; Immed
|],
1004 "vget_lane", notype_2, [S64
; U64
];
1006 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
1007 Instruction_name
["vmov"]],
1008 Use_operands
[| Corereg
; Qreg
; Immed
|],
1009 "vgetQ_lane", get_lane, pf_su_8_32;
1012 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
1013 Instruction_name
["vmov"; "fmrrd"]; Const_valuator
(fun _
-> 0);
1015 Use_operands
[| Corereg
; Qreg
; Immed
|],
1016 "vgetQ_lane", notype_2, [S64
; U64
];
1018 (* Set lanes in a vector. *)
1019 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
1020 Instruction_name
["vmov"]],
1021 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
1022 set_lane, pf_su_8_32;
1024 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
1025 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1026 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
1027 set_lane_notype, [S64
; U64
];
1028 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
1029 Instruction_name
["vmov"]],
1030 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
1031 set_lane, pf_su_8_32;
1032 Vset_lane
, [Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
1033 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1034 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
1035 set_lane_notype, [S64
; U64
];
1037 (* Create vector from literal bit pattern. *)
1039 [No_op
], (* Not really, but it can yield various things that are too
1040 hard for the test generator at this time. *)
1041 Use_operands
[| Dreg
; Corereg
|], "vcreate", create_vector,
1044 (* Set all lanes to the same value. *)
1046 [Disassembles_as
[Use_operands
[| Dreg
;
1047 Alternatives
[ Corereg
;
1048 Element_of_dreg
] |]]],
1049 Use_operands
[| Dreg
; Corereg
|], "vdup_n", bits_1,
1053 Instruction_name
["vmov"];
1054 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1055 Use_operands
[| Dreg
; Corereg
|], "vdup_n", notype_1,
1058 [Disassembles_as
[Use_operands
[| Qreg
;
1059 Alternatives
[ Corereg
;
1060 Element_of_dreg
] |]]],
1061 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", bits_1,
1065 Instruction_name
["vmov"];
1066 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1067 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1068 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", notype_1,
1071 (* These are just aliases for the above. *)
1073 [Builtin_name
"vdup_n";
1074 Disassembles_as
[Use_operands
[| Dreg
;
1075 Alternatives
[ Corereg
;
1076 Element_of_dreg
] |]]],
1077 Use_operands
[| Dreg
; Corereg
|],
1078 "vmov_n", bits_1, pf_su_8_32;
1081 Builtin_name
"vdup_n";
1082 Instruction_name
["vmov"];
1083 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1084 Use_operands
[| Dreg
; Corereg
|],
1085 "vmov_n", notype_1, [S64
; U64
];
1087 [Builtin_name
"vdupQ_n";
1088 Disassembles_as
[Use_operands
[| Qreg
;
1089 Alternatives
[ Corereg
;
1090 Element_of_dreg
] |]]],
1091 Use_operands
[| Qreg
; Corereg
|],
1092 "vmovQ_n", bits_1, pf_su_8_32;
1095 Builtin_name
"vdupQ_n";
1096 Instruction_name
["vmov"];
1097 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1098 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1099 Use_operands
[| Qreg
; Corereg
|],
1100 "vmovQ_n", notype_1, [S64
; U64
];
1102 (* Duplicate, lane version. We can't use Use_operands here because the
1103 rightmost register (always Dreg) would be picked up by find_key_operand,
1104 when we want the leftmost register to be used in this case (otherwise
1105 the modes are indistinguishable in neon.md, etc. *)
1107 [Disassembles_as
[Use_operands
[| Dreg
; Element_of_dreg
|]]],
1108 Unary_scalar Dreg
, "vdup_lane", bits_2, pf_su_8_32;
1110 [No_op
; Const_valuator
(fun _
-> 0)],
1111 Unary_scalar Dreg
, "vdup_lane", bits_2, [S64
; U64
];
1113 [Disassembles_as
[Use_operands
[| Qreg
; Element_of_dreg
|]]],
1114 Unary_scalar Qreg
, "vdupQ_lane", bits_2, pf_su_8_32;
1116 [No_op
; Const_valuator
(fun _
-> 0)],
1117 Unary_scalar Qreg
, "vdupQ_lane", bits_2, [S64
; U64
];
1119 (* Combining vectors. *)
1121 Use_operands
[| Qreg
; Dreg
; Dreg
|], "vcombine", notype_2,
1124 (* Splitting vectors. *)
1126 Use_operands
[| Dreg
; Qreg
|], "vget_high",
1127 notype_1, pf_su_8_64;
1128 Vget_low
, [Instruction_name
["vmov"];
1129 Disassembles_as
[Use_operands
[| Dreg
; Dreg
|]];
1131 Use_operands
[| Dreg
; Qreg
|], "vget_low",
1132 notype_1, pf_su_8_32;
1134 Use_operands
[| Dreg
; Qreg
|], "vget_low",
1135 notype_1, [S64
; U64
];
1138 Vcvt
, [InfoWord
], All
(2, Dreg
), "vcvt", conv_1,
1139 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1140 Vcvt
, [InfoWord
], All
(2, Qreg
), "vcvtQ", conv_1,
1141 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1142 Vcvt_n
, [InfoWord
], Use_operands
[| Dreg
; Dreg
; Immed
|], "vcvt_n", conv_2,
1143 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1144 Vcvt_n
, [InfoWord
], Use_operands
[| Qreg
; Qreg
; Immed
|], "vcvtQ_n", conv_2,
1145 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1147 (* Move, narrowing. *)
1148 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]],
1149 Narrow
, "vmovn", sign_invar_1, su_16_64;
1150 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
],
1151 Narrow
, "vqmovn", elts_same_1, su_16_64;
1153 [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
; Dst_unsign
],
1154 Narrow
, "vqmovun", dst_unsign_1,
1158 Vmovl
, [Disassembles_as
[Use_operands
[| Qreg
; Dreg
|]]],
1159 Long
, "vmovl", elts_same_1, su_8_32;
1163 [Instruction_name
["vtbl"];
1164 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1165 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbl1", table_2, [U8
; S8
; P8
];
1166 Vtbl
2, [Instruction_name
["vtbl"]],
1167 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbl2", table_2,
1169 Vtbl
3, [Instruction_name
["vtbl"]],
1170 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbl3", table_2,
1172 Vtbl
4, [Instruction_name
["vtbl"]],
1173 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbl4", table_2,
1176 (* Extended table lookup. *)
1178 [Instruction_name
["vtbx"];
1179 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1180 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbx1", table_io, [U8
; S8
; P8
];
1181 Vtbx
2, [Instruction_name
["vtbx"]],
1182 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbx2", table_io,
1184 Vtbx
3, [Instruction_name
["vtbx"]],
1185 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbx3", table_io,
1187 Vtbx
4, [Instruction_name
["vtbx"]],
1188 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbx4", table_io,
1191 (* Multiply, lane. (note: these were undocumented at the time of
1193 Vmul_lane
, [], By_scalar Dreg
, "vmul_lane", sign_invar_2_lane,
1194 [S16
; S32
; U16
; U32
; F32
];
1195 Vmul_lane
, [], By_scalar Qreg
, "vmulQ_lane", sign_invar_2_lane,
1196 [S16
; S32
; U16
; U32
; F32
];
1198 (* Multiply-accumulate, lane. *)
1199 Vmla_lane
, [], By_scalar Dreg
, "vmla_lane", sign_invar_io_lane,
1200 [S16
; S32
; U16
; U32
; F32
];
1201 Vmla_lane
, [], By_scalar Qreg
, "vmlaQ_lane", sign_invar_io_lane,
1202 [S16
; S32
; U16
; U32
; F32
];
1203 Vmla_lane
, [], Wide_lane
, "vmlal_lane", elts_same_io_lane,
1204 [S16
; S32
; U16
; U32
];
1205 Vmla_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlal_lane",
1206 elts_same_io_lane, [S16
; S32
];
1208 (* Multiply-subtract, lane. *)
1209 Vmls_lane
, [], By_scalar Dreg
, "vmls_lane", sign_invar_io_lane,
1210 [S16
; S32
; U16
; U32
; F32
];
1211 Vmls_lane
, [], By_scalar Qreg
, "vmlsQ_lane", sign_invar_io_lane,
1212 [S16
; S32
; U16
; U32
; F32
];
1213 Vmls_lane
, [], Wide_lane
, "vmlsl_lane", elts_same_io_lane,
1214 [S16
; S32
; U16
; U32
];
1215 Vmls_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlsl_lane",
1216 elts_same_io_lane, [S16
; S32
];
1218 (* Long multiply, lane. *)
1220 Wide_lane
, "vmull_lane", elts_same_2_lane, [S16
; S32
; U16
; U32
];
1222 (* Saturating doubling long multiply, lane. *)
1223 Vqdmull_lane
, [Saturating
; Doubling
],
1224 Wide_lane
, "vqdmull_lane", elts_same_2_lane, [S16
; S32
];
1226 (* Saturating doubling long multiply high, lane. *)
1227 Vqdmulh_lane
, [Saturating
; Halving
],
1228 By_scalar Qreg
, "vqdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1229 Vqdmulh_lane
, [Saturating
; Halving
],
1230 By_scalar Dreg
, "vqdmulh_lane", elts_same_2_lane, [S16
; S32
];
1231 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1232 Instruction_name
["vqrdmulh"]],
1233 By_scalar Qreg
, "vqRdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1234 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1235 Instruction_name
["vqrdmulh"]],
1236 By_scalar Dreg
, "vqRdmulh_lane", elts_same_2_lane, [S16
; S32
];
1238 (* Vector multiply by scalar. *)
1240 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1241 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmul_n",
1242 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1244 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1245 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmulQ_n",
1246 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1248 (* Vector long multiply by scalar. *)
1249 Vmull_n
, [Instruction_name
["vmull"];
1250 Disassembles_as
[Use_operands
[| Qreg
; Dreg
; Element_of_dreg
|]]],
1251 Wide_scalar
, "vmull_n",
1252 elts_same_2, [S16
; S32
; U16
; U32
];
1254 (* Vector saturating doubling long multiply by scalar. *)
1255 Vqdmull_n
, [Saturating
; Doubling
;
1256 Disassembles_as
[Use_operands
[| Qreg
; Dreg
;
1257 Element_of_dreg
|]]],
1258 Wide_scalar
, "vqdmull_n",
1259 elts_same_2, [S16
; S32
];
1261 (* Vector saturating doubling long multiply high by scalar. *)
1263 [Saturating
; Halving
; InfoWord
;
1264 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1265 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1266 "vqdmulhQ_n", elts_same_2, [S16
; S32
];
1268 [Saturating
; Halving
; InfoWord
;
1269 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1270 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1271 "vqdmulh_n", elts_same_2, [S16
; S32
];
1273 [Saturating
; Halving
; Rounding
; InfoWord
;
1274 Instruction_name
["vqrdmulh"];
1275 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1276 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1277 "vqRdmulhQ_n", elts_same_2, [S16
; S32
];
1279 [Saturating
; Halving
; Rounding
; InfoWord
;
1280 Instruction_name
["vqrdmulh"];
1281 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1282 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1283 "vqRdmulh_n", elts_same_2, [S16
; S32
];
1285 (* Vector multiply-accumulate by scalar. *)
1287 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1288 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmla_n",
1289 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1291 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1292 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlaQ_n",
1293 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1294 Vmla_n
, [], Wide_scalar
, "vmlal_n", elts_same_io, [S16
; S32
; U16
; U32
];
1295 Vmla_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlal_n", elts_same_io,
1298 (* Vector multiply subtract by scalar. *)
1300 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1301 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmls_n",
1302 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1304 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1305 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlsQ_n",
1306 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1307 Vmls_n
, [], Wide_scalar
, "vmlsl_n", elts_same_io, [S16
; S32
; U16
; U32
];
1308 Vmls_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlsl_n", elts_same_io,
1311 (* Vector extract. *)
1312 Vext
, [Const_valuator
(fun _
-> 0)],
1313 Use_operands
[| Dreg
; Dreg
; Dreg
; Immed
|], "vext", extend,
1315 Vext
, [Const_valuator
(fun _
-> 0)],
1316 Use_operands
[| Qreg
; Qreg
; Qreg
; Immed
|], "vextQ", extend,
1319 (* Reverse elements. *)
1320 Vrev64
, [], All
(2, Dreg
), "vrev64", bits_1, P8
:: P16
:: F32
:: su_8_32;
1321 Vrev64
, [], All
(2, Qreg
), "vrev64Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1322 Vrev32
, [], All
(2, Dreg
), "vrev32", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1323 Vrev32
, [], All
(2, Qreg
), "vrev32Q", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1324 Vrev16
, [], All
(2, Dreg
), "vrev16", bits_1, [P8
; S8
; U8
];
1325 Vrev16
, [], All
(2, Qreg
), "vrev16Q", bits_1, [P8
; S8
; U8
];
1327 (* Bit selection. *)
1329 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1330 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Dreg
|]]],
1331 Use_operands
[| Dreg
; Dreg
; Dreg
; Dreg
|], "vbsl", bit_select,
1334 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1335 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Qreg
|]]],
1336 Use_operands
[| Qreg
; Qreg
; Qreg
; Qreg
|], "vbslQ", bit_select,
1339 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1340 generating good code for intrinsics which return structure types --
1341 builtins work well by themselves (and understand that the values being
1342 stored on e.g. the stack also reside in registers, so can optimise the
1343 stores away entirely if the results are used immediately), but
1344 intrinsics are very much less efficient. Maybe something can be improved
1345 re: inlining, or tweaking the ABI used for intrinsics (a special call
1348 Vtrn
, [ReturnPtr
], Pair_result Dreg
, "vtrn", bits_2, pf_su_8_32;
1349 Vtrn
, [ReturnPtr
], Pair_result Qreg
, "vtrnQ", bits_2, pf_su_8_32;
1352 Vzip
, [ReturnPtr
], Pair_result Dreg
, "vzip", bits_2, pf_su_8_32;
1353 Vzip
, [ReturnPtr
], Pair_result Qreg
, "vzipQ", bits_2, pf_su_8_32;
1355 (* Unzip elements. *)
1356 Vuzp
, [ReturnPtr
], Pair_result Dreg
, "vuzp", bits_2, pf_su_8_32;
1357 Vuzp
, [ReturnPtr
], Pair_result Qreg
, "vuzpQ", bits_2, pf_su_8_32;
1359 (* Element/structure loads. VLD1 variants. *)
1361 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1362 CstPtrTo Corereg
|]]],
1363 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1", bits_1,
1365 Vldx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1366 CstPtrTo Corereg
|]]],
1367 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q", bits_1,
1371 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1372 CstPtrTo Corereg
|]]],
1373 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1374 "vld1_lane", bits_3, pf_su_8_32;
1376 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1377 CstPtrTo Corereg
|]];
1378 Const_valuator
(fun _
-> 0)],
1379 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1380 "vld1_lane", bits_3, [S64
; U64
];
1382 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1383 CstPtrTo Corereg
|]]],
1384 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1385 "vld1Q_lane", bits_3, pf_su_8_32;
1387 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1388 CstPtrTo Corereg
|]]],
1389 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1390 "vld1Q_lane", bits_3, [S64
; U64
];
1393 [Disassembles_as
[Use_operands
[| VecArray
(1, All_elements_of_dreg
);
1394 CstPtrTo Corereg
|]]],
1395 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1398 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1399 CstPtrTo Corereg
|]]],
1400 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1403 [Disassembles_as
[Use_operands
[| VecArray
(2, All_elements_of_dreg
);
1404 CstPtrTo Corereg
|]]],
1405 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1408 [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1409 CstPtrTo Corereg
|]]],
1410 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1413 (* VST1 variants. *)
1414 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1416 Use_operands
[| PtrTo Corereg
; Dreg
|], "vst1",
1417 store_1, pf_su_8_64;
1418 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1420 Use_operands
[| PtrTo Corereg
; Qreg
|], "vst1Q",
1421 store_1, pf_su_8_64;
1424 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1425 CstPtrTo Corereg
|]]],
1426 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1427 "vst1_lane", store_3, pf_su_8_32;
1429 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1430 CstPtrTo Corereg
|]];
1431 Const_valuator
(fun _
-> 0)],
1432 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1433 "vst1_lane", store_3, [U64
; S64
];
1435 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1436 CstPtrTo Corereg
|]]],
1437 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1438 "vst1Q_lane", store_3, pf_su_8_32;
1440 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1441 CstPtrTo Corereg
|]]],
1442 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1443 "vst1Q_lane", store_3, [U64
; S64
];
1445 (* VLD2 variants. *)
1446 Vldx
2, [], Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1447 "vld2", bits_1, pf_su_8_32;
1448 Vldx
2, [Instruction_name
["vld1"]],
1449 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1450 "vld2", bits_1, [S64
; U64
];
1451 Vldx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1452 CstPtrTo Corereg
|];
1453 Use_operands
[| VecArray
(2, Dreg
);
1454 CstPtrTo Corereg
|]]],
1455 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
|],
1456 "vld2Q", bits_1, pf_su_8_32;
1459 [Disassembles_as
[Use_operands
1460 [| VecArray
(2, Element_of_dreg
);
1461 CstPtrTo Corereg
|]]],
1462 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
;
1463 VecArray
(2, Dreg
); Immed
|],
1464 "vld2_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1466 [Disassembles_as
[Use_operands
1467 [| VecArray
(2, Element_of_dreg
);
1468 CstPtrTo Corereg
|]]],
1469 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
;
1470 VecArray
(2, Qreg
); Immed
|],
1471 "vld2Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1474 [Disassembles_as
[Use_operands
1475 [| VecArray
(2, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1476 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1477 "vld2_dup", bits_1, pf_su_8_32;
1479 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1480 [| VecArray
(2, Dreg
); CstPtrTo Corereg
|]]],
1481 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1482 "vld2_dup", bits_1, [S64
; U64
];
1484 (* VST2 variants. *)
1485 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1487 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1488 store_1, pf_su_8_32;
1489 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1491 Instruction_name
["vst1"]],
1492 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1493 store_1, [S64
; U64
];
1494 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1496 Use_operands
[| VecArray
(2, Dreg
);
1498 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
) |], "vst2Q",
1499 store_1, pf_su_8_32;
1502 [Disassembles_as
[Use_operands
1503 [| VecArray
(2, Element_of_dreg
);
1504 CstPtrTo Corereg
|]]],
1505 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
); Immed
|], "vst2_lane",
1506 store_3, P8
:: P16
:: F32
:: su_8_32;
1508 [Disassembles_as
[Use_operands
1509 [| VecArray
(2, Element_of_dreg
);
1510 CstPtrTo Corereg
|]]],
1511 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
); Immed
|], "vst2Q_lane",
1512 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1514 (* VLD3 variants. *)
1515 Vldx
3, [], Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1516 "vld3", bits_1, pf_su_8_32;
1517 Vldx
3, [Instruction_name
["vld1"]],
1518 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1519 "vld3", bits_1, [S64
; U64
];
1520 Vldx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1521 CstPtrTo Corereg
|];
1522 Use_operands
[| VecArray
(3, Dreg
);
1523 CstPtrTo Corereg
|]]],
1524 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
|],
1525 "vld3Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1528 [Disassembles_as
[Use_operands
1529 [| VecArray
(3, Element_of_dreg
);
1530 CstPtrTo Corereg
|]]],
1531 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
;
1532 VecArray
(3, Dreg
); Immed
|],
1533 "vld3_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1535 [Disassembles_as
[Use_operands
1536 [| VecArray
(3, Element_of_dreg
);
1537 CstPtrTo Corereg
|]]],
1538 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
;
1539 VecArray
(3, Qreg
); Immed
|],
1540 "vld3Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1543 [Disassembles_as
[Use_operands
1544 [| VecArray
(3, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1545 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1546 "vld3_dup", bits_1, pf_su_8_32;
1548 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1549 [| VecArray
(3, Dreg
); CstPtrTo Corereg
|]]],
1550 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1551 "vld3_dup", bits_1, [S64
; U64
];
1553 (* VST3 variants. *)
1554 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1556 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1557 store_1, pf_su_8_32;
1558 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1560 Instruction_name
["vst1"]],
1561 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1562 store_1, [S64
; U64
];
1563 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1565 Use_operands
[| VecArray
(3, Dreg
);
1567 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
) |], "vst3Q",
1568 store_1, pf_su_8_32;
1571 [Disassembles_as
[Use_operands
1572 [| VecArray
(3, Element_of_dreg
);
1573 CstPtrTo Corereg
|]]],
1574 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
); Immed
|], "vst3_lane",
1575 store_3, P8
:: P16
:: F32
:: su_8_32;
1577 [Disassembles_as
[Use_operands
1578 [| VecArray
(3, Element_of_dreg
);
1579 CstPtrTo Corereg
|]]],
1580 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
); Immed
|], "vst3Q_lane",
1581 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1583 (* VLD4/VST4 variants. *)
1584 Vldx
4, [], Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1585 "vld4", bits_1, pf_su_8_32;
1586 Vldx
4, [Instruction_name
["vld1"]],
1587 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1588 "vld4", bits_1, [S64
; U64
];
1589 Vldx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1590 CstPtrTo Corereg
|];
1591 Use_operands
[| VecArray
(4, Dreg
);
1592 CstPtrTo Corereg
|]]],
1593 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
|],
1594 "vld4Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1597 [Disassembles_as
[Use_operands
1598 [| VecArray
(4, Element_of_dreg
);
1599 CstPtrTo Corereg
|]]],
1600 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
;
1601 VecArray
(4, Dreg
); Immed
|],
1602 "vld4_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1604 [Disassembles_as
[Use_operands
1605 [| VecArray
(4, Element_of_dreg
);
1606 CstPtrTo Corereg
|]]],
1607 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
;
1608 VecArray
(4, Qreg
); Immed
|],
1609 "vld4Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1612 [Disassembles_as
[Use_operands
1613 [| VecArray
(4, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1614 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1615 "vld4_dup", bits_1, pf_su_8_32;
1617 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1618 [| VecArray
(4, Dreg
); CstPtrTo Corereg
|]]],
1619 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1620 "vld4_dup", bits_1, [S64
; U64
];
1622 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1624 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1625 store_1, pf_su_8_32;
1626 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1628 Instruction_name
["vst1"]],
1629 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1630 store_1, [S64
; U64
];
1631 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1633 Use_operands
[| VecArray
(4, Dreg
);
1635 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
) |], "vst4Q",
1636 store_1, pf_su_8_32;
1639 [Disassembles_as
[Use_operands
1640 [| VecArray
(4, Element_of_dreg
);
1641 CstPtrTo Corereg
|]]],
1642 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
); Immed
|], "vst4_lane",
1643 store_3, P8
:: P16
:: F32
:: su_8_32;
1645 [Disassembles_as
[Use_operands
1646 [| VecArray
(4, Element_of_dreg
);
1647 CstPtrTo Corereg
|]]],
1648 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
); Immed
|], "vst4Q_lane",
1649 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1651 (* Logical operations. And. *)
1652 Vand
, [], All
(3, Dreg
), "vand", notype_2, su_8_32;
1653 Vand
, [No_op
], All
(3, Dreg
), "vand", notype_2, [S64
; U64
];
1654 Vand
, [], All
(3, Qreg
), "vandQ", notype_2, su_8_64;
1657 Vorr
, [], All
(3, Dreg
), "vorr", notype_2, su_8_32;
1658 Vorr
, [No_op
], All
(3, Dreg
), "vorr", notype_2, [S64
; U64
];
1659 Vorr
, [], All
(3, Qreg
), "vorrQ", notype_2, su_8_64;
1662 Veor
, [], All
(3, Dreg
), "veor", notype_2, su_8_32;
1663 Veor
, [No_op
], All
(3, Dreg
), "veor", notype_2, [S64
; U64
];
1664 Veor
, [], All
(3, Qreg
), "veorQ", notype_2, su_8_64;
1666 (* Bic (And-not). *)
1667 Vbic
, [], All
(3, Dreg
), "vbic", notype_2, su_8_32;
1668 Vbic
, [No_op
], All
(3, Dreg
), "vbic", notype_2, [S64
; U64
];
1669 Vbic
, [], All
(3, Qreg
), "vbicQ", notype_2, su_8_64;
1672 Vorn
, [], All
(3, Dreg
), "vorn", notype_2, su_8_32;
1673 Vorn
, [No_op
], All
(3, Dreg
), "vorn", notype_2, [S64
; U64
];
1674 Vorn
, [], All
(3, Qreg
), "vornQ", notype_2, su_8_64;
1678 let elems = P8
:: P16
:: F32
:: su_8_64 in
1681 let types = List.fold_right
1682 (fun convfrom acc
->
1683 if convfrom
<> convto
then
1684 Cast
(convto
, convfrom
) :: acc
1690 let dconv = Vreinterp
, [No_op
], Use_operands
[| Dreg
; Dreg
|],
1691 "vreinterpret", conv_1, types
1692 and qconv
= Vreinterp
, [No_op
], Use_operands
[| Qreg
; Qreg
|],
1693 "vreinterpretQ", conv_1, types in
1694 dconv :: qconv
:: acc
)
1698 (* Output routines. *)
(* Render an element type as the textual suffix used in intrinsic/builtin
   names, e.g. S8 -> "s8", F32 -> "f32" (bit-only types render as a bare
   width).  Conversions and casts join both element types with an
   underscore; NoElts is a hard error. *)
let rec string_of_elt elt =
  match elt with
  | Conv (x, y) | Cast (x, y) -> string_of_elt x ^ "_" ^ string_of_elt y
  | NoElts -> failwith "No elts"
  | S8 -> "s8"   | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8"   | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8"   | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | B8 -> "8"    | B16 -> "16"  | B32 -> "32"  | B64 -> "64"
  | F32 -> "f32" | P8 -> "p8"   | P16 -> "p16"
(* Like string_of_elt, but conversions/casts join the two element types
   with a dot instead of an underscore (e.g. "s32.f32"). *)
let string_of_elt_dots elt =
  match elt with
  | Conv (x, y) | Cast (x, y) -> string_of_elt x ^ "." ^ string_of_elt y
  | simple -> string_of_elt simple
(* Map a vectype to the C type name used in the generated header and
   documentation.  [wrap] appends any pending suffix: the top-level call
   passes (fun x -> x ^ "_t"), and arrays thread an "x<n>" count through
   it.  Immediates and the internal __builtin_neon_* scalar modes are
   emitted verbatim, with no suffix applied. *)
let string_of_vectype vt =
  let rec render wrap = function
    T_int8x8 -> wrap "int8x8"
  | T_int8x16 -> wrap "int8x16"
  | T_int16x4 -> wrap "int16x4"
  | T_int16x8 -> wrap "int16x8"
  | T_int32x2 -> wrap "int32x2"
  | T_int32x4 -> wrap "int32x4"
  | T_int64x1 -> wrap "int64x1"
  | T_int64x2 -> wrap "int64x2"
  | T_uint8x8 -> wrap "uint8x8"
  | T_uint8x16 -> wrap "uint8x16"
  | T_uint16x4 -> wrap "uint16x4"
  | T_uint16x8 -> wrap "uint16x8"
  | T_uint32x2 -> wrap "uint32x2"
  | T_uint32x4 -> wrap "uint32x4"
  | T_uint64x1 -> wrap "uint64x1"
  | T_uint64x2 -> wrap "uint64x2"
  | T_float32x2 -> wrap "float32x2"
  | T_float32x4 -> wrap "float32x4"
  | T_poly8x8 -> wrap "poly8x8"
  | T_poly8x16 -> wrap "poly8x16"
  | T_poly16x4 -> wrap "poly16x4"
  | T_poly16x8 -> wrap "poly16x8"
  | T_int8 -> wrap "int8"
  | T_int16 -> wrap "int16"
  | T_int32 -> wrap "int32"
  | T_int64 -> wrap "int64"
  | T_uint8 -> wrap "uint8"
  | T_uint16 -> wrap "uint16"
  | T_uint32 -> wrap "uint32"
  | T_uint64 -> wrap "uint64"
  | T_poly8 -> wrap "poly8"
  | T_poly16 -> wrap "poly16"
  | T_float32 -> wrap "float32"
  | T_immediate _ -> "const int"
  (* NOTE(review): the T_void, T_ptrto and T_const arms below were lost
     in this copy of the file and are reconstructed from the upstream
     neon.ml -- confirm against the original. *)
  | T_void -> "void"
  | T_intQI -> "__builtin_neon_qi"
  | T_intHI -> "__builtin_neon_hi"
  | T_intSI -> "__builtin_neon_si"
  | T_intDI -> "__builtin_neon_di"
  | T_floatSF -> "__builtin_neon_sf"
  | T_arrayof (num, base) ->
      (* Render the element type bare, then apply the caller's suffix
         after the "x<num>" count, e.g. int8x8 array of 4 -> int8x8x4_t. *)
      let basename = render (fun x -> x) base in
      wrap (Printf.sprintf "%sx%d" basename num)
  | T_ptrto pointee ->
      let basename = render wrap pointee in
      Printf.sprintf "%s *" basename
  | T_const inner ->
      let basename = render wrap inner in
      Printf.sprintf "const %s" basename
  in
    render (fun x -> x ^ "_t") vt
(* C name of the opaque builtin type for each multi-register ("large
   integer") machine mode; see the inttype declaration for the register
   counts each mode represents. *)
let string_of_inttype it =
  match it with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Lower-case machine-mode names, as used in builtin function names. *)
let string_of_mode m =
  match m with
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  (* NOTE(review): the SF arm was lost in this copy of the file and is
     reconstructed from the upstream neon.ml -- confirm. *)
  | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name,
   but should be omitted from the builtin name (the info is passed in an
   extra argument, instead).  The intrinsic name itself is simply the
   fully lower-cased form. *)
let intrinsic_name = String.lowercase
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list: the first Flipped or Builtin_name feature
   supplies a replacement name (fold_right makes the leftmost entry win).
   Upper-case characters are then dropped, since the information they
   carry is passed to the builtin separately. *)
let builtin_name features name =
  let base =
    List.fold_right
      (fun feature acc ->
        match feature with
          Flipped x | Builtin_name x -> x
        | _ -> acc)
      features name in
  (* Keep a character only if lower-casing it changes nothing, i.e. it is
     not an upper-case letter; digits and '_' are therefore kept. *)
  let buf = Buffer.create (String.length base) in
  String.iter
    (fun c ->
      let str = String.make 1 c in
      if (String.lowercase str) = str then Buffer.add_char buf c)
    base;
  Buffer.contents buf
(* Transform an arity into a list of strings: every vectype it carries,
   in declaration order, rendered as a C type name. *)
let strings_of_arity a =
  let vectypes =
    match a with
      Arity0 vt1 -> [vt1]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vectypes
1818 (* Suffixes on the end of builtin names that are to be stripped in order
1819 to obtain the name used as an instruction. They are only stripped if
1820 preceded immediately by an underscore. *)
(* NOTE(review): get_insn_names strips at most one suffix, comparing only
   the text after the final underscore against each entry here, so the
   order of this list is irrelevant. *)
1821 let suffixes_to_strip = [ "n"; "lane"; "dup" ]
1823 (* Get the possible names of an instruction corresponding to a "name" from the
1824 ops table. This is done by getting the equivalent builtin name and
1825 stripping any suffixes from the list at the top of this file, unless
1826 the features list presents with an Instruction_name entry, in which
1827 case that is used; or unless the features list presents with a Flipped
1828 entry, in which case that is used. If both such entries are present,
1829 the first in the list will be chosen. *)
(* NOTE(review): several physical lines of this definition (e.g. the
   enclosing try and one or two match arms) appear truncated in this copy
   of the file -- confirm the exact structure against the upstream
   neon.ml before relying on it. *)
1830 let get_insn_names features
name =
(* Look for an explicit override first: the first Instruction_name or
   Flipped feature wins; if neither is present (Not_found below), fall
   back to the derived builtin name. *)
1833 match List.find
(fun feature
-> match feature
with
1834 Instruction_name _
-> true
1836 | _
-> false) features
1838 Instruction_name
names -> names
1839 | Flipped
name -> [name]
1842 with Not_found
-> [builtin_name features
name]
(* For each candidate name, strip one recognised suffix (see
   suffixes_to_strip), but only the portion after the final underscore. *)
1845 List.map
(fun name'
->
1847 let underscore = String.rindex
name' '_'
in
(* our_suffix is everything after the last '_' in the candidate. *)
1848 let our_suffix = String.sub
name'
(underscore + 1)
1849 ((String.length
name'
) - underscore - 1)
1851 let rec strip remaining_suffixes
=
1852 match remaining_suffixes
with
1854 | s
::ss
when our_suffix = s
-> String.sub
name'
0 underscore
1857 strip suffixes_to_strip
(* Names without any underscore (String.rindex raises Not_found) are
   returned unchanged. *)
1858 with (Not_found
| Invalid_argument _
) -> name'
) names
(* Apply [f] to each element of [elts] and append the comma-separated
   results to the accumulator string [acc].  An empty list yields [acc]
   unchanged; no trailing separator is emitted. *)
let commas f elts acc =
  acc ^ String.concat ", " (List.map f elts)
(* Given a list of features and the shape specified in the "ops" table,
   apply a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry
   are mapped to corresponding outputs and returned in a list.  If there
   is more than one Disassembles_as entry, only the first is used. *)
let analyze_all_shapes features shape f =
  let is_disassembly feat =
    match feat with Disassembles_as _ -> true | _ -> false in
  try
    match List.find is_disassembly features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false  (* List.find only returns matching entries.  *)
  with Not_found -> [f shape]