1 (* Common code for ARM NEON header file, documentation and test case
4 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements.  S/U are signed/unsigned integers,
   F is float, P is polynomial, I is an integer whose signedness is not
   recorded, and B is a plain bit pattern; the suffix is the element width
   in bits.  [Conv]/[Cast] pair a destination element type with a source
   element type for conversion operations. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64
          | Conv of elts * elts | Cast of elts * elts | NoElts
(* Classification of element types, mirroring the constructor families of
   [elts].  [ConvClass] pairs the destination and source classes of a
   conversion; [NoType] corresponds to [NoElts]. *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
31 (* These vector types correspond directly to C types. *)
32 type vectype
= T_int8x8
| T_int8x16
33 | T_int16x4
| T_int16x8
34 | T_int32x2
| T_int32x4
35 | T_int64x1
| T_int64x2
36 | T_uint8x8
| T_uint8x16
37 | T_uint16x4
| T_uint16x8
38 | T_uint32x2
| T_uint32x4
39 | T_uint64x1
| T_uint64x2
40 | T_float32x2
| T_float32x4
41 | T_poly8x8
| T_poly8x16
42 | T_poly16x4
| T_poly16x8
43 | T_immediate
of int * int
49 | T_float32
| T_arrayof
of int * vectype
50 | T_ptrto
of vectype
| T_const
of vectype
(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).  *)
(* Opaque multi-register machine modes used for NEON structure types
   (see the comment above for what each mode name stands for). *)
type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* One operand position in an instruction shape: a doubleword register,
   quadword register, ARM core register, immediate, an array of registers,
   or a (possibly const) pointer to one of these. *)
type shape_elt = Dreg | Qreg | Corereg | Immed
               | VecArray of int * shape_elt
               | PtrTo of shape_elt
               | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator. *)
               | Element_of_dreg       (* Used for "lane" variants. *)
               | Element_of_qreg       (* Likewise. *)
               | All_elements_of_dreg  (* Used for "dup" variants. *)
72 type shape_form
= All
of int * shape_elt
74 | Long_noreg
of shape_elt
76 | Wide_noreg
of shape_elt
80 | Binary_imm
of shape_elt
81 | Use_operands
of shape_elt array
82 | By_scalar
of shape_elt
83 | Unary_scalar
of shape_elt
86 | Pair_result
of shape_elt
(* Arity of an intrinsic: the first [vectype] is always the result type,
   followed by zero to four argument types. *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype
94 type vecmode
= V8QI
| V4HI
| V2SI
| V2SF
| DI
95 | V16QI
| V8HI
| V4SI
| V4SF
| V2DI
138 (* Ops with scalar. *)
160 (* Vector extract. *)
162 (* Reverse elements. *)
166 (* Transposition ops. *)
170 (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
176 (* Set/extract lanes from a vector. *)
179 (* Initialize vector from bit pattern. *)
181 (* Set all lanes to same value. *)
183 | Vmov_n
(* Is this the same? *)
184 (* Duplicate scalar to all lanes of vector. *)
186 (* Combine vectors. *)
188 (* Get quadword high/low parts. *)
191 (* Convert vectors. *)
194 (* Narrow/lengthen vectors. *)
200 (* Reinterpret casts. *)
203 (* Features used for documentation, to distinguish between some instruction
204 variants, and to signal special requirements (e.g. swapping arguments). *)
213 | Flipped
of string (* Builtin name to use with flipped arguments. *)
214 | InfoWord
(* Pass an extra word for signage/rounding etc. (always passed
215 for All _, Long, Wide, Narrow shape_forms. *)
216 | ReturnPtr
(* Pass explicit pointer to return value as first argument. *)
217 (* A specification as to the shape of instruction expected upon
218 disassembly, used if it differs from the shape used to build the
219 intrinsic prototype. Multiple entries in the constructor's argument
220 indicate that the intrinsic expands to more than one assembly
221 instruction, each with a corresponding shape specified here. *)
222 | Disassembles_as
of shape_form list
223 | Builtin_name
of string (* Override the name of the builtin. *)
224 (* Override the name of the instruction. If more than one name
225 is specified, it means that the instruction can have any of those
227 | Instruction_name
of string list
228 (* Mark that the intrinsic yields no instructions, or expands to yield
229 behavior that the test generator cannot test. *)
231 (* Mark that the intrinsic has constant arguments that cannot be set
232 to the defaults (zero for pointers and one otherwise) in the test
233 cases. The function supplied must return the integer to be written
234 into the testcase for the argument number (0-based) supplied to it. *)
235 | Const_valuator
of (int -> int)
(* Raised when an operation is attempted on a conversion element whose
   two element types cannot be combined. *)
exception MixedMode of elts * elts
239 let rec elt_width = function
240 S8
| U8
| P8
| I8
| B8
-> 8
241 | S16
| U16
| P16
| I16
| B16
-> 16
242 | S32
| F32
| U32
| I32
| B32
-> 32
243 | S64
| U64
| I64
| B64
-> 64
245 let wa = elt_width a
and wb
= elt_width b
in
246 if wa = wb
then wa else failwith
"element width?"
247 | Cast
(a
, b
) -> raise
(MixedMode
(a
, b
))
248 | NoElts
-> failwith
"No elts"
250 let rec elt_class = function
251 S8
| S16
| S32
| S64
-> Signed
252 | U8
| U16
| U32
| U64
-> Unsigned
255 | I8
| I16
| I32
| I64
-> Int
256 | B8
| B16
| B32
| B64
-> Bits
257 | Conv
(a
, b
) | Cast
(a
, b
) -> ConvClass
(elt_class a
, elt_class b
)
260 let elt_of_class_width c w
=
268 | Unsigned
, 16 -> U16
269 | Unsigned
, 32 -> U32
270 | Unsigned
, 64 -> U64
281 | _
-> failwith
"Bad element type"
(* Return unsigned integer element the same width as argument. *)
let unsigned_of_elt elt = elt_of_class_width Unsigned (elt_width elt)
(* Return signed integer element the same width as argument. *)
let signed_of_elt elt = elt_of_class_width Signed (elt_width elt)
(* Return untyped bits element the same width as argument. *)
let bits_of_elt elt = elt_of_class_width Bits (elt_width elt)
294 let non_signed_variant = function
305 let poly_unsigned_variant v
=
306 let elclass = match elt_class v
with
309 elt_of_class_width elclass (elt_width v
)
312 let w = elt_width elt
313 and c
= elt_class elt
in
314 elt_of_class_width c
(w * 2)
317 let w = elt_width elt
318 and c
= elt_class elt
in
319 elt_of_class_width c
(w / 2)
321 (* If we're trying to find a mode from a "Use_operands" instruction, use the
322 last vector operand as the dominant mode used to invoke the correct builtin.
323 We must stick to this rule in neon.md. *)
324 let find_key_operand operands
=
326 match operands
.(opno
) with
329 | VecArray
(_
, Qreg
) -> Qreg
330 | VecArray
(_
, Dreg
) -> Dreg
333 scan ((Array.length operands
) - 1)
335 let rec mode_of_elt elt shape
=
336 let flt = match elt_class elt
with
337 Float
| ConvClass
(_
, Float
) -> true | _
-> false in
339 match elt_width elt
with
340 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
341 | _
-> failwith
"Bad element width"
343 All
(_
, Dreg
) | By_scalar Dreg
| Pair_result Dreg
| Unary_scalar Dreg
344 | Binary_imm Dreg
| Long_noreg Dreg
| Wide_noreg Dreg
->
345 [| V8QI
; V4HI
; if flt then V2SF
else V2SI
; DI
|].(idx)
346 | All
(_
, Qreg
) | By_scalar Qreg
| Pair_result Qreg
| Unary_scalar Qreg
347 | Binary_imm Qreg
| Long_noreg Qreg
| Wide_noreg Qreg
->
348 [| V16QI
; V8HI
; if flt then V4SF
else V4SI
; V2DI
|].(idx)
349 | All
(_
, (Corereg
| PtrTo _
| CstPtrTo _
)) ->
350 [| QI
; HI
; if flt then SF
else SI
; DI
|].(idx)
351 | Long
| Wide
| Wide_lane
| Wide_scalar
353 [| V8QI
; V4HI
; V2SI
; DI
|].(idx)
354 | Narrow
| Narrow_imm
-> [| V16QI
; V8HI
; V4SI
; V2DI
|].(idx)
355 | Use_operands ops
-> mode_of_elt elt
(All
(0, (find_key_operand ops
)))
356 | _
-> failwith
"invalid shape"
358 (* Modify an element type dependent on the shape of the instruction and the
361 let shapemap shape no
=
362 let ident = fun x
-> x
in
364 All _
| Use_operands _
| By_scalar _
| Pair_result _
| Unary_scalar _
365 | Binary_imm _
-> ident
366 | Long
| Long_noreg _
| Wide_scalar
| Long_imm
->
367 [| widen_elt; ident; ident |].(no
)
368 | Wide
| Wide_noreg _
-> [| widen_elt; widen_elt; ident |].(no
)
369 | Wide_lane
-> [| widen_elt; ident; ident; ident |].(no
)
370 | Narrow
| Narrow_imm
-> [| narrow_elt; ident; ident |].(no
)
372 (* Register type (D/Q) of an operand, based on shape and operand number. *)
374 let regmap shape no
=
376 All
(_
, reg
) | Long_noreg reg
| Wide_noreg reg
-> reg
377 | Long
-> [| Qreg
; Dreg
; Dreg
|].(no
)
378 | Wide
-> [| Qreg
; Qreg
; Dreg
|].(no
)
379 | Narrow
-> [| Dreg
; Qreg
; Qreg
|].(no
)
380 | Wide_lane
-> [| Qreg
; Dreg
; Dreg
; Immed
|].(no
)
381 | Wide_scalar
-> [| Qreg
; Dreg
; Corereg
|].(no
)
382 | By_scalar reg
-> [| reg
; reg
; Dreg
; Immed
|].(no
)
383 | Unary_scalar reg
-> [| reg
; Dreg
; Immed
|].(no
)
384 | Pair_result reg
-> [| VecArray
(2, reg
); reg
; reg
|].(no
)
385 | Binary_imm reg
-> [| reg
; reg
; Immed
|].(no
)
386 | Long_imm
-> [| Qreg
; Dreg
; Immed
|].(no
)
387 | Narrow_imm
-> [| Dreg
; Qreg
; Immed
|].(no
)
388 | Use_operands these
-> these
.(no
)
390 let type_for_elt shape elt no
=
391 let elt = (shapemap shape no
) elt in
392 let reg = regmap shape no
in
393 let rec type_for_reg_elt reg elt =
408 | _
-> failwith
"Bad elt type"
423 | _
-> failwith
"Bad elt type"
438 | _
-> failwith
"Bad elt type"
442 | VecArray
(num
, sub
) ->
443 T_arrayof
(num
, type_for_reg_elt sub
elt)
445 T_ptrto
(type_for_reg_elt x
elt)
447 T_ptrto
(T_const
(type_for_reg_elt x
elt))
448 (* Anything else is solely for the use of the test generator. *)
451 type_for_reg_elt reg elt
(* Return size of a vector type, in bits.
   Raises [Not_found] for anything that is not a vector type. *)
let vectype_size = function
  | T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 ->
      64   (* doubleword (D-register) vectors *)
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 ->
      128  (* quadword (Q-register) vectors *)
  | _ -> raise Not_found
463 let inttype_for_array num elttype
=
464 let eltsize = vectype_size elttype
in
465 let numwords = (num
* eltsize) / 32 in
472 | _
-> failwith
("no int type for size " ^ string_of_int
numwords)
474 (* These functions return pairs of (internal, external) types, where "internal"
475 types are those seen by GCC, and "external" are those seen by the assembler.
476 These types aren't necessarily the same, since the intrinsics can munge more
477 than one C type into each assembler opcode. *)
(* Wrap the type-generating function [func] so that the element type it
   yields is replaced by its non-signed variant; the arity is untouched. *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  (arity, non_signed_variant elt')
(* Don't restrict any types.  [make_arity] is applied to the
   partially-applied [type_for_elt shape elt] (so it can pick per-operand
   types by number); the element type passes straight through. *)
let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  (make_arity vtype, elt)
(* As sign_invar_*, but when sign matters.  In/out lane variant: the
   result type is also the first source operand. *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))
494 elts_same (fun vtype -> Arity3
(vtype 0, vtype 0, vtype 1, vtype 2))
(* Two-operand lane variant: result plus three operand types. *)
let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

(* Three-operand form: same operand layout as the two-operand lane form. *)
let elts_same_3 = elts_same_2_lane
502 elts_same (fun vtype -> Arity2
(vtype 0, vtype 1, vtype 2))
505 elts_same (fun vtype -> Arity1
(vtype 0, vtype 1))
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data). *)
let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io     = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2      = make_sign_invariant elts_same_2
let sign_invar_1      = make_sign_invariant elts_same_1
(* Sign-sensitive comparison.  The result vector is the unsigned type of
   the same element width as the compared operands. *)
let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (rtype, vtype 1, vtype 2), elt)
523 (* Signed/unsigned invariant comparison. *)
525 let cmp_sign_invar shape
elt =
526 let shape'
, elt'
= cmp_sign_matters shape elt in
528 match non_signed_variant elt'
with
(* Comparison (VTST) where only the element width matters: the result is
   the unsigned type, and the recorded element type is untyped bits. *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (rtype, vtype 1, vtype 2), bits_of_elt elt)
(* Shift by a register operand: the shift-count vector (operand 2) is the
   signed type of the same element width. *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt in
  let op2type = type_for_elt shape (signed_of_elt elt) 2 in
  (Arity2 (vtype 0, vtype 1, op2type), elt)
547 (* Genericised constant-shift type-generating function. *)
549 let const_shift mkimm ?
arity ?result
shape elt =
550 let op2type = (shapemap shape 2) elt in
551 let op2width = elt_width op2type in
552 let op2 = mkimm
op2width
553 and op1
= type_for_elt shape elt 1
557 | Some restriction
-> restriction
elt in
558 let rtype = type_for_elt shape r_elt
0 in
560 None
-> Arity2
(rtype, op1
, op2), elt
561 | Some mkarity
-> mkarity
rtype op1
op2, elt
(* Use for immediate right-shifts: the immediate operand is bounded by
   T_immediate (1, width), where width is the element width. *)
let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt
(* Immediate right-shift and accumulate: the destination is also the
   first source operand (Arity3 (dst, dst, op1, op2)). *)
let shift_right_acc shape elt =
  const_shift
    (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2))
    shape elt
(* Use for immediate right-shifts when the operation doesn't care about
   the signedness of the data.  *)
let shift_right_sign_invar =
  make_sign_invariant shift_right
578 (* Immediate right-shift; result is unsigned even when operand is signed. *)
580 let shift_right_to_uns shape elt =
581 const_shift (fun imm
-> T_immediate
(1, imm
)) ~result
:unsigned_of_elt
(* Immediate left-shift: the immediate operand is bounded by
   T_immediate (0, width - 1). *)
let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt
589 (* Immediate left-shift, unsigned result. *)
591 let shift_left_to_uns shape elt =
592 const_shift (fun imm
-> T_immediate
(0, imm
- 1)) ~result
:unsigned_of_elt
(* Immediate left-shift, don't care about signs. *)
let shift_left_sign_invar =
  make_sign_invariant shift_left
600 (* Shift left/right and insert: only element size matters. *)
602 let shift_insert shape elt =
604 const_shift (fun imm
-> T_immediate
(1, imm
))
605 ~
arity:(fun dst op1
op2 -> Arity3
(dst
, dst
, op1
, op2)) shape elt in
606 arity, bits_of_elt elt
(* Get a lane from a vector.  Polynomial elements come back as the
   unsigned integer of the same width; 32-bit elements as untyped bits;
   everything else is unchanged. *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  let ret_elt =
    match elt with
    | P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | x -> x
  in
  (Arity2 (vtype 0, vtype 1, vtype 2), ret_elt)
(* Set a lane in a vector: only the element width is recorded. *)
let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  (Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt)
(* As [set_lane], but recording no element type at all. *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  (Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts)
(* Create a vector of element type [elt] from a U64 bit pattern. *)
let create_vector shape elt =
  let vtype = type_for_elt shape U64 1 in
  let rtype = type_for_elt shape elt 0 in
  (Arity1 (rtype, vtype), elt)
(* Vector conversions.  [elt] must be [Conv] or [Cast], carrying the
   destination and source element types; anything else is a programming
   error and fails immediately. *)
let conv make_arity shape elt =
  match elt with
  | Conv (edest, esrc) | Cast (edest, esrc) ->
      let vtype = type_for_elt shape esrc in
      let rtype = type_for_elt shape edest 0 in
      (make_arity rtype vtype, elt)
  | _ -> failwith "Non-conversion element in conversion"
(* One- and two-operand conversion arities. *)
let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
(* Operation has an unsigned result even if operands are signed. *)
let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  (make_arity rtype vtype, elt)

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
(* Wrap [func] so the element type it yields is reduced to untyped bits. *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  (arity, bits_of_elt elt')
(* Extend operation: only the element width is recorded. *)
let extend shape elt =
  let vtype = type_for_elt shape elt in
  (Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt)
(* Table look-up operations.  Operand 2 is signed/unsigned for
   signed/unsigned integer ops respectively, or unsigned for polynomial
   ops. *)
let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  (mkarity vtype op2, bits_of_elt elt)
(* Two-operand table lookup, and the in/out variant whose result is also
   the first source operand. *)
let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))

(* Operations where only bits matter. *)
let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Store with one data operand: returns void, records only element bits. *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  (Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt)
(* As [store_1], but taking three operands after the void result. *)
let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  (Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt)
684 let make_notype func
shape elt =
685 let arity, _
= func
shape elt in
(* Variants of the elts_same_* generators that record no element type. *)
let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
(* Bit-select operations (first operand is unsigned int). *)
let bit_select shape elt =
  let vtype = type_for_elt shape elt in
  let itype = type_for_elt shape (unsigned_of_elt elt) in
  (Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts)
(* Common lists of supported element types. *)
let su_8_32 = [ S8; S16; S32; U8; U16; U32 ]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [ S16; S32; S64; U16; U32; U64 ]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
710 Vadd
, [], All
(3, Dreg
), "vadd", sign_invar_2, F32
:: su_8_64;
711 Vadd
, [], All
(3, Qreg
), "vaddQ", sign_invar_2, F32
:: su_8_64;
712 Vadd
, [], Long
, "vaddl", elts_same_2, su_8_32;
713 Vadd
, [], Wide
, "vaddw", elts_same_2, su_8_32;
714 Vadd
, [Halving
], All
(3, Dreg
), "vhadd", elts_same_2, su_8_32;
715 Vadd
, [Halving
], All
(3, Qreg
), "vhaddQ", elts_same_2, su_8_32;
716 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
717 All
(3, Dreg
), "vRhadd", elts_same_2, su_8_32;
718 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
719 All
(3, Qreg
), "vRhaddQ", elts_same_2, su_8_32;
720 Vadd
, [Saturating
], All
(3, Dreg
), "vqadd", elts_same_2, su_8_64;
721 Vadd
, [Saturating
], All
(3, Qreg
), "vqaddQ", elts_same_2, su_8_64;
722 Vadd
, [High_half
], Narrow
, "vaddhn", sign_invar_2, su_16_64;
723 Vadd
, [Instruction_name
["vraddhn"]; Rounding
; High_half
],
724 Narrow
, "vRaddhn", sign_invar_2, su_16_64;
726 (* Multiplication. *)
727 Vmul
, [], All
(3, Dreg
), "vmul", sign_invar_2, P8
:: F32
:: su_8_32;
728 Vmul
, [], All
(3, Qreg
), "vmulQ", sign_invar_2, P8
:: F32
:: su_8_32;
729 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Dreg
), "vqdmulh",
730 elts_same_2, [S16
; S32
];
731 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Qreg
), "vqdmulhQ",
732 elts_same_2, [S16
; S32
];
734 [Saturating
; Rounding
; Doubling
; High_half
;
735 Instruction_name
["vqrdmulh"]],
736 All
(3, Dreg
), "vqRdmulh",
737 elts_same_2, [S16
; S32
];
739 [Saturating
; Rounding
; Doubling
; High_half
;
740 Instruction_name
["vqrdmulh"]],
741 All
(3, Qreg
), "vqRdmulhQ",
742 elts_same_2, [S16
; S32
];
743 Vmul
, [], Long
, "vmull", elts_same_2, P8
:: su_8_32;
744 Vmul
, [Saturating
; Doubling
], Long
, "vqdmull", elts_same_2, [S16
; S32
];
746 (* Multiply-accumulate. *)
747 Vmla
, [], All
(3, Dreg
), "vmla", sign_invar_io, F32
:: su_8_32;
748 Vmla
, [], All
(3, Qreg
), "vmlaQ", sign_invar_io, F32
:: su_8_32;
749 Vmla
, [], Long
, "vmlal", elts_same_io, su_8_32;
750 Vmla
, [Saturating
; Doubling
], Long
, "vqdmlal", elts_same_io, [S16
; S32
];
752 (* Multiply-subtract. *)
753 Vmls
, [], All
(3, Dreg
), "vmls", sign_invar_io, F32
:: su_8_32;
754 Vmls
, [], All
(3, Qreg
), "vmlsQ", sign_invar_io, F32
:: su_8_32;
755 Vmls
, [], Long
, "vmlsl", elts_same_io, su_8_32;
756 Vmls
, [Saturating
; Doubling
], Long
, "vqdmlsl", elts_same_io, [S16
; S32
];
759 Vsub
, [], All
(3, Dreg
), "vsub", sign_invar_2, F32
:: su_8_64;
760 Vsub
, [], All
(3, Qreg
), "vsubQ", sign_invar_2, F32
:: su_8_64;
761 Vsub
, [], Long
, "vsubl", elts_same_2, su_8_32;
762 Vsub
, [], Wide
, "vsubw", elts_same_2, su_8_32;
763 Vsub
, [Halving
], All
(3, Dreg
), "vhsub", elts_same_2, su_8_32;
764 Vsub
, [Halving
], All
(3, Qreg
), "vhsubQ", elts_same_2, su_8_32;
765 Vsub
, [Saturating
], All
(3, Dreg
), "vqsub", elts_same_2, su_8_64;
766 Vsub
, [Saturating
], All
(3, Qreg
), "vqsubQ", elts_same_2, su_8_64;
767 Vsub
, [High_half
], Narrow
, "vsubhn", sign_invar_2, su_16_64;
768 Vsub
, [Instruction_name
["vrsubhn"]; Rounding
; High_half
],
769 Narrow
, "vRsubhn", sign_invar_2, su_16_64;
771 (* Comparison, equal. *)
772 Vceq
, [], All
(3, Dreg
), "vceq", cmp_sign_invar, P8
:: F32
:: su_8_32;
773 Vceq
, [], All
(3, Qreg
), "vceqQ", cmp_sign_invar, P8
:: F32
:: su_8_32;
775 (* Comparison, greater-than or equal. *)
776 Vcge
, [], All
(3, Dreg
), "vcge", cmp_sign_matters, F32
:: su_8_32;
777 Vcge
, [], All
(3, Qreg
), "vcgeQ", cmp_sign_matters, F32
:: su_8_32;
779 (* Comparison, less-than or equal. *)
780 Vcle
, [Flipped
"vcge"], All
(3, Dreg
), "vcle", cmp_sign_matters,
782 Vcle
, [Instruction_name
["vcge"]; Flipped
"vcgeQ"],
783 All
(3, Qreg
), "vcleQ", cmp_sign_matters,
786 (* Comparison, greater-than. *)
787 Vcgt
, [], All
(3, Dreg
), "vcgt", cmp_sign_matters, F32
:: su_8_32;
788 Vcgt
, [], All
(3, Qreg
), "vcgtQ", cmp_sign_matters, F32
:: su_8_32;
790 (* Comparison, less-than. *)
791 Vclt
, [Flipped
"vcgt"], All
(3, Dreg
), "vclt", cmp_sign_matters,
793 Vclt
, [Instruction_name
["vcgt"]; Flipped
"vcgtQ"],
794 All
(3, Qreg
), "vcltQ", cmp_sign_matters,
797 (* Compare absolute greater-than or equal. *)
798 Vcage
, [Instruction_name
["vacge"]],
799 All
(3, Dreg
), "vcage", cmp_sign_matters, [F32
];
800 Vcage
, [Instruction_name
["vacge"]],
801 All
(3, Qreg
), "vcageQ", cmp_sign_matters, [F32
];
803 (* Compare absolute less-than or equal. *)
804 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcage"],
805 All
(3, Dreg
), "vcale", cmp_sign_matters, [F32
];
806 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcageQ"],
807 All
(3, Qreg
), "vcaleQ", cmp_sign_matters, [F32
];
809 (* Compare absolute greater-than or equal. *)
810 Vcagt
, [Instruction_name
["vacgt"]],
811 All
(3, Dreg
), "vcagt", cmp_sign_matters, [F32
];
812 Vcagt
, [Instruction_name
["vacgt"]],
813 All
(3, Qreg
), "vcagtQ", cmp_sign_matters, [F32
];
815 (* Compare absolute less-than or equal. *)
816 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagt"],
817 All
(3, Dreg
), "vcalt", cmp_sign_matters, [F32
];
818 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagtQ"],
819 All
(3, Qreg
), "vcaltQ", cmp_sign_matters, [F32
];
822 Vtst
, [], All
(3, Dreg
), "vtst", cmp_bits, P8
:: su_8_32;
823 Vtst
, [], All
(3, Qreg
), "vtstQ", cmp_bits, P8
:: su_8_32;
825 (* Absolute difference. *)
826 Vabd
, [], All
(3, Dreg
), "vabd", elts_same_2, F32
:: su_8_32;
827 Vabd
, [], All
(3, Qreg
), "vabdQ", elts_same_2, F32
:: su_8_32;
828 Vabd
, [], Long
, "vabdl", elts_same_2, su_8_32;
830 (* Absolute difference and accumulate. *)
831 Vaba
, [], All
(3, Dreg
), "vaba", elts_same_io, su_8_32;
832 Vaba
, [], All
(3, Qreg
), "vabaQ", elts_same_io, su_8_32;
833 Vaba
, [], Long
, "vabal", elts_same_io, su_8_32;
836 Vmax
, [], All
(3, Dreg
), "vmax", elts_same_2, F32
:: su_8_32;
837 Vmax
, [], All
(3, Qreg
), "vmaxQ", elts_same_2, F32
:: su_8_32;
840 Vmin
, [], All
(3, Dreg
), "vmin", elts_same_2, F32
:: su_8_32;
841 Vmin
, [], All
(3, Qreg
), "vminQ", elts_same_2, F32
:: su_8_32;
844 Vpadd
, [], All
(3, Dreg
), "vpadd", sign_invar_2, F32
:: su_8_32;
845 Vpadd
, [], Long_noreg Dreg
, "vpaddl", elts_same_1, su_8_32;
846 Vpadd
, [], Long_noreg Qreg
, "vpaddlQ", elts_same_1, su_8_32;
848 (* Pairwise add, widen and accumulate. *)
849 Vpada
, [], Wide_noreg Dreg
, "vpadal", elts_same_2, su_8_32;
850 Vpada
, [], Wide_noreg Qreg
, "vpadalQ", elts_same_2, su_8_32;
852 (* Folding maximum, minimum. *)
853 Vpmax
, [], All
(3, Dreg
), "vpmax", elts_same_2, F32
:: su_8_32;
854 Vpmin
, [], All
(3, Dreg
), "vpmin", elts_same_2, F32
:: su_8_32;
856 (* Reciprocal step. *)
857 Vrecps
, [], All
(3, Dreg
), "vrecps", elts_same_2, [F32
];
858 Vrecps
, [], All
(3, Qreg
), "vrecpsQ", elts_same_2, [F32
];
859 Vrsqrts
, [], All
(3, Dreg
), "vrsqrts", elts_same_2, [F32
];
860 Vrsqrts
, [], All
(3, Qreg
), "vrsqrtsQ", elts_same_2, [F32
];
862 (* Vector shift left. *)
863 Vshl
, [], All
(3, Dreg
), "vshl", reg_shift, su_8_64;
864 Vshl
, [], All
(3, Qreg
), "vshlQ", reg_shift, su_8_64;
865 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
866 All
(3, Dreg
), "vRshl", reg_shift, su_8_64;
867 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
868 All
(3, Qreg
), "vRshlQ", reg_shift, su_8_64;
869 Vshl
, [Saturating
], All
(3, Dreg
), "vqshl", reg_shift, su_8_64;
870 Vshl
, [Saturating
], All
(3, Qreg
), "vqshlQ", reg_shift, su_8_64;
871 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
872 All
(3, Dreg
), "vqRshl", reg_shift, su_8_64;
873 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
874 All
(3, Qreg
), "vqRshlQ", reg_shift, su_8_64;
876 (* Vector shift right by constant. *)
877 Vshr_n
, [], Binary_imm Dreg
, "vshr_n", shift_right, su_8_64;
878 Vshr_n
, [], Binary_imm Qreg
, "vshrQ_n", shift_right, su_8_64;
879 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Dreg
,
880 "vRshr_n", shift_right, su_8_64;
881 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Qreg
,
882 "vRshrQ_n", shift_right, su_8_64;
883 Vshr_n
, [], Narrow_imm
, "vshrn_n", shift_right_sign_invar, su_16_64;
884 Vshr_n
, [Instruction_name
["vrshrn"]; Rounding
], Narrow_imm
, "vRshrn_n",
885 shift_right_sign_invar, su_16_64;
886 Vshr_n
, [Saturating
], Narrow_imm
, "vqshrn_n", shift_right, su_16_64;
887 Vshr_n
, [Instruction_name
["vqrshrn"]; Saturating
; Rounding
], Narrow_imm
,
888 "vqRshrn_n", shift_right, su_16_64;
889 Vshr_n
, [Saturating
; Dst_unsign
], Narrow_imm
, "vqshrun_n",
890 shift_right_to_uns, [S16
; S32
; S64
];
891 Vshr_n
, [Instruction_name
["vqrshrun"]; Saturating
; Dst_unsign
; Rounding
],
892 Narrow_imm
, "vqRshrun_n", shift_right_to_uns, [S16
; S32
; S64
];
894 (* Vector shift left by constant. *)
895 Vshl_n
, [], Binary_imm Dreg
, "vshl_n", shift_left_sign_invar, su_8_64;
896 Vshl_n
, [], Binary_imm Qreg
, "vshlQ_n", shift_left_sign_invar, su_8_64;
897 Vshl_n
, [Saturating
], Binary_imm Dreg
, "vqshl_n", shift_left, su_8_64;
898 Vshl_n
, [Saturating
], Binary_imm Qreg
, "vqshlQ_n", shift_left, su_8_64;
899 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Dreg
, "vqshlu_n",
900 shift_left_to_uns, [S8
; S16
; S32
; S64
];
901 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Qreg
, "vqshluQ_n",
902 shift_left_to_uns, [S8
; S16
; S32
; S64
];
903 Vshl_n
, [], Long_imm
, "vshll_n", shift_left, su_8_32;
905 (* Vector shift right by constant and accumulate. *)
906 Vsra_n
, [], Binary_imm Dreg
, "vsra_n", shift_right_acc, su_8_64;
907 Vsra_n
, [], Binary_imm Qreg
, "vsraQ_n", shift_right_acc, su_8_64;
908 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Dreg
,
909 "vRsra_n", shift_right_acc, su_8_64;
910 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Qreg
,
911 "vRsraQ_n", shift_right_acc, su_8_64;
913 (* Vector shift right and insert. *)
914 Vsri
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsri_n", shift_insert,
915 P8
:: P16
:: su_8_64;
916 Vsri
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsriQ_n", shift_insert,
917 P8
:: P16
:: su_8_64;
919 (* Vector shift left and insert. *)
920 Vsli
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsli_n", shift_insert,
921 P8
:: P16
:: su_8_64;
922 Vsli
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsliQ_n", shift_insert,
923 P8
:: P16
:: su_8_64;
925 (* Absolute value. *)
926 Vabs
, [], All
(2, Dreg
), "vabs", elts_same_1, [S8
; S16
; S32
; F32
];
927 Vabs
, [], All
(2, Qreg
), "vabsQ", elts_same_1, [S8
; S16
; S32
; F32
];
928 Vabs
, [Saturating
], All
(2, Dreg
), "vqabs", elts_same_1, [S8
; S16
; S32
];
929 Vabs
, [Saturating
], All
(2, Qreg
), "vqabsQ", elts_same_1, [S8
; S16
; S32
];
932 Vneg
, [], All
(2, Dreg
), "vneg", elts_same_1, [S8
; S16
; S32
; F32
];
933 Vneg
, [], All
(2, Qreg
), "vnegQ", elts_same_1, [S8
; S16
; S32
; F32
];
934 Vneg
, [Saturating
], All
(2, Dreg
), "vqneg", elts_same_1, [S8
; S16
; S32
];
935 Vneg
, [Saturating
], All
(2, Qreg
), "vqnegQ", elts_same_1, [S8
; S16
; S32
];
938 Vmvn
, [], All
(2, Dreg
), "vmvn", notype_1, P8
:: su_8_32;
939 Vmvn
, [], All
(2, Qreg
), "vmvnQ", notype_1, P8
:: su_8_32;
941 (* Count leading sign bits. *)
942 Vcls
, [], All
(2, Dreg
), "vcls", elts_same_1, [S8
; S16
; S32
];
943 Vcls
, [], All
(2, Qreg
), "vclsQ", elts_same_1, [S8
; S16
; S32
];
945 (* Count leading zeros. *)
946 Vclz
, [], All
(2, Dreg
), "vclz", sign_invar_1, su_8_32;
947 Vclz
, [], All
(2, Qreg
), "vclzQ", sign_invar_1, su_8_32;
949 (* Count number of set bits. *)
950 Vcnt
, [], All
(2, Dreg
), "vcnt", bits_1, [P8
; S8
; U8
];
951 Vcnt
, [], All
(2, Qreg
), "vcntQ", bits_1, [P8
; S8
; U8
];
953 (* Reciprocal estimate. *)
954 Vrecpe
, [], All
(2, Dreg
), "vrecpe", elts_same_1, [U32
; F32
];
955 Vrecpe
, [], All
(2, Qreg
), "vrecpeQ", elts_same_1, [U32
; F32
];
957 (* Reciprocal square-root estimate. *)
958 Vrsqrte
, [], All
(2, Dreg
), "vrsqrte", elts_same_1, [U32
; F32
];
959 Vrsqrte
, [], All
(2, Qreg
), "vrsqrteQ", elts_same_1, [U32
; F32
];
961 (* Get lanes from a vector. *)
963 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
964 Instruction_name
["vmov"]],
965 Use_operands
[| Corereg
; Dreg
; Immed
|],
966 "vget_lane", get_lane, pf_su_8_32;
969 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
970 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
971 Use_operands
[| Corereg
; Dreg
; Immed
|],
972 "vget_lane", notype_2, [S64
; U64
];
974 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
975 Instruction_name
["vmov"]],
976 Use_operands
[| Corereg
; Qreg
; Immed
|],
977 "vgetQ_lane", get_lane, pf_su_8_32;
980 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
981 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
982 Use_operands
[| Corereg
; Qreg
; Immed
|],
983 "vgetQ_lane", notype_2, [S64
; U64
];
985 (* Set lanes in a vector. *)
986 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
987 Instruction_name
["vmov"]],
988 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
989 set_lane, pf_su_8_32;
990 Vset_lane
, [Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
991 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
992 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
993 set_lane_notype, [S64
; U64
];
994 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
995 Instruction_name
["vmov"]],
996 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
997 set_lane, pf_su_8_32;
998 Vset_lane
, [Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
999 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1000 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
1001 set_lane_notype, [S64
; U64
];
1003 (* Create vector from literal bit pattern. *)
1005 [No_op
], (* Not really, but it can yield various things that are too
1006 hard for the test generator at this time. *)
1007 Use_operands
[| Dreg
; Corereg
|], "vcreate", create_vector,
1010 (* Set all lanes to the same value. *)
1012 Use_operands
[| Dreg
; Corereg
|], "vdup_n", bits_1,
1015 [Instruction_name
["vmov"];
1016 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1017 Use_operands
[| Dreg
; Corereg
|], "vdup_n", notype_1,
1020 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", bits_1,
1023 [Instruction_name
["vmov"];
1024 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1025 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1026 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", notype_1,
1029 (* These are just aliases for the above. *)
1031 [Builtin_name
"vdup_n"],
1032 Use_operands
[| Dreg
; Corereg
|],
1033 "vmov_n", bits_1, pf_su_8_32;
1035 [Builtin_name
"vdup_n";
1036 Instruction_name
["vmov"];
1037 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1038 Use_operands
[| Dreg
; Corereg
|],
1039 "vmov_n", notype_1, [S64
; U64
];
1041 [Builtin_name
"vdupQ_n"],
1042 Use_operands
[| Qreg
; Corereg
|],
1043 "vmovQ_n", bits_1, pf_su_8_32;
1045 [Builtin_name
"vdupQ_n";
1046 Instruction_name
["vmov"];
1047 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1048 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1049 Use_operands
[| Qreg
; Corereg
|],
1050 "vmovQ_n", notype_1, [S64
; U64
];
1052 (* Duplicate, lane version. We can't use Use_operands here because the
1053 rightmost register (always Dreg) would be picked up by find_key_operand,
1054 when we want the leftmost register to be used in this case (otherwise
1055 the modes are indistinguishable in neon.md, etc. *)
1057 [Disassembles_as
[Use_operands
[| Dreg
; Element_of_dreg
|]]],
1058 Unary_scalar Dreg
, "vdup_lane", bits_2, pf_su_8_32;
1060 [No_op
; Const_valuator
(fun _
-> 0)],
1061 Unary_scalar Dreg
, "vdup_lane", bits_2, [S64
; U64
];
1063 [Disassembles_as
[Use_operands
[| Qreg
; Element_of_dreg
|]]],
1064 Unary_scalar Qreg
, "vdupQ_lane", bits_2, pf_su_8_32;
1066 [No_op
; Const_valuator
(fun _
-> 0)],
1067 Unary_scalar Qreg
, "vdupQ_lane", bits_2, [S64
; U64
];
1069 (* Combining vectors. *)
1071 Use_operands
[| Qreg
; Dreg
; Dreg
|], "vcombine", notype_2,
1074 (* Splitting vectors. *)
1076 Use_operands
[| Dreg
; Qreg
|], "vget_high",
1077 notype_1, pf_su_8_64;
1078 Vget_low
, [Instruction_name
["vmov"];
1079 Disassembles_as
[Use_operands
[| Dreg
; Dreg
|]]],
1080 Use_operands
[| Dreg
; Qreg
|], "vget_low",
1081 notype_1, pf_su_8_64;
1084 Vcvt
, [InfoWord
], All
(2, Dreg
), "vcvt", conv_1,
1085 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1086 Vcvt
, [InfoWord
], All
(2, Qreg
), "vcvtQ", conv_1,
1087 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1088 Vcvt_n
, [InfoWord
], Use_operands
[| Dreg
; Dreg
; Immed
|], "vcvt_n", conv_2,
1089 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1090 Vcvt_n
, [InfoWord
], Use_operands
[| Qreg
; Qreg
; Immed
|], "vcvtQ_n", conv_2,
1091 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1093 (* Move, narrowing. *)
1094 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]],
1095 Narrow
, "vmovn", sign_invar_1, su_16_64;
1096 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
],
1097 Narrow
, "vqmovn", elts_same_1, su_16_64;
1099 [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
; Dst_unsign
],
1100 Narrow
, "vqmovun", dst_unsign_1,
1104 Vmovl
, [Disassembles_as
[Use_operands
[| Qreg
; Dreg
|]]],
1105 Long
, "vmovl", elts_same_1, su_8_32;
1109 [Instruction_name
["vtbl"];
1110 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1111 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbl1", table_2, [U8
; S8
; P8
];
1112 Vtbl
2, [Instruction_name
["vtbl"]],
1113 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbl2", table_2,
1115 Vtbl
3, [Instruction_name
["vtbl"]],
1116 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbl3", table_2,
1118 Vtbl
4, [Instruction_name
["vtbl"]],
1119 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbl4", table_2,
1122 (* Extended table lookup. *)
1124 [Instruction_name
["vtbx"];
1125 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1126 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbx1", table_io, [U8
; S8
; P8
];
1127 Vtbx
2, [Instruction_name
["vtbx"]],
1128 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbx2", table_io,
1130 Vtbx
3, [Instruction_name
["vtbx"]],
1131 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbx3", table_io,
1133 Vtbx
4, [Instruction_name
["vtbx"]],
1134 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbx4", table_io,
1137 (* Multiply, lane. (note: these were undocumented at the time of
1139 Vmul_lane
, [], By_scalar Dreg
, "vmul_lane", sign_invar_2_lane,
1140 [S16
; S32
; U16
; U32
; F32
];
1141 Vmul_lane
, [], By_scalar Qreg
, "vmulQ_lane", sign_invar_2_lane,
1142 [S16
; S32
; U16
; U32
; F32
];
1144 (* Multiply-accumulate, lane. *)
1145 Vmla_lane
, [], By_scalar Dreg
, "vmla_lane", sign_invar_io_lane,
1146 [S16
; S32
; U16
; U32
; F32
];
1147 Vmla_lane
, [], By_scalar Qreg
, "vmlaQ_lane", sign_invar_io_lane,
1148 [S16
; S32
; U16
; U32
; F32
];
1149 Vmla_lane
, [], Wide_lane
, "vmlal_lane", elts_same_io_lane,
1150 [S16
; S32
; U16
; U32
];
1151 Vmla_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlal_lane",
1152 elts_same_io_lane, [S16
; S32
];
1154 (* Multiply-subtract, lane. *)
1155 Vmls_lane
, [], By_scalar Dreg
, "vmls_lane", sign_invar_io_lane,
1156 [S16
; S32
; U16
; U32
; F32
];
1157 Vmls_lane
, [], By_scalar Qreg
, "vmlsQ_lane", sign_invar_io_lane,
1158 [S16
; S32
; U16
; U32
; F32
];
1159 Vmls_lane
, [], Wide_lane
, "vmlsl_lane", elts_same_io_lane,
1160 [S16
; S32
; U16
; U32
];
1161 Vmls_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlsl_lane",
1162 elts_same_io_lane, [S16
; S32
];
1164 (* Long multiply, lane. *)
1166 Wide_lane
, "vmull_lane", elts_same_2_lane, [S16
; S32
; U16
; U32
];
1168 (* Saturating doubling long multiply, lane. *)
1169 Vqdmull_lane
, [Saturating
; Doubling
],
1170 Wide_lane
, "vqdmull_lane", elts_same_2_lane, [S16
; S32
];
1172 (* Saturating doubling long multiply high, lane. *)
1173 Vqdmulh_lane
, [Saturating
; Halving
],
1174 By_scalar Qreg
, "vqdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1175 Vqdmulh_lane
, [Saturating
; Halving
],
1176 By_scalar Dreg
, "vqdmulh_lane", elts_same_2_lane, [S16
; S32
];
1177 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1178 Instruction_name
["vqrdmulh"]],
1179 By_scalar Qreg
, "vqRdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1180 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1181 Instruction_name
["vqrdmulh"]],
1182 By_scalar Dreg
, "vqRdmulh_lane", elts_same_2_lane, [S16
; S32
];
1184 (* Vector multiply by scalar. *)
1186 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1187 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmul_n",
1188 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1190 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1191 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmulQ_n",
1192 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1194 (* Vector long multiply by scalar. *)
1195 Vmull_n
, [Instruction_name
["vmull"];
1196 Disassembles_as
[Use_operands
[| Qreg
; Dreg
; Element_of_dreg
|]]],
1197 Wide_scalar
, "vmull_n",
1198 elts_same_2, [S16
; S32
; U16
; U32
];
1200 (* Vector saturating doubling long multiply by scalar. *)
1201 Vqdmull_n
, [Saturating
; Doubling
;
1202 Disassembles_as
[Use_operands
[| Qreg
; Dreg
;
1203 Element_of_dreg
|]]],
1204 Wide_scalar
, "vqdmull_n",
1205 elts_same_2, [S16
; S32
];
1207 (* Vector saturating doubling long multiply high by scalar. *)
1209 [Saturating
; Halving
; InfoWord
;
1210 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1211 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1212 "vqdmulhQ_n", elts_same_2, [S16
; S32
];
1214 [Saturating
; Halving
; InfoWord
;
1215 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1216 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1217 "vqdmulh_n", elts_same_2, [S16
; S32
];
1219 [Saturating
; Halving
; Rounding
; InfoWord
;
1220 Instruction_name
["vqrdmulh"];
1221 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1222 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1223 "vqRdmulhQ_n", elts_same_2, [S16
; S32
];
1225 [Saturating
; Halving
; Rounding
; InfoWord
;
1226 Instruction_name
["vqrdmulh"];
1227 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1228 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1229 "vqRdmulh_n", elts_same_2, [S16
; S32
];
1231 (* Vector multiply-accumulate by scalar. *)
1233 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1234 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmla_n",
1235 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1237 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1238 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlaQ_n",
1239 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1240 Vmla_n
, [], Wide_scalar
, "vmlal_n", elts_same_io, [S16
; S32
; U16
; U32
];
1241 Vmla_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlal_n", elts_same_io,
1244 (* Vector multiply subtract by scalar. *)
1246 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1247 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmls_n",
1248 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1250 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1251 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlsQ_n",
1252 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1253 Vmls_n
, [], Wide_scalar
, "vmlsl_n", elts_same_io, [S16
; S32
; U16
; U32
];
1254 Vmls_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlsl_n", elts_same_io,
1257 (* Vector extract. *)
1258 Vext
, [Const_valuator
(fun _
-> 0)],
1259 Use_operands
[| Dreg
; Dreg
; Dreg
; Immed
|], "vext", extend,
1261 Vext
, [Const_valuator
(fun _
-> 0)],
1262 Use_operands
[| Qreg
; Qreg
; Qreg
; Immed
|], "vextQ", extend,
1265 (* Reverse elements. *)
1266 Vrev64
, [], All
(2, Dreg
), "vrev64", bits_1, P8
:: P16
:: F32
:: su_8_32;
1267 Vrev64
, [], All
(2, Qreg
), "vrev64Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1268 Vrev32
, [], All
(2, Dreg
), "vrev32", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1269 Vrev32
, [], All
(2, Qreg
), "vrev32Q", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1270 Vrev16
, [], All
(2, Dreg
), "vrev16", bits_1, [P8
; S8
; U8
];
1271 Vrev16
, [], All
(2, Qreg
), "vrev16Q", bits_1, [P8
; S8
; U8
];
1273 (* Bit selection. *)
1275 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1276 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Dreg
|]]],
1277 Use_operands
[| Dreg
; Dreg
; Dreg
; Dreg
|], "vbsl", bit_select,
1280 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1281 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Qreg
|]]],
1282 Use_operands
[| Qreg
; Qreg
; Qreg
; Qreg
|], "vbslQ", bit_select,
1285 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1286 generating good code for intrinsics which return structure types --
1287 builtins work well by themselves (and understand that the values being
1288 stored on e.g. the stack also reside in registers, so can optimise the
1289 stores away entirely if the results are used immediately), but
1290 intrinsics are very much less efficient. Maybe something can be improved
1291 re: inlining, or tweaking the ABI used for intrinsics (a special call
1294 Vtrn
, [ReturnPtr
], Pair_result Dreg
, "vtrn", bits_2, pf_su_8_32;
1295 Vtrn
, [ReturnPtr
], Pair_result Qreg
, "vtrnQ", bits_2, pf_su_8_32;
1298 Vzip
, [ReturnPtr
], Pair_result Dreg
, "vzip", bits_2, pf_su_8_32;
1299 Vzip
, [ReturnPtr
], Pair_result Qreg
, "vzipQ", bits_2, pf_su_8_32;
1301 (* Unzip elements. *)
1302 Vuzp
, [ReturnPtr
], Pair_result Dreg
, "vuzp", bits_2, pf_su_8_32;
1303 Vuzp
, [ReturnPtr
], Pair_result Qreg
, "vuzpQ", bits_2, pf_su_8_32;
1305 (* Element/structure loads. VLD1 variants. *)
1307 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1308 CstPtrTo Corereg
|]]],
1309 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1", bits_1,
1311 Vldx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1312 CstPtrTo Corereg
|]]],
1313 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q", bits_1,
1317 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1318 CstPtrTo Corereg
|]]],
1319 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1320 "vld1_lane", bits_3, pf_su_8_32;
1322 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1323 CstPtrTo Corereg
|]];
1324 Const_valuator
(fun _
-> 0)],
1325 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1326 "vld1_lane", bits_3, [S64
; U64
];
1328 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1329 CstPtrTo Corereg
|]]],
1330 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1331 "vld1Q_lane", bits_3, pf_su_8_32;
1333 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1334 CstPtrTo Corereg
|]]],
1335 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1336 "vld1Q_lane", bits_3, [S64
; U64
];
1339 [Disassembles_as
[Use_operands
[| VecArray
(1, All_elements_of_dreg
);
1340 CstPtrTo Corereg
|]]],
1341 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1344 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1345 CstPtrTo Corereg
|]]],
1346 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1349 [Disassembles_as
[Use_operands
[| VecArray
(2, All_elements_of_dreg
);
1350 CstPtrTo Corereg
|]]],
1351 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1354 [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1355 CstPtrTo Corereg
|]]],
1356 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1359 (* VST1 variants. *)
1360 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1362 Use_operands
[| PtrTo Corereg
; Dreg
|], "vst1",
1363 store_1, pf_su_8_64;
1364 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1366 Use_operands
[| PtrTo Corereg
; Qreg
|], "vst1Q",
1367 store_1, pf_su_8_64;
1370 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1371 CstPtrTo Corereg
|]]],
1372 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1373 "vst1_lane", store_3, pf_su_8_32;
1375 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1376 CstPtrTo Corereg
|]];
1377 Const_valuator
(fun _
-> 0)],
1378 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1379 "vst1_lane", store_3, [U64
; S64
];
1381 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1382 CstPtrTo Corereg
|]]],
1383 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1384 "vst1Q_lane", store_3, pf_su_8_32;
1386 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1387 CstPtrTo Corereg
|]]],
1388 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1389 "vst1Q_lane", store_3, [U64
; S64
];
1391 (* VLD2 variants. *)
1392 Vldx
2, [], Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1393 "vld2", bits_1, pf_su_8_32;
1394 Vldx
2, [Instruction_name
["vld1"]],
1395 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1396 "vld2", bits_1, [S64
; U64
];
1397 Vldx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1398 CstPtrTo Corereg
|];
1399 Use_operands
[| VecArray
(2, Dreg
);
1400 CstPtrTo Corereg
|]]],
1401 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
|],
1402 "vld2Q", bits_1, pf_su_8_32;
1405 [Disassembles_as
[Use_operands
1406 [| VecArray
(2, Element_of_dreg
);
1407 CstPtrTo Corereg
|]]],
1408 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
;
1409 VecArray
(2, Dreg
); Immed
|],
1410 "vld2_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1412 [Disassembles_as
[Use_operands
1413 [| VecArray
(2, Element_of_dreg
);
1414 CstPtrTo Corereg
|]]],
1415 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
;
1416 VecArray
(2, Qreg
); Immed
|],
1417 "vld2Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1420 [Disassembles_as
[Use_operands
1421 [| VecArray
(2, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1422 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1423 "vld2_dup", bits_1, pf_su_8_32;
1425 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1426 [| VecArray
(2, Dreg
); CstPtrTo Corereg
|]]],
1427 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1428 "vld2_dup", bits_1, [S64
; U64
];
1430 (* VST2 variants. *)
1431 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1433 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1434 store_1, pf_su_8_32;
1435 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1437 Instruction_name
["vst1"]],
1438 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1439 store_1, [S64
; U64
];
1440 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1442 Use_operands
[| VecArray
(2, Dreg
);
1444 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
) |], "vst2Q",
1445 store_1, pf_su_8_32;
1448 [Disassembles_as
[Use_operands
1449 [| VecArray
(2, Element_of_dreg
);
1450 CstPtrTo Corereg
|]]],
1451 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
); Immed
|], "vst2_lane",
1452 store_3, P8
:: P16
:: F32
:: su_8_32;
1454 [Disassembles_as
[Use_operands
1455 [| VecArray
(2, Element_of_dreg
);
1456 CstPtrTo Corereg
|]]],
1457 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
); Immed
|], "vst2Q_lane",
1458 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1460 (* VLD3 variants. *)
1461 Vldx
3, [], Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1462 "vld3", bits_1, pf_su_8_32;
1463 Vldx
3, [Instruction_name
["vld1"]],
1464 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1465 "vld3", bits_1, [S64
; U64
];
1466 Vldx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1467 CstPtrTo Corereg
|];
1468 Use_operands
[| VecArray
(3, Dreg
);
1469 CstPtrTo Corereg
|]]],
1470 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
|],
1471 "vld3Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1474 [Disassembles_as
[Use_operands
1475 [| VecArray
(3, Element_of_dreg
);
1476 CstPtrTo Corereg
|]]],
1477 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
;
1478 VecArray
(3, Dreg
); Immed
|],
1479 "vld3_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1481 [Disassembles_as
[Use_operands
1482 [| VecArray
(3, Element_of_dreg
);
1483 CstPtrTo Corereg
|]]],
1484 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
;
1485 VecArray
(3, Qreg
); Immed
|],
1486 "vld3Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1489 [Disassembles_as
[Use_operands
1490 [| VecArray
(3, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1491 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1492 "vld3_dup", bits_1, pf_su_8_32;
1494 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1495 [| VecArray
(3, Dreg
); CstPtrTo Corereg
|]]],
1496 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1497 "vld3_dup", bits_1, [S64
; U64
];
1499 (* VST3 variants. *)
1500 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1502 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1503 store_1, pf_su_8_32;
1504 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1506 Instruction_name
["vst1"]],
1507 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1508 store_1, [S64
; U64
];
1509 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1511 Use_operands
[| VecArray
(3, Dreg
);
1513 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
) |], "vst3Q",
1514 store_1, pf_su_8_32;
1517 [Disassembles_as
[Use_operands
1518 [| VecArray
(3, Element_of_dreg
);
1519 CstPtrTo Corereg
|]]],
1520 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
); Immed
|], "vst3_lane",
1521 store_3, P8
:: P16
:: F32
:: su_8_32;
1523 [Disassembles_as
[Use_operands
1524 [| VecArray
(3, Element_of_dreg
);
1525 CstPtrTo Corereg
|]]],
1526 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
); Immed
|], "vst3Q_lane",
1527 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1529 (* VLD4/VST4 variants. *)
1530 Vldx
4, [], Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1531 "vld4", bits_1, pf_su_8_32;
1532 Vldx
4, [Instruction_name
["vld1"]],
1533 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1534 "vld4", bits_1, [S64
; U64
];
1535 Vldx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1536 CstPtrTo Corereg
|];
1537 Use_operands
[| VecArray
(4, Dreg
);
1538 CstPtrTo Corereg
|]]],
1539 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
|],
1540 "vld4Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1543 [Disassembles_as
[Use_operands
1544 [| VecArray
(4, Element_of_dreg
);
1545 CstPtrTo Corereg
|]]],
1546 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
;
1547 VecArray
(4, Dreg
); Immed
|],
1548 "vld4_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1550 [Disassembles_as
[Use_operands
1551 [| VecArray
(4, Element_of_dreg
);
1552 CstPtrTo Corereg
|]]],
1553 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
;
1554 VecArray
(4, Qreg
); Immed
|],
1555 "vld4Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1558 [Disassembles_as
[Use_operands
1559 [| VecArray
(4, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1560 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1561 "vld4_dup", bits_1, pf_su_8_32;
1563 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1564 [| VecArray
(4, Dreg
); CstPtrTo Corereg
|]]],
1565 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1566 "vld4_dup", bits_1, [S64
; U64
];
1568 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1570 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1571 store_1, pf_su_8_32;
1572 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1574 Instruction_name
["vst1"]],
1575 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1576 store_1, [S64
; U64
];
1577 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1579 Use_operands
[| VecArray
(4, Dreg
);
1581 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
) |], "vst4Q",
1582 store_1, pf_su_8_32;
1585 [Disassembles_as
[Use_operands
1586 [| VecArray
(4, Element_of_dreg
);
1587 CstPtrTo Corereg
|]]],
1588 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
); Immed
|], "vst4_lane",
1589 store_3, P8
:: P16
:: F32
:: su_8_32;
1591 [Disassembles_as
[Use_operands
1592 [| VecArray
(4, Element_of_dreg
);
1593 CstPtrTo Corereg
|]]],
1594 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
); Immed
|], "vst4Q_lane",
1595 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1597 (* Logical operations. And. *)
1598 Vand
, [], All
(3, Dreg
), "vand", notype_2, su_8_64;
1599 Vand
, [], All
(3, Qreg
), "vandQ", notype_2, su_8_64;
1602 Vorr
, [], All
(3, Dreg
), "vorr", notype_2, su_8_64;
1603 Vorr
, [], All
(3, Qreg
), "vorrQ", notype_2, su_8_64;
1606 Veor
, [], All
(3, Dreg
), "veor", notype_2, su_8_64;
1607 Veor
, [], All
(3, Qreg
), "veorQ", notype_2, su_8_64;
1609 (* Bic (And-not). *)
1610 Vbic
, [], All
(3, Dreg
), "vbic", notype_2, su_8_64;
1611 Vbic
, [], All
(3, Qreg
), "vbicQ", notype_2, su_8_64;
1614 Vorn
, [], All
(3, Dreg
), "vorn", notype_2, su_8_64;
1615 Vorn
, [], All
(3, Qreg
), "vornQ", notype_2, su_8_64;
1619 let elems = P8
:: P16
:: F32
:: su_8_64 in
1622 let types = List.fold_right
1623 (fun convfrom acc
->
1624 if convfrom
<> convto
then
1625 Cast
(convto
, convfrom
) :: acc
1631 let dconv = Vreinterp
, [No_op
], Use_operands
[| Dreg
; Dreg
|],
1632 "vreinterpret", conv_1, types
1633 and qconv
= Vreinterp
, [No_op
], Use_operands
[| Qreg
; Qreg
|],
1634 "vreinterpretQ", conv_1, types in
1635 dconv :: qconv
:: acc
)
1639 (* Output routines. *)
(* Render an element type as the suffix used in intrinsic names.
   Signed, unsigned, float and poly element types map to e.g. "s8",
   "u16", "f32", "p8"; the width-only B* variants map to the bare bit
   width ("8".."64"); a Conv or Cast pair yields both components'
   suffixes joined by an underscore.  Raises Failure on NoElts. *)
let rec string_of_elt elt =
  match elt with
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
  | Conv (a, b) | Cast (a, b) ->
      string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
1650 let string_of_elt_dots elt =
1652 Conv
(a
, b
) | Cast
(a
, b
) -> string_of_elt a ^
"." ^
string_of_elt b
1653 | _
-> string_of_elt elt
1655 let string_of_vectype vt
=
1656 let rec name affix
= function
1657 T_int8x8
-> affix
"int8x8"
1658 | T_int8x16
-> affix
"int8x16"
1659 | T_int16x4
-> affix
"int16x4"
1660 | T_int16x8
-> affix
"int16x8"
1661 | T_int32x2
-> affix
"int32x2"
1662 | T_int32x4
-> affix
"int32x4"
1663 | T_int64x1
-> affix
"int64x1"
1664 | T_int64x2
-> affix
"int64x2"
1665 | T_uint8x8
-> affix
"uint8x8"
1666 | T_uint8x16
-> affix
"uint8x16"
1667 | T_uint16x4
-> affix
"uint16x4"
1668 | T_uint16x8
-> affix
"uint16x8"
1669 | T_uint32x2
-> affix
"uint32x2"
1670 | T_uint32x4
-> affix
"uint32x4"
1671 | T_uint64x1
-> affix
"uint64x1"
1672 | T_uint64x2
-> affix
"uint64x2"
1673 | T_float32x2
-> affix
"float32x2"
1674 | T_float32x4
-> affix
"float32x4"
1675 | T_poly8x8
-> affix
"poly8x8"
1676 | T_poly8x16
-> affix
"poly8x16"
1677 | T_poly16x4
-> affix
"poly16x4"
1678 | T_poly16x8
-> affix
"poly16x8"
1679 | T_int8
-> affix
"int8"
1680 | T_int16
-> affix
"int16"
1681 | T_int32
-> affix
"int32"
1682 | T_int64
-> affix
"int64"
1683 | T_uint8
-> affix
"uint8"
1684 | T_uint16
-> affix
"uint16"
1685 | T_uint32
-> affix
"uint32"
1686 | T_uint64
-> affix
"uint64"
1687 | T_poly8
-> affix
"poly8"
1688 | T_poly16
-> affix
"poly16"
1689 | T_float32
-> affix
"float32"
1690 | T_immediate _
-> "const int"
1692 | T_intQI
-> "__builtin_neon_qi"
1693 | T_intHI
-> "__builtin_neon_hi"
1694 | T_intSI
-> "__builtin_neon_si"
1695 | T_intDI
-> "__builtin_neon_di"
1696 | T_arrayof
(num
, base
) ->
1697 let basename = name (fun x
-> x
) base
in
1698 affix
(Printf.sprintf
"%sx%d" basename num
)
1700 let basename = name affix x
in
1701 Printf.sprintf
"%s *" basename
1703 let basename = name affix x
in
1704 Printf.sprintf
"const %s" basename
1706 name (fun x
-> x ^
"_t") vt
(* Name of the internal builtin type used for a multi-register mode
   (see the TImode/EImode/OImode/CImode/XImode notes near the type's
   declaration). *)
let string_of_inttype it =
  match it with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
1715 let string_of_mode = function
1716 V8QI
-> "v8qi" | V4HI
-> "v4hi" | V2SI
-> "v2si" | V2SF
-> "v2sf"
1717 | DI
-> "di" | V16QI
-> "v16qi" | V8HI
-> "v8hi" | V4SI
-> "v4si"
1718 | V4SF
-> "v4sf" | V2DI
-> "v2di" | QI
-> "qi" | HI
-> "hi" | SI
-> "si"
(* Uppercase chars mark letters which form part of the intrinsic name
   but should be omitted from the builtin name (that information is
   passed in an extra argument instead); the intrinsic name itself is
   simply the fully-lowercased form. *)
let intrinsic_name = String.lowercase
1726 (* Allow the name of the builtin to be overridden by things (e.g. Flipped)
1727 found in the features list. *)
1728 let builtin_name features
name =
1729 let name = List.fold_right
1732 Flipped x
| Builtin_name x
-> x
1735 let islower x
= let str = String.make
1 x
in (String.lowercase
str) = str
1736 and buf
= Buffer.create
(String.length
name) in
1737 String.iter
(fun c
-> if islower c
then Buffer.add_char buf c
) name;
1740 (* Transform an arity into a list of strings. *)
1741 let strings_of_arity a
=
1743 | Arity0 vt
-> [string_of_vectype vt
]
1744 | Arity1
(vt1
, vt2
) -> [string_of_vectype vt1
; string_of_vectype vt2
]
1745 | Arity2
(vt1
, vt2
, vt3
) -> [string_of_vectype vt1
;
1746 string_of_vectype vt2
;
1747 string_of_vectype vt3
]
1748 | Arity3
(vt1
, vt2
, vt3
, vt4
) -> [string_of_vectype vt1
;
1749 string_of_vectype vt2
;
1750 string_of_vectype vt3
;
1751 string_of_vectype vt4
]
1752 | Arity4
(vt1
, vt2
, vt3
, vt4
, vt5
) -> [string_of_vectype vt1
;
1753 string_of_vectype vt2
;
1754 string_of_vectype vt3
;
1755 string_of_vectype vt4
;
1756 string_of_vectype vt5
]
(* Suffixes stripped from the end of builtin names in order to obtain
   the name used as an instruction.  A suffix is only stripped when it
   is immediately preceded by an underscore. *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
1763 (* Get the possible names of an instruction corresponding to a "name" from the
1764 ops table. This is done by getting the equivalent builtin name and
1765 stripping any suffixes from the list at the top of this file, unless
1766 the features list presents with an Instruction_name entry, in which
1767 case that is used; or unless the features list presents with a Flipped
1768 entry, in which case that is used. If both such entries are present,
1769 the first in the list will be chosen. *)
1770 let get_insn_names features
name =
1773 match List.find
(fun feature
-> match feature
with
1774 Instruction_name _
-> true
1776 | _
-> false) features
1778 Instruction_name
names -> names
1779 | Flipped
name -> [name]
1782 with Not_found
-> [builtin_name features
name]
1785 List.map
(fun name'
->
1787 let underscore = String.rindex
name' '_'
in
1788 let our_suffix = String.sub
name'
(underscore + 1)
1789 ((String.length
name'
) - underscore - 1)
1791 let rec strip remaining_suffixes
=
1792 match remaining_suffixes
with
1794 | s
::ss
when our_suffix = s
-> String.sub
name'
0 underscore
1797 strip suffixes_to_strip
1798 with (Not_found
| Invalid_argument _
) -> name'
) names
1801 (* Apply a function to each element of a list and then comma-separate
1802 the resulting strings. *)
1803 let rec commas f elts acc
=
1806 | [elt] -> acc ^
(f
elt)
1808 commas f elts
(acc ^
(f
elt) ^
", ")
1810 (* Given a list of features and the shape specified in the "ops" table, apply
1811 a function to each possible shape that the instruction may have.
1812 By default, this is the "shape" entry in "ops". If the features list
1813 contains a Disassembles_as entry, the shapes contained in that entry are
1814 mapped to corresponding outputs and returned in a list. If there is more
1815 than one Disassembles_as entry, only the first is used. *)
1816 let analyze_all_shapes features
shape f
=
1818 match List.find
(fun feature
->
1819 match feature
with Disassembles_as _
-> true
1822 Disassembles_as shapes
-> List.map f shapes
1824 with Not_found
-> [f
shape]