1 (* APPLE LOCAL file v7 support. Merge from Codesourcery *)
(* Common code for ARM NEON header file, documentation and test case
   generators.

   Copyright (C) 2006 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 2, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to the Free
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.  *)
25 (* Shorthand types for vector elements. *)
(* Shorthand tags for vector element types: signed (S*), unsigned (U*),
   float (F32), polynomial (P*), signless integer (I*) and raw-bits (B*)
   variants, plus conversion/cast pairs and an "absent" marker. *)
type elts =
    S8 | S16 | S32 | S64            (* Signed integer elements. *)
  | F32                             (* 32-bit float element. *)
  | U8 | U16 | U32 | U64            (* Unsigned integer elements. *)
  | P8 | P16                        (* Polynomial elements. *)
  | I8 | I16 | I32 | I64            (* Signless integer elements. *)
  | B8 | B16 | B32 | B64            (* Untyped-bits elements. *)
  | Conv of elts * elts             (* Conversion: destination, source. *)
  | Cast of elts * elts             (* Reinterpret cast: destination, source. *)
  | NoElts                          (* No element type recorded. *)
(* Classification of element types, mirroring the constructors of [elts]. *)
type eltclass =
    Signed | Unsigned | Float | Poly | Int | Bits
  | ConvClass of eltclass * eltclass  (* Class pair for Conv/Cast elements. *)
  | NoType
(* These vector types correspond directly to C types. *)
type vectype =
    T_int8x8    | T_int8x16
  | T_int16x4   | T_int16x8
  | T_int32x2   | T_int32x4
  | T_int64x1   | T_int64x2
  | T_uint8x8   | T_uint8x16
  | T_uint16x4  | T_uint16x8
  | T_uint32x2  | T_uint32x4
  | T_uint64x1  | T_uint64x2
  | T_float32x2 | T_float32x4
  | T_poly8x8   | T_poly8x16
  | T_poly16x4  | T_poly16x8
  | T_immediate of int * int        (* Immediate with (low, high) bounds. *)
  (* NOTE(review): the scalar constructors below were lost from the garbled
     source (its lines 46-50).  [T_void] is demonstrably required (used by
     store_1/store_3 later in this file); the others are reconstructed from
     the upstream GCC neon.ml -- verify against that file. *)
  | T_int       | T_void
  | T_intQI     | T_intHI
  | T_intSI     | T_intDI
  | T_float32
  | T_arrayof of int * vectype      (* Array of N values of a type. *)
  | T_ptrto of vectype              (* Pointer to a type. *)
  | T_const of vectype              (* Const-qualified type. *)
(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).  *)
(* LLVM LOCAL begin Use a different type for each vector type. *)
(* Builtin types for multi-register (structure) values:
   B_<mode><d|q><elt>mode, where TI/EI/OI/CI/XI is the register-tuple mode
   (see the word counts above) and the suffix is the element mode. *)
type inttype =
    B_TId8mode  | B_EId8mode  | B_OId8mode
  | B_TId16mode | B_EId16mode | B_OId16mode
  | B_TId32mode | B_EId32mode | B_OId32mode
  | B_TId64mode | B_EId64mode | B_OId64mode
  | B_TIdSFmode | B_EIdSFmode | B_OIdSFmode
  | B_OIq8mode  | B_CIq8mode  | B_XIq8mode
  | B_OIq16mode | B_CIq16mode | B_XIq16mode
  | B_OIq32mode | B_CIq32mode | B_XIq32mode
  | B_OIq64mode | B_CIq64mode | B_XIq64mode
  | B_OIqSFmode | B_CIqSFmode | B_XIqSFmode
(* LLVM LOCAL end Use a different type for each vector type. *)
(* One operand slot in an instruction shape. *)
type shape_elt =
    Dreg                            (* D (doubleword) vector register. *)
  | Qreg                            (* Q (quadword) vector register. *)
  | Corereg                         (* ARM core register. *)
  | Immed                           (* Immediate operand. *)
  | VecArray of int * shape_elt     (* Array of N like operands. *)
  | PtrTo of shape_elt              (* Pointer to an operand kind. *)
  | CstPtrTo of shape_elt           (* Pointer-to-const operand kind. *)
  (* These next ones are used only in the test generator. *)
  | Element_of_dreg                 (* Used for "lane" variants. *)
  | Element_of_qreg                 (* Likewise. *)
  | All_elements_of_dreg            (* Used for "dup" variants. *)
(* Overall shape of an intrinsic: operand count and register classes.
   NOTE(review): the nullary constructors below were lost from the garbled
   source (its lines 86, 88, 90-92, 97-98), but every one of them is
   matched or constructed later in this file (mode_of_elt, shapemap,
   regmap, the ops table).  Their order is reconstructed from the upstream
   GCC neon.ml -- verify. *)
type shape_form =
    All of int * shape_elt          (* N operands, all of the same kind. *)
  | Long                            (* Widening: Q result from D operands. *)
  | Long_noreg of shape_elt
  | Wide                            (* Q result from Q and D operands. *)
  | Wide_noreg of shape_elt
  | Narrow                          (* Narrowing: D result from Q operands. *)
  | Long_imm
  | Narrow_imm
  | Binary_imm of shape_elt         (* Binary operation with immediate. *)
  | Use_operands of shape_elt array (* Explicit per-operand list. *)
  | By_scalar of shape_elt
  | Unary_scalar of shape_elt
  | Wide_lane
  | Wide_scalar
  | Pair_result of shape_elt        (* Operation yielding a result pair. *)
(* Arity of an intrinsic: the return type followed by 0-4 argument types. *)
type arity =
    Arity0 of vectype
  | Arity1 of vectype * vectype
  | Arity2 of vectype * vectype * vectype
  | Arity3 of vectype * vectype * vectype * vectype
  | Arity4 of vectype * vectype * vectype * vectype * vectype
(* Machine modes: V<n><elt> vector modes and plain scalar modes. *)
type vecmode =
    V8QI | V4HI | V2SI | V2SF | V1DI    (* 64-bit (D) vector modes. *)
  | V16QI | V8HI | V4SI | V4SF | V2DI   (* 128-bit (Q) vector modes. *)
  | QI | HI | SI | SF | DI              (* Scalar modes. *)
152 (* Ops with scalar. *)
174 (* Vector extract. *)
176 (* Reverse elements. *)
180 (* Transposition ops. *)
184 (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
190 (* Set/extract lanes from a vector. *)
193 (* Initialise vector from bit pattern. *)
195 (* Set all lanes to same value. *)
197 | Vmov_n
(* Is this the same? *)
198 (* Duplicate scalar to all lanes of vector. *)
200 (* Combine vectors. *)
202 (* Get quadword high/low parts. *)
205 (* Convert vectors. *)
208 (* Narrow/lengthen vectors. *)
214 (* Reinterpret casts. *)
217 (* Features used for documentation, to distinguish between some instruction
218 variants, and to signal special requirements (e.g. swapping arguments). *)
227 | Flipped
of string (* Builtin name to use with flipped arguments. *)
228 | InfoWord
(* Pass an extra word for signage/rounding etc. (always passed
229 for All _, Long, Wide, Narrow shape_forms. *)
230 | ReturnPtr
(* Pass explicit pointer to return value as first argument. *)
231 (* A specification as to the shape of instruction expected upon
232 disassembly, used if it differs from the shape used to build the
233 intrinsic prototype. Multiple entries in the constructor's argument
234 indicate that the intrinsic expands to more than one assembly
235 instruction, each with a corresponding shape specified here. *)
236 | Disassembles_as
of shape_form list
237 | Builtin_name
of string (* Override the name of the builtin. *)
(* Override the name of the instruction.  If more than one name is
   specified, it means that the instruction can have any of those names. *)
241 | Instruction_name
of string list
242 (* Mark that the intrinsic yields no instructions, or expands to yield
243 behaviour that the test generator cannot test. *)
245 (* Mark that the intrinsic has constant arguments that cannot be set
246 to the defaults (zero for pointers and one otherwise) in the test
247 cases. The function supplied must return the integer to be written
248 into the testcase for the argument number (0-based) supplied to it. *)
249 | Const_valuator
of (int -> int)
(* Raised (by elt_width) when asked for the width of a Cast between two
   element types. *)
exception MixedMode of elts * elts
(* Width in bits of an element type.  Raises MixedMode for a Cast, and
   Failure for NoElts or a Conv whose two element widths disagree. *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  (* NOTE(review): this match-arm head was lost from the garbled source
     (its line 258).  [Conv (a, b)] is the only constructor that can bind
     the [a]/[b] used below, and the Cast arm follows separately. *)
  | Conv (a, b) ->
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else failwith "element width?"
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"
(* Classify an element type into its [eltclass]. *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  (* NOTE(review): the F32, P8/P16 and NoElts arms were lost from the
     garbled source (its lines 267-268, 272); they are the only remaining
     constructors and Float/Poly/NoType their only plausible classes, but
     verify against upstream GCC neon.ml. *)
  | F32 -> Float
  | P8 | P16 -> Poly
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType
(* Map an element class and a bit width back to the element type.
   NOTE(review): most of this body was lost from the garbled source (only
   the Unsigned 16/32/64 rows and the catch-all survive); the remaining
   rows are reconstructed from upstream GCC neon.ml so the *_of_elt
   helpers below can work -- verify. *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"
(* Return the unsigned integer element with the same width as [elt]. *)
let unsigned_of_elt elt = elt_of_class_width Unsigned (elt_width elt)

(* Return the signed integer element with the same width as [elt]. *)
let signed_of_elt elt = elt_of_class_width Signed (elt_width elt)

(* Return the untyped-bits element with the same width as [elt]. *)
let bits_of_elt elt = elt_of_class_width Bits (elt_width elt)
(* Map a signed or unsigned integer element to the signless integer
   element of the same width; leave any other element unchanged.
   NOTE(review): the whole body of non_signed_variant (original lines
   309-317) was lost from the garbled source; reconstructed from upstream
   GCC neon.ml -- verify. *)
let non_signed_variant = function
    S8 -> I8 | S16 -> I16 | S32 -> I32 | S64 -> I64
  | U8 -> I8 | U16 -> I16 | U32 -> I32 | U64 -> I64
  | x -> x

(* Classify [v], but treat polynomial elements as unsigned (cf. the table
   look-up comment below: operand 2 is unsigned for polynomial ops).
   NOTE(review): the two arms of the inner match (original lines 321-322)
   were lost; reconstructed -- verify. *)
let poly_unsigned_variant v =
  let elclass = match elt_class v with
    Poly -> Unsigned
  | x -> x in
  elt_of_class_width elclass (elt_width v)
(* Double the width of an element, preserving its class.
   NOTE(review): the [let widen_elt elt =] header (original line 325) was
   lost from the garbled source; the name is fixed by the uses in
   shapemap below -- verify. *)
let widen_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w * 2)

(* Halve the width of an element, preserving its class.
   NOTE(review): header reconstructed as above (original line 330 lost). *)
let narrow_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w / 2)
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md. *)
(* NOTE(review): the [scan] binder, two match arms and the fall-through
   recursion (original lines 339, 341-342, 345-346) were lost from the
   garbled source; reconstructed so the surviving [operands.(opno)] match
   and the final call make sense -- verify against upstream GCC neon.ml. *)
let find_key_operand operands =
  let rec scan opno =
    match operands.(opno) with
      Qreg -> Qreg
    | Dreg -> Dreg
    | VecArray (_, Qreg) -> Qreg
    | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno - 1)
  in
  scan ((Array.length operands) - 1)
(* Find the machine mode for a vector of elements [elt] used in an
   instruction of shape [shape].
   NOTE(review): several binder/match lines were lost from the garbled
   source (its lines 352, 356, 367-368); the [idx] binder, the
   [match shape with] and the [Long_imm] case are reconstructed from
   upstream GCC neon.ml -- verify. *)
let rec mode_of_elt elt shape =
  (* Float elements (or conversions involving float) pick the SF modes. *)
  let flt = match elt_class elt with
    Float | ConvClass (_, Float) -> true
  | _ -> false in
  (* Index into the mode tables: 0/1/2/3 for 8/16/32/64-bit elements. *)
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in
  match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      [| V8QI; V4HI; (if flt then V2SF else V2SI); V1DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; (if flt then V4SF else V4SI); V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; (if flt then SF else SI); DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; V1DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
   operand number [no]: widening/narrowing shapes transform some operands'
   elements, all other shapes leave them unchanged.
   NOTE(review): the [match shape with] line (original line 379) was lost
   from the garbled source; without it the clause list is orphaned. *)
let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
(* Register type (D/Q) of an operand, based on shape and operand number.
   Operand 0 is the result (cf. the Arity constructors built from these).
   NOTE(review): the [match shape with] line (original line 391) was lost
   from the garbled source; reconstructed. *)
let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
(* Map (shape, elt, operand number [no]) to the C-level vectype of that
   operand: transform the element with shapemap, pick the register class
   with regmap, then map (register class, element) to a concrete vectype
   via the inner [type_for_reg_elt].
   NOTE(review): the bulk of the inner dispatch (original lines 410-423,
   425-438, 440-453, 455-457, 460, 462, 465-466) was lost in extraction;
   the surviving fragments are left untouched below rather than guessed
   at -- restore from upstream GCC neon.ml. *)
406 let type_for_elt shape elt no
=
407 let elt = (shapemap shape no
) elt in
408 let reg = regmap shape no
in
409 let rec type_for_reg_elt reg elt =
424 | _
-> failwith
"Bad elt type"
439 | _
-> failwith
"Bad elt type"
454 | _
-> failwith
"Bad elt type"
458 | VecArray
(num
, sub
) ->
459 T_arrayof
(num
, type_for_reg_elt sub
elt)
461 T_ptrto
(type_for_reg_elt x
elt)
463 T_ptrto
(T_const
(type_for_reg_elt x
elt))
464 (* Anything else is solely for the use of the test generator. *)
467 type_for_reg_elt reg elt
(* Return size of a vector type, in bits.
   Raises Not_found for non-vector types. *)
let vectype_size t =
  match t with
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | _ -> raise Not_found
(* LLVM LOCAL begin Map vector types to modes. *)
(* Return the machine mode corresponding to a C-level vector type.
   Raises Not_found for non-vector types. *)
let vectype_mode t =
  match t with
  | T_int8x8 | T_uint8x8 | T_poly8x8 -> V8QI
  | T_int16x4 | T_uint16x4 | T_poly16x4 -> V4HI
  | T_int32x2 | T_uint32x2 -> V2SI
  | T_int64x1 | T_uint64x1 -> V1DI
  | T_float32x2 -> V2SF
  | T_int8x16 | T_uint8x16 | T_poly8x16 -> V16QI
  | T_int16x8 | T_uint16x8 | T_poly16x8 -> V8HI
  | T_int32x4 | T_uint32x4 -> V4SI
  | T_int64x2 | T_uint64x2 -> V2DI
  | T_float32x4 -> V4SF
  | _ -> raise Not_found
(* LLVM LOCAL end Map vector types to modes. *)
(* Find the builtin inttype for an array of [num] vectors of type [elttype].
   LLVM LOCAL: matches on the vector's machine mode as well as the total
   size in 32-bit words, so each vector type gets a distinct builtin type.
   NOTE(review): original lines 504/510/516 are absent from the garbled
   source; the (words, mode) case analysis below is already complete for
   2/3/4-register D and Q arrays, so they appear to have been blank --
   verify against upstream. *)
let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  (* LLVM LOCAL begin Match vector type, too. *)
  let vecmode = vectype_mode elttype in
  match numwords, vecmode with
  (* Arrays of D vectors: 2/3/4 registers -> TI/EI/OI tuple modes. *)
  | 4, V8QI -> B_TId8mode
  | 4, V4HI -> B_TId16mode
  | 4, V2SI -> B_TId32mode
  | 4, V1DI -> B_TId64mode
  | 4, V2SF -> B_TIdSFmode
  | 6, V8QI -> B_EId8mode
  | 6, V4HI -> B_EId16mode
  | 6, V2SI -> B_EId32mode
  | 6, V1DI -> B_EId64mode
  | 6, V2SF -> B_EIdSFmode
  | 8, V8QI -> B_OId8mode
  | 8, V4HI -> B_OId16mode
  | 8, V2SI -> B_OId32mode
  | 8, V1DI -> B_OId64mode
  | 8, V2SF -> B_OIdSFmode
  (* Arrays of Q vectors: 2/3/4 registers -> OI/CI/XI tuple modes. *)
  | 8, V16QI -> B_OIq8mode
  | 8, V8HI -> B_OIq16mode
  | 8, V4SI -> B_OIq32mode
  | 8, V2DI -> B_OIq64mode
  | 8, V4SF -> B_OIqSFmode
  | 12, V16QI -> B_CIq8mode
  | 12, V8HI -> B_CIq16mode
  | 12, V4SI -> B_CIq32mode
  | 12, V2DI -> B_CIq64mode
  | 12, V4SF -> B_CIqSFmode
  | 16, V16QI -> B_XIq8mode
  | 16, V8HI -> B_XIq16mode
  | 16, V4SI -> B_XIq32mode
  | 16, V2DI -> B_XIq64mode
  | 16, V4SF -> B_XIqSFmode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
(* LLVM LOCAL end Match vector type, too. *)
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode. *)

(* Wrap a type-generating function so that the element recorded for the
   result is replaced by its signless (sign-invariant) variant. *)
let make_sign_invariant f shape elt =
  let arity, e = f shape elt in
  (arity, non_signed_variant e)
(* Don't restrict any types: build the arity from the per-operand type
   function and pass the element through unchanged. *)
let elts_same make_arity shape elt =
  let t = type_for_elt shape elt in
  (make_arity t, elt)
(* As sign_invar_*, but when sign matters. *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

(* NOTE(review): the [let elts_same_io =] binder (original line 556) was
   lost from the garbled source; the name is fixed by sign_invar_io
   below -- verify. *)
let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

(* NOTE(review): binder lost (original line 564); name fixed by
   sign_invar_2 below. *)
let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

(* NOTE(review): binder lost (original line 567); name fixed by
   sign_invar_1 below. *)
let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data). *)
let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: the result is the unsigned element of the
   same width, while the operands keep their own type. *)
let cmp_sign_matters shape elt =
  let vt = type_for_elt shape elt in
  let rt = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (rt, vt 1, vt 2), elt)
(* Signed/unsigned invariant comparison: like cmp_sign_matters, but the
   element recorded for the intrinsic is made signless.
   NOTE(review): roughly half of this body (original lines 590, 592-595)
   was lost from the garbled source; reconstructed from upstream GCC
   neon.ml -- verify carefully. *)
let cmp_sign_invar shape elt =
  let shape', elt' = cmp_sign_matters shape elt in
  let elt'' =
    match non_signed_variant elt' with
      P8 -> I8
    | x -> x
  in
  shape', elt''
(* Comparison (VTST) where only the element width matters. *)
let cmp_bits shape elt =
  let vt = type_for_elt shape elt in
  let rt = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (rt, vt 1, vt 2), bits_of_elt elt)
(* Shift by a register operand: operand 2 uses the signed variant of the
   element type. *)
let reg_shift shape elt =
  let vt = type_for_elt shape elt in
  let shift_t = type_for_elt shape (signed_of_elt elt) 2 in
  (Arity2 (vt 0, vt 1, shift_t), elt)
(* Genericised constant-shift type-generating function.
   [mkimm] builds the immediate type from the width of operand 2's element;
   [?arity] overrides the default two-operand arity; [?result] rewrites the
   element used for the result type.
   NOTE(review): several binder/match lines (original lines 617-619, 622)
   were lost from the garbled source; reconstructed from the surviving
   [None]/[Some restriction]/[Some mkarity] fragments -- verify. *)
let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1 in
  let r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts.  The immediate's valid range starts at 1
   and runs up to the element width. *)
let shift_right shape elt =
  let imm_range width = T_immediate (1, width) in
  const_shift imm_range shape elt

(* Immediate right-shift that accumulates into the destination, so the
   destination also appears as the first source operand. *)
let shift_right_acc shape elt =
  let imm_range width = T_immediate (1, width) in
  let acc_arity dst op1 op2 = Arity3 (dst, dst, op1, op2) in
  const_shift imm_range ~arity:acc_arity shape elt
(* Use for immediate right-shifts when the operation doesn't care about
   signedness.
   NOTE(review): the original comment was never terminated (its closing
   line was lost in extraction), which would comment out the remainder of
   the file; closed here. *)
let shift_right_sign_invar =
  make_sign_invariant shift_right
(* Immediate right-shift; result is unsigned even when operand is signed.
   NOTE(review): the trailing [shape elt] application (original line 645)
   was lost from the garbled source; without it this binding would be a
   partial application of const_shift rather than an (arity, elt) pair. *)
let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt
(* Immediate left-shift.  Left-shift immediates range from 0 to one less
   than the element width. *)
let shift_left shape elt =
  let imm_range width = T_immediate (0, width - 1) in
  const_shift imm_range shape elt
(* Immediate left-shift, unsigned result.
   NOTE(review): the trailing [shape elt] application (original line 656)
   was lost from the garbled source; restored to match shift_left and
   shift_right_to_uns. *)
let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt
(* Immediate left-shift, don't care about signs. *)
let shift_left_sign_invar = make_sign_invariant shift_left
(* Shift left/right and insert: only element size matters, so the element
   recorded for the intrinsic is reduced to raw bits.
   NOTE(review): the [let arity, _ =] binder (original line 666) was lost
   from the garbled source; restored so the final pair makes sense. *)
let shift_insert shape elt =
  let arity, _ =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
(* Extract a single lane from a vector.  Polynomial elements are recorded
   as the equivalent-width unsigned integer. *)
let get_lane shape elt =
  let vt = type_for_elt shape elt in
  let ret_elt =
    match elt with
    | P8 -> U8
    | P16 -> U16
    | x -> x in
  (Arity2 (vt 0, vt 1, vt 2), ret_elt)
(* Set a single lane in a vector; element identity reduces to raw bits. *)
let set_lane shape elt =
  let vt = type_for_elt shape elt in
  (Arity3 (vt 0, vt 1, vt 2, vt 3), bits_of_elt elt)

(* As set_lane, but record no element type at all. *)
let set_lane_notype shape elt =
  let vt = type_for_elt shape elt in
  (Arity3 (vt 0, vt 1, vt 2, vt 3), NoElts)
(* Initialise a vector from a bit pattern: the source operand is typed as
   the U64 variant for this shape, the result as the requested element. *)
let create_vector shape elt =
  let src = type_for_elt shape U64 1 in
  let dst = type_for_elt shape elt 0 in
  (Arity1 (dst, src), elt)
(* Conversions: the element must be a Conv or Cast pair giving the
   destination and source element types. *)
let conv make_arity shape elt =
  let edest, esrc =
    match elt with
    | Conv (d, s) | Cast (d, s) -> d, s
    | _ -> failwith "Non-conversion element in conversion" in
  let vt = type_for_elt shape esrc in
  let rt = type_for_elt shape edest 0 in
  (make_arity rt vt, elt)

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
(* Operation has an unsigned result even if operands are signed. *)
let dst_unsign make_arity shape elt =
  let vt = type_for_elt shape elt in
  let rt = type_for_elt shape (unsigned_of_elt elt) 0 in
  (make_arity rt vt, elt)

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
(* Wrap a type-generating function, reducing its element to raw bits. *)
let make_bits_only f shape elt =
  let arity, e = f shape elt in
  (arity, bits_of_elt e)
(* Extend operation: four like-typed operands, element reduced to bits. *)
let extend shape elt =
  let vt = type_for_elt shape elt in
  (Arity3 (vt 0, vt 1, vt 2, vt 3), bits_of_elt elt)
(* Table look-up operations.  Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops. *)
let table mkarity shape elt =
  let vt = type_for_elt shape elt in
  let idx_t = type_for_elt shape (poly_unsigned_variant elt) 2 in
  (mkarity vt idx_t, bits_of_elt elt)

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
(* Operations where only bits matter. *)
let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Store operations: void result, element reduced to raw bits. *)
let store_1 shape elt =
  let vt = type_for_elt shape elt in
  (Arity2 (T_void, vt 0, vt 1), bits_of_elt elt)

let store_3 shape elt =
  let vt = type_for_elt shape elt in
  (Arity3 (T_void, vt 0, vt 1, vt 2), bits_of_elt elt)
(* Wrap a type-generating function, discarding its element entirely.
   NOTE(review): the final [arity, NoElts] line (original lines 749-750)
   was lost from the garbled source; restored -- the discarded element and
   the "notype" naming fix it. *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
(* Bit-select operations (first operand is unsigned int). *)
let bit_select shape elt =
  let vt = type_for_elt shape elt in
  let sel_t = type_for_elt shape (unsigned_of_elt elt) in
  (Arity3 (vt 0, sel_t 1, vt 2, vt 3), NoElts)
(* Common lists of supported element types. *)
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = [S64; U64] @ su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = [P8; P16; F32] @ su_8_32
let pf_su_8_64 = [P8; P16; F32] @ su_8_64
773 Vadd
, [], All
(3, Dreg
), "vadd", sign_invar_2, F32
:: su_8_64;
774 Vadd
, [], All
(3, Qreg
), "vaddQ", sign_invar_2, F32
:: su_8_64;
775 Vadd
, [], Long
, "vaddl", elts_same_2, su_8_32;
776 Vadd
, [], Wide
, "vaddw", elts_same_2, su_8_32;
777 Vadd
, [Halving
], All
(3, Dreg
), "vhadd", elts_same_2, su_8_32;
778 Vadd
, [Halving
], All
(3, Qreg
), "vhaddQ", elts_same_2, su_8_32;
779 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
780 All
(3, Dreg
), "vRhadd", elts_same_2, su_8_32;
781 Vadd
, [Instruction_name
["vrhadd"]; Rounding
; Halving
],
782 All
(3, Qreg
), "vRhaddQ", elts_same_2, su_8_32;
783 Vadd
, [Saturating
], All
(3, Dreg
), "vqadd", elts_same_2, su_8_64;
784 Vadd
, [Saturating
], All
(3, Qreg
), "vqaddQ", elts_same_2, su_8_64;
785 Vadd
, [High_half
], Narrow
, "vaddhn", sign_invar_2, su_16_64;
786 Vadd
, [Instruction_name
["vraddhn"]; Rounding
; High_half
],
787 Narrow
, "vRaddhn", sign_invar_2, su_16_64;
789 (* Multiplication. *)
790 Vmul
, [], All
(3, Dreg
), "vmul", sign_invar_2, P8
:: F32
:: su_8_32;
791 Vmul
, [], All
(3, Qreg
), "vmulQ", sign_invar_2, P8
:: F32
:: su_8_32;
792 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Dreg
), "vqdmulh",
793 elts_same_2, [S16
; S32
];
794 Vmul
, [Saturating
; Doubling
; High_half
], All
(3, Qreg
), "vqdmulhQ",
795 elts_same_2, [S16
; S32
];
797 [Saturating
; Rounding
; Doubling
; High_half
;
798 Instruction_name
["vqrdmulh"]],
799 All
(3, Dreg
), "vqRdmulh",
800 elts_same_2, [S16
; S32
];
802 [Saturating
; Rounding
; Doubling
; High_half
;
803 Instruction_name
["vqrdmulh"]],
804 All
(3, Qreg
), "vqRdmulhQ",
805 elts_same_2, [S16
; S32
];
806 Vmul
, [], Long
, "vmull", elts_same_2, P8
:: su_8_32;
807 Vmul
, [Saturating
; Doubling
], Long
, "vqdmull", elts_same_2, [S16
; S32
];
809 (* Multiply-accumulate. *)
810 Vmla
, [], All
(3, Dreg
), "vmla", sign_invar_io, F32
:: su_8_32;
811 Vmla
, [], All
(3, Qreg
), "vmlaQ", sign_invar_io, F32
:: su_8_32;
812 Vmla
, [], Long
, "vmlal", elts_same_io, su_8_32;
813 Vmla
, [Saturating
; Doubling
], Long
, "vqdmlal", elts_same_io, [S16
; S32
];
815 (* Multiply-subtract. *)
816 Vmls
, [], All
(3, Dreg
), "vmls", sign_invar_io, F32
:: su_8_32;
817 Vmls
, [], All
(3, Qreg
), "vmlsQ", sign_invar_io, F32
:: su_8_32;
818 Vmls
, [], Long
, "vmlsl", elts_same_io, su_8_32;
819 Vmls
, [Saturating
; Doubling
], Long
, "vqdmlsl", elts_same_io, [S16
; S32
];
822 Vsub
, [], All
(3, Dreg
), "vsub", sign_invar_2, F32
:: su_8_64;
823 Vsub
, [], All
(3, Qreg
), "vsubQ", sign_invar_2, F32
:: su_8_64;
824 Vsub
, [], Long
, "vsubl", elts_same_2, su_8_32;
825 Vsub
, [], Wide
, "vsubw", elts_same_2, su_8_32;
826 Vsub
, [Halving
], All
(3, Dreg
), "vhsub", elts_same_2, su_8_32;
827 Vsub
, [Halving
], All
(3, Qreg
), "vhsubQ", elts_same_2, su_8_32;
828 Vsub
, [Saturating
], All
(3, Dreg
), "vqsub", elts_same_2, su_8_64;
829 Vsub
, [Saturating
], All
(3, Qreg
), "vqsubQ", elts_same_2, su_8_64;
830 Vsub
, [High_half
], Narrow
, "vsubhn", sign_invar_2, su_16_64;
831 Vsub
, [Instruction_name
["vrsubhn"]; Rounding
; High_half
],
832 Narrow
, "vRsubhn", sign_invar_2, su_16_64;
834 (* Comparison, equal. *)
835 Vceq
, [], All
(3, Dreg
), "vceq", cmp_sign_invar, P8
:: F32
:: su_8_32;
836 Vceq
, [], All
(3, Qreg
), "vceqQ", cmp_sign_invar, P8
:: F32
:: su_8_32;
838 (* Comparison, greater-than or equal. *)
839 Vcge
, [], All
(3, Dreg
), "vcge", cmp_sign_matters, F32
:: su_8_32;
840 Vcge
, [], All
(3, Qreg
), "vcgeQ", cmp_sign_matters, F32
:: su_8_32;
842 (* Comparison, less-than or equal. *)
843 Vcle
, [Flipped
"vcge"], All
(3, Dreg
), "vcle", cmp_sign_matters,
845 Vcle
, [Instruction_name
["vcge"]; Flipped
"vcgeQ"],
846 All
(3, Qreg
), "vcleQ", cmp_sign_matters,
849 (* Comparison, greater-than. *)
850 Vcgt
, [], All
(3, Dreg
), "vcgt", cmp_sign_matters, F32
:: su_8_32;
851 Vcgt
, [], All
(3, Qreg
), "vcgtQ", cmp_sign_matters, F32
:: su_8_32;
853 (* Comparison, less-than. *)
854 Vclt
, [Flipped
"vcgt"], All
(3, Dreg
), "vclt", cmp_sign_matters,
856 Vclt
, [Instruction_name
["vcgt"]; Flipped
"vcgtQ"],
857 All
(3, Qreg
), "vcltQ", cmp_sign_matters,
860 (* Compare absolute greater-than or equal. *)
861 Vcage
, [Instruction_name
["vacge"]],
862 All
(3, Dreg
), "vcage", cmp_sign_matters, [F32
];
863 Vcage
, [Instruction_name
["vacge"]],
864 All
(3, Qreg
), "vcageQ", cmp_sign_matters, [F32
];
866 (* Compare absolute less-than or equal. *)
867 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcage"],
868 All
(3, Dreg
), "vcale", cmp_sign_matters, [F32
];
869 Vcale
, [Instruction_name
["vacge"]; Flipped
"vcageQ"],
870 All
(3, Qreg
), "vcaleQ", cmp_sign_matters, [F32
];
872 (* Compare absolute greater-than or equal. *)
873 Vcagt
, [Instruction_name
["vacgt"]],
874 All
(3, Dreg
), "vcagt", cmp_sign_matters, [F32
];
875 Vcagt
, [Instruction_name
["vacgt"]],
876 All
(3, Qreg
), "vcagtQ", cmp_sign_matters, [F32
];
878 (* Compare absolute less-than or equal. *)
879 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagt"],
880 All
(3, Dreg
), "vcalt", cmp_sign_matters, [F32
];
881 Vcalt
, [Instruction_name
["vacgt"]; Flipped
"vcagtQ"],
882 All
(3, Qreg
), "vcaltQ", cmp_sign_matters, [F32
];
885 Vtst
, [], All
(3, Dreg
), "vtst", cmp_bits, P8
:: su_8_32;
886 Vtst
, [], All
(3, Qreg
), "vtstQ", cmp_bits, P8
:: su_8_32;
888 (* Absolute difference. *)
889 Vabd
, [], All
(3, Dreg
), "vabd", elts_same_2, F32
:: su_8_32;
890 Vabd
, [], All
(3, Qreg
), "vabdQ", elts_same_2, F32
:: su_8_32;
891 Vabd
, [], Long
, "vabdl", elts_same_2, su_8_32;
893 (* Absolute difference and accumulate. *)
894 Vaba
, [], All
(3, Dreg
), "vaba", elts_same_io, su_8_32;
895 Vaba
, [], All
(3, Qreg
), "vabaQ", elts_same_io, su_8_32;
896 Vaba
, [], Long
, "vabal", elts_same_io, su_8_32;
899 Vmax
, [], All
(3, Dreg
), "vmax", elts_same_2, F32
:: su_8_32;
900 Vmax
, [], All
(3, Qreg
), "vmaxQ", elts_same_2, F32
:: su_8_32;
903 Vmin
, [], All
(3, Dreg
), "vmin", elts_same_2, F32
:: su_8_32;
904 Vmin
, [], All
(3, Qreg
), "vminQ", elts_same_2, F32
:: su_8_32;
907 Vpadd
, [], All
(3, Dreg
), "vpadd", sign_invar_2, F32
:: su_8_32;
908 Vpadd
, [], Long_noreg Dreg
, "vpaddl", elts_same_1, su_8_32;
909 Vpadd
, [], Long_noreg Qreg
, "vpaddlQ", elts_same_1, su_8_32;
911 (* Pairwise add, widen and accumulate. *)
912 Vpada
, [], Wide_noreg Dreg
, "vpadal", elts_same_2, su_8_32;
913 Vpada
, [], Wide_noreg Qreg
, "vpadalQ", elts_same_2, su_8_32;
915 (* Folding maximum, minimum. *)
916 Vpmax
, [], All
(3, Dreg
), "vpmax", elts_same_2, F32
:: su_8_32;
917 Vpmin
, [], All
(3, Dreg
), "vpmin", elts_same_2, F32
:: su_8_32;
919 (* Reciprocal step. *)
920 Vrecps
, [], All
(3, Dreg
), "vrecps", elts_same_2, [F32
];
921 Vrecps
, [], All
(3, Qreg
), "vrecpsQ", elts_same_2, [F32
];
922 Vrsqrts
, [], All
(3, Dreg
), "vrsqrts", elts_same_2, [F32
];
923 Vrsqrts
, [], All
(3, Qreg
), "vrsqrtsQ", elts_same_2, [F32
];
925 (* Vector shift left. *)
926 Vshl
, [], All
(3, Dreg
), "vshl", reg_shift, su_8_64;
927 Vshl
, [], All
(3, Qreg
), "vshlQ", reg_shift, su_8_64;
928 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
929 All
(3, Dreg
), "vRshl", reg_shift, su_8_64;
930 Vshl
, [Instruction_name
["vrshl"]; Rounding
],
931 All
(3, Qreg
), "vRshlQ", reg_shift, su_8_64;
932 Vshl
, [Saturating
], All
(3, Dreg
), "vqshl", reg_shift, su_8_64;
933 Vshl
, [Saturating
], All
(3, Qreg
), "vqshlQ", reg_shift, su_8_64;
934 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
935 All
(3, Dreg
), "vqRshl", reg_shift, su_8_64;
936 Vshl
, [Instruction_name
["vqrshl"]; Saturating
; Rounding
],
937 All
(3, Qreg
), "vqRshlQ", reg_shift, su_8_64;
939 (* Vector shift right by constant. *)
940 Vshr_n
, [], Binary_imm Dreg
, "vshr_n", shift_right, su_8_64;
941 Vshr_n
, [], Binary_imm Qreg
, "vshrQ_n", shift_right, su_8_64;
942 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Dreg
,
943 "vRshr_n", shift_right, su_8_64;
944 Vshr_n
, [Instruction_name
["vrshr"]; Rounding
], Binary_imm Qreg
,
945 "vRshrQ_n", shift_right, su_8_64;
946 Vshr_n
, [], Narrow_imm
, "vshrn_n", shift_right_sign_invar, su_16_64;
947 Vshr_n
, [Instruction_name
["vrshrn"]; Rounding
], Narrow_imm
, "vRshrn_n",
948 shift_right_sign_invar, su_16_64;
949 Vshr_n
, [Saturating
], Narrow_imm
, "vqshrn_n", shift_right, su_16_64;
950 Vshr_n
, [Instruction_name
["vqrshrn"]; Saturating
; Rounding
], Narrow_imm
,
951 "vqRshrn_n", shift_right, su_16_64;
952 Vshr_n
, [Saturating
; Dst_unsign
], Narrow_imm
, "vqshrun_n",
953 shift_right_to_uns, [S16
; S32
; S64
];
954 Vshr_n
, [Instruction_name
["vqrshrun"]; Saturating
; Dst_unsign
; Rounding
],
955 Narrow_imm
, "vqRshrun_n", shift_right_to_uns, [S16
; S32
; S64
];
957 (* Vector shift left by constant. *)
958 Vshl_n
, [], Binary_imm Dreg
, "vshl_n", shift_left_sign_invar, su_8_64;
959 Vshl_n
, [], Binary_imm Qreg
, "vshlQ_n", shift_left_sign_invar, su_8_64;
960 Vshl_n
, [Saturating
], Binary_imm Dreg
, "vqshl_n", shift_left, su_8_64;
961 Vshl_n
, [Saturating
], Binary_imm Qreg
, "vqshlQ_n", shift_left, su_8_64;
962 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Dreg
, "vqshlu_n",
963 shift_left_to_uns, [S8
; S16
; S32
; S64
];
964 Vshl_n
, [Saturating
; Dst_unsign
], Binary_imm Qreg
, "vqshluQ_n",
965 shift_left_to_uns, [S8
; S16
; S32
; S64
];
966 Vshl_n
, [], Long_imm
, "vshll_n", shift_left, su_8_32;
968 (* Vector shift right by constant and accumulate. *)
969 Vsra_n
, [], Binary_imm Dreg
, "vsra_n", shift_right_acc, su_8_64;
970 Vsra_n
, [], Binary_imm Qreg
, "vsraQ_n", shift_right_acc, su_8_64;
971 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Dreg
,
972 "vRsra_n", shift_right_acc, su_8_64;
973 Vsra_n
, [Instruction_name
["vrsra"]; Rounding
], Binary_imm Qreg
,
974 "vRsraQ_n", shift_right_acc, su_8_64;
976 (* Vector shift right and insert. *)
977 Vsri
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsri_n", shift_insert,
978 P8
:: P16
:: su_8_64;
979 Vsri
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsriQ_n", shift_insert,
980 P8
:: P16
:: su_8_64;
982 (* Vector shift left and insert. *)
983 Vsli
, [], Use_operands
[| Dreg
; Dreg
; Immed
|], "vsli_n", shift_insert,
984 P8
:: P16
:: su_8_64;
985 Vsli
, [], Use_operands
[| Qreg
; Qreg
; Immed
|], "vsliQ_n", shift_insert,
986 P8
:: P16
:: su_8_64;
988 (* Absolute value. *)
989 Vabs
, [], All
(2, Dreg
), "vabs", elts_same_1, [S8
; S16
; S32
; F32
];
990 Vabs
, [], All
(2, Qreg
), "vabsQ", elts_same_1, [S8
; S16
; S32
; F32
];
991 Vabs
, [Saturating
], All
(2, Dreg
), "vqabs", elts_same_1, [S8
; S16
; S32
];
992 Vabs
, [Saturating
], All
(2, Qreg
), "vqabsQ", elts_same_1, [S8
; S16
; S32
];
995 Vneg
, [], All
(2, Dreg
), "vneg", elts_same_1, [S8
; S16
; S32
; F32
];
996 Vneg
, [], All
(2, Qreg
), "vnegQ", elts_same_1, [S8
; S16
; S32
; F32
];
997 Vneg
, [Saturating
], All
(2, Dreg
), "vqneg", elts_same_1, [S8
; S16
; S32
];
998 Vneg
, [Saturating
], All
(2, Qreg
), "vqnegQ", elts_same_1, [S8
; S16
; S32
];
1001 Vmvn
, [], All
(2, Dreg
), "vmvn", notype_1, P8
:: su_8_32;
1002 Vmvn
, [], All
(2, Qreg
), "vmvnQ", notype_1, P8
:: su_8_32;
1004 (* Count leading sign bits. *)
1005 Vcls
, [], All
(2, Dreg
), "vcls", elts_same_1, [S8
; S16
; S32
];
1006 Vcls
, [], All
(2, Qreg
), "vclsQ", elts_same_1, [S8
; S16
; S32
];
1008 (* Count leading zeros. *)
1009 Vclz
, [], All
(2, Dreg
), "vclz", sign_invar_1, su_8_32;
1010 Vclz
, [], All
(2, Qreg
), "vclzQ", sign_invar_1, su_8_32;
1012 (* Count number of set bits. *)
1013 Vcnt
, [], All
(2, Dreg
), "vcnt", bits_1, [P8
; S8
; U8
];
1014 Vcnt
, [], All
(2, Qreg
), "vcntQ", bits_1, [P8
; S8
; U8
];
1016 (* Reciprocal estimate. *)
1017 Vrecpe
, [], All
(2, Dreg
), "vrecpe", elts_same_1, [U32
; F32
];
1018 Vrecpe
, [], All
(2, Qreg
), "vrecpeQ", elts_same_1, [U32
; F32
];
1020 (* Reciprocal square-root estimate. *)
1021 Vrsqrte
, [], All
(2, Dreg
), "vrsqrte", elts_same_1, [U32
; F32
];
1022 Vrsqrte
, [], All
(2, Qreg
), "vrsqrteQ", elts_same_1, [U32
; F32
];
1024 (* Get lanes from a vector. *)
1026 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
1027 Instruction_name
["vmov"]],
1028 Use_operands
[| Corereg
; Dreg
; Immed
|],
1029 "vget_lane", get_lane, pf_su_8_32;
1032 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
1033 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1034 Use_operands
[| Corereg
; Dreg
; Immed
|],
1035 "vget_lane", notype_2, [S64
; U64
];
1037 [InfoWord
; Disassembles_as
[Use_operands
[| Corereg
; Element_of_dreg
|]];
1038 Instruction_name
["vmov"]],
1039 Use_operands
[| Corereg
; Qreg
; Immed
|],
1040 "vgetQ_lane", get_lane, pf_su_8_32;
1043 Disassembles_as
[Use_operands
[| Corereg
; Corereg
; Dreg
|]];
1044 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1045 Use_operands
[| Corereg
; Qreg
; Immed
|],
1046 "vgetQ_lane", notype_2, [S64
; U64
];
1048 (* Set lanes in a vector. *)
1049 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
1050 Instruction_name
["vmov"]],
1051 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
1052 set_lane, pf_su_8_32;
1053 Vset_lane
, [Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
1054 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1055 Use_operands
[| Dreg
; Corereg
; Dreg
; Immed
|], "vset_lane",
1056 set_lane_notype, [S64
; U64
];
1057 Vset_lane
, [Disassembles_as
[Use_operands
[| Element_of_dreg
; Corereg
|]];
1058 Instruction_name
["vmov"]],
1059 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
1060 set_lane, pf_su_8_32;
1061 Vset_lane
, [Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]];
1062 Instruction_name
["vmov"]; Const_valuator
(fun _
-> 0)],
1063 Use_operands
[| Qreg
; Corereg
; Qreg
; Immed
|], "vsetQ_lane",
1064 set_lane_notype, [S64
; U64
];
1066 (* Create vector from literal bit pattern. *)
1068 [No_op
], (* Not really, but it can yield various things that are too
1069 hard for the test generator at this time. *)
1070 Use_operands
[| Dreg
; Corereg
|], "vcreate", create_vector,
1073 (* Set all lanes to the same value. *)
1075 Use_operands
[| Dreg
; Corereg
|], "vdup_n", bits_1,
1078 [Instruction_name
["vmov"];
1079 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1080 Use_operands
[| Dreg
; Corereg
|], "vdup_n", notype_1,
1083 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", bits_1,
1086 [Instruction_name
["vmov"];
1087 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1088 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1089 Use_operands
[| Qreg
; Corereg
|], "vdupQ_n", notype_1,
1092 (* These are just aliases for the above. *)
1094 [Builtin_name
"vdup_n"],
1095 Use_operands
[| Dreg
; Corereg
|],
1096 "vmov_n", bits_1, pf_su_8_32;
1098 [Builtin_name
"vdup_n";
1099 Instruction_name
["vmov"];
1100 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1101 Use_operands
[| Dreg
; Corereg
|],
1102 "vmov_n", notype_1, [S64
; U64
];
1104 [Builtin_name
"vdupQ_n"],
1105 Use_operands
[| Qreg
; Corereg
|],
1106 "vmovQ_n", bits_1, pf_su_8_32;
1108 [Builtin_name
"vdupQ_n";
1109 Instruction_name
["vmov"];
1110 Disassembles_as
[Use_operands
[| Dreg
; Corereg
; Corereg
|];
1111 Use_operands
[| Dreg
; Corereg
; Corereg
|]]],
1112 Use_operands
[| Qreg
; Corereg
|],
1113 "vmovQ_n", notype_1, [S64
; U64
];
1115 (* Duplicate, lane version. We can't use Use_operands here because the
1116 rightmost register (always Dreg) would be picked up by find_key_operand,
1117 when we want the leftmost register to be used in this case (otherwise
1118 the modes are indistinguishable in neon.md, etc. *)
1120 [Disassembles_as
[Use_operands
[| Dreg
; Element_of_dreg
|]]],
1121 Unary_scalar Dreg
, "vdup_lane", bits_2, pf_su_8_32;
1123 [No_op
; Const_valuator
(fun _
-> 0)],
1124 Unary_scalar Dreg
, "vdup_lane", bits_2, [S64
; U64
];
1126 [Disassembles_as
[Use_operands
[| Qreg
; Element_of_dreg
|]]],
1127 Unary_scalar Qreg
, "vdupQ_lane", bits_2, pf_su_8_32;
1129 [No_op
; Const_valuator
(fun _
-> 0)],
1130 Unary_scalar Qreg
, "vdupQ_lane", bits_2, [S64
; U64
];
1132 (* Combining vectors. *)
1134 Use_operands
[| Qreg
; Dreg
; Dreg
|], "vcombine", notype_2,
1137 (* Splitting vectors. *)
1139 Use_operands
[| Dreg
; Qreg
|], "vget_high",
1140 notype_1, pf_su_8_64;
1141 Vget_low
, [Instruction_name
["vmov"];
1142 Disassembles_as
[Use_operands
[| Dreg
; Dreg
|]]],
1143 Use_operands
[| Dreg
; Qreg
|], "vget_low",
1144 notype_1, pf_su_8_64;
1147 Vcvt
, [InfoWord
], All
(2, Dreg
), "vcvt", conv_1,
1148 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1149 Vcvt
, [InfoWord
], All
(2, Qreg
), "vcvtQ", conv_1,
1150 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1151 Vcvt_n
, [InfoWord
], Use_operands
[| Dreg
; Dreg
; Immed
|], "vcvt_n", conv_2,
1152 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1153 Vcvt_n
, [InfoWord
], Use_operands
[| Qreg
; Qreg
; Immed
|], "vcvtQ_n", conv_2,
1154 [Conv
(S32
, F32
); Conv
(U32
, F32
); Conv
(F32
, S32
); Conv
(F32
, U32
)];
1156 (* Move, narrowing. *)
1157 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]],
1158 Narrow
, "vmovn", sign_invar_1, su_16_64;
1159 Vmovn
, [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
],
1160 Narrow
, "vqmovn", elts_same_1, su_16_64;
1162 [Disassembles_as
[Use_operands
[| Dreg
; Qreg
|]]; Saturating
; Dst_unsign
],
1163 Narrow
, "vqmovun", dst_unsign_1,
1167 Vmovl
, [Disassembles_as
[Use_operands
[| Qreg
; Dreg
|]]],
1168 Long
, "vmovl", elts_same_1, su_8_32;
1172 [Instruction_name
["vtbl"];
1173 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1174 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbl1", table_2, [U8
; S8
; P8
];
1175 Vtbl
2, [Instruction_name
["vtbl"]],
1176 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbl2", table_2,
1178 Vtbl
3, [Instruction_name
["vtbl"]],
1179 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbl3", table_2,
1181 Vtbl
4, [Instruction_name
["vtbl"]],
1182 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbl4", table_2,
1185 (* Extended table lookup. *)
1187 [Instruction_name
["vtbx"];
1188 Disassembles_as
[Use_operands
[| Dreg
; VecArray
(1, Dreg
); Dreg
|]]],
1189 Use_operands
[| Dreg
; Dreg
; Dreg
|], "vtbx1", table_io, [U8
; S8
; P8
];
1190 Vtbx
2, [Instruction_name
["vtbx"]],
1191 Use_operands
[| Dreg
; VecArray
(2, Dreg
); Dreg
|], "vtbx2", table_io,
1193 Vtbx
3, [Instruction_name
["vtbx"]],
1194 Use_operands
[| Dreg
; VecArray
(3, Dreg
); Dreg
|], "vtbx3", table_io,
1196 Vtbx
4, [Instruction_name
["vtbx"]],
1197 Use_operands
[| Dreg
; VecArray
(4, Dreg
); Dreg
|], "vtbx4", table_io,
1200 (* Multiply, lane. (note: these were undocumented at the time of
1202 Vmul_lane
, [], By_scalar Dreg
, "vmul_lane", sign_invar_2_lane,
1203 [S16
; S32
; U16
; U32
; F32
];
1204 Vmul_lane
, [], By_scalar Qreg
, "vmulQ_lane", sign_invar_2_lane,
1205 [S16
; S32
; U16
; U32
; F32
];
1207 (* Multiply-accumulate, lane. *)
1208 Vmla_lane
, [], By_scalar Dreg
, "vmla_lane", sign_invar_io_lane,
1209 [S16
; S32
; U16
; U32
; F32
];
1210 Vmla_lane
, [], By_scalar Qreg
, "vmlaQ_lane", sign_invar_io_lane,
1211 [S16
; S32
; U16
; U32
; F32
];
1212 Vmla_lane
, [], Wide_lane
, "vmlal_lane", elts_same_io_lane,
1213 [S16
; S32
; U16
; U32
];
1214 Vmla_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlal_lane",
1215 elts_same_io_lane, [S16
; S32
];
1217 (* Multiply-subtract, lane. *)
1218 Vmls_lane
, [], By_scalar Dreg
, "vmls_lane", sign_invar_io_lane,
1219 [S16
; S32
; U16
; U32
; F32
];
1220 Vmls_lane
, [], By_scalar Qreg
, "vmlsQ_lane", sign_invar_io_lane,
1221 [S16
; S32
; U16
; U32
; F32
];
1222 Vmls_lane
, [], Wide_lane
, "vmlsl_lane", elts_same_io_lane,
1223 [S16
; S32
; U16
; U32
];
1224 Vmls_lane
, [Saturating
; Doubling
], Wide_lane
, "vqdmlsl_lane",
1225 elts_same_io_lane, [S16
; S32
];
1227 (* Long multiply, lane. *)
1229 Wide_lane
, "vmull_lane", elts_same_2_lane, [S16
; S32
; U16
; U32
];
1231 (* Saturating doubling long multiply, lane. *)
1232 Vqdmull_lane
, [Saturating
; Doubling
],
1233 Wide_lane
, "vqdmull_lane", elts_same_2_lane, [S16
; S32
];
1235 (* Saturating doubling long multiply high, lane. *)
1236 Vqdmulh_lane
, [Saturating
; Halving
],
1237 By_scalar Qreg
, "vqdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1238 Vqdmulh_lane
, [Saturating
; Halving
],
1239 By_scalar Dreg
, "vqdmulh_lane", elts_same_2_lane, [S16
; S32
];
1240 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1241 Instruction_name
["vqrdmulh"]],
1242 By_scalar Qreg
, "vqRdmulhQ_lane", elts_same_2_lane, [S16
; S32
];
1243 Vqdmulh_lane
, [Saturating
; Halving
; Rounding
;
1244 Instruction_name
["vqrdmulh"]],
1245 By_scalar Dreg
, "vqRdmulh_lane", elts_same_2_lane, [S16
; S32
];
1247 (* Vector multiply by scalar. *)
1249 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1250 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmul_n",
1251 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1253 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1254 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmulQ_n",
1255 sign_invar_2, [S16
; S32
; U16
; U32
; F32
];
1257 (* Vector long multiply by scalar. *)
1258 Vmull_n
, [Instruction_name
["vmull"];
1259 Disassembles_as
[Use_operands
[| Qreg
; Dreg
; Element_of_dreg
|]]],
1260 Wide_scalar
, "vmull_n",
1261 elts_same_2, [S16
; S32
; U16
; U32
];
1263 (* Vector saturating doubling long multiply by scalar. *)
1264 Vqdmull_n
, [Saturating
; Doubling
;
1265 Disassembles_as
[Use_operands
[| Qreg
; Dreg
;
1266 Element_of_dreg
|]]],
1267 Wide_scalar
, "vqdmull_n",
1268 elts_same_2, [S16
; S32
];
1270 (* Vector saturating doubling long multiply high by scalar. *)
1272 [Saturating
; Halving
; InfoWord
;
1273 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1274 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1275 "vqdmulhQ_n", elts_same_2, [S16
; S32
];
1277 [Saturating
; Halving
; InfoWord
;
1278 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1279 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1280 "vqdmulh_n", elts_same_2, [S16
; S32
];
1282 [Saturating
; Halving
; Rounding
; InfoWord
;
1283 Instruction_name
["vqrdmulh"];
1284 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1285 Use_operands
[| Qreg
; Qreg
; Corereg
|],
1286 "vqRdmulhQ_n", elts_same_2, [S16
; S32
];
1288 [Saturating
; Halving
; Rounding
; InfoWord
;
1289 Instruction_name
["vqrdmulh"];
1290 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1291 Use_operands
[| Dreg
; Dreg
; Corereg
|],
1292 "vqRdmulh_n", elts_same_2, [S16
; S32
];
1294 (* Vector multiply-accumulate by scalar. *)
1296 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1297 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmla_n",
1298 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1300 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1301 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlaQ_n",
1302 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1303 Vmla_n
, [], Wide_scalar
, "vmlal_n", elts_same_io, [S16
; S32
; U16
; U32
];
1304 Vmla_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlal_n", elts_same_io,
1307 (* Vector multiply subtract by scalar. *)
1309 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Element_of_dreg
|]]],
1310 Use_operands
[| Dreg
; Dreg
; Corereg
|], "vmls_n",
1311 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1313 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Element_of_dreg
|]]],
1314 Use_operands
[| Qreg
; Qreg
; Corereg
|], "vmlsQ_n",
1315 sign_invar_io, [S16
; S32
; U16
; U32
; F32
];
1316 Vmls_n
, [], Wide_scalar
, "vmlsl_n", elts_same_io, [S16
; S32
; U16
; U32
];
1317 Vmls_n
, [Saturating
; Doubling
], Wide_scalar
, "vqdmlsl_n", elts_same_io,
1320 (* Vector extract. *)
1321 Vext
, [Const_valuator
(fun _
-> 0)],
1322 Use_operands
[| Dreg
; Dreg
; Dreg
; Immed
|], "vext", extend,
1324 Vext
, [Const_valuator
(fun _
-> 0)],
1325 Use_operands
[| Qreg
; Qreg
; Qreg
; Immed
|], "vextQ", extend,
1328 (* Reverse elements. *)
1329 Vrev64
, [], All
(2, Dreg
), "vrev64", bits_1, P8
:: P16
:: F32
:: su_8_32;
1330 Vrev64
, [], All
(2, Qreg
), "vrev64Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1331 Vrev32
, [], All
(2, Dreg
), "vrev32", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1332 Vrev32
, [], All
(2, Qreg
), "vrev32Q", bits_1, [P8
; P16
; S8
; U8
; S16
; U16
];
1333 Vrev16
, [], All
(2, Dreg
), "vrev16", bits_1, [P8
; S8
; U8
];
1334 Vrev16
, [], All
(2, Qreg
), "vrev16Q", bits_1, [P8
; S8
; U8
];
1336 (* Bit selection. *)
1338 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1339 Disassembles_as
[Use_operands
[| Dreg
; Dreg
; Dreg
|]]],
1340 Use_operands
[| Dreg
; Dreg
; Dreg
; Dreg
|], "vbsl", bit_select,
1343 [Instruction_name
["vbsl"; "vbit"; "vbif"];
1344 Disassembles_as
[Use_operands
[| Qreg
; Qreg
; Qreg
|]]],
1345 Use_operands
[| Qreg
; Qreg
; Qreg
; Qreg
|], "vbslQ", bit_select,
1348 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1349 generating good code for intrinsics which return structure types --
1350 builtins work well by themselves (and understand that the values being
1351 stored on e.g. the stack also reside in registers, so can optimise the
1352 stores away entirely if the results are used immediately), but
1353 intrinsics are very much less efficient. Maybe something can be improved
1354 re: inlining, or tweaking the ABI used for intrinsics (a special call
1357 (* LLVM LOCAL begin Use return by value instead of ReturnPtr. *)
1358 Vtrn
, [], Use_operands
[| VecArray
(2, Dreg
); Dreg
; Dreg
|],
1359 "vtrn", bits_2, pf_su_8_32;
1360 Vtrn
, [], Use_operands
[| VecArray
(2, Qreg
); Qreg
; Qreg
|],
1361 "vtrnQ", bits_2, pf_su_8_32;
1364 Vzip
, [], Use_operands
[| VecArray
(2, Dreg
); Dreg
; Dreg
|],
1365 "vzip", bits_2, pf_su_8_32;
1366 Vzip
, [], Use_operands
[| VecArray
(2, Qreg
); Qreg
; Qreg
|],
1367 "vzipQ", bits_2, pf_su_8_32;
1369 (* Unzip elements. *)
1370 Vuzp
, [], Use_operands
[| VecArray
(2, Dreg
); Dreg
; Dreg
|],
1371 "vuzp", bits_2, pf_su_8_32;
1372 Vuzp
, [], Use_operands
[| VecArray
(2, Qreg
); Qreg
; Qreg
|],
1373 "vuzpQ", bits_2, pf_su_8_32;
1374 (* LLVM LOCAL end Use return by value instead of ReturnPtr. *)
1376 (* Element/structure loads. VLD1 variants. *)
1378 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1379 CstPtrTo Corereg
|]]],
1380 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1", bits_1,
1382 Vldx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1383 CstPtrTo Corereg
|]]],
1384 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q", bits_1,
1388 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1389 CstPtrTo Corereg
|]]],
1390 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1391 "vld1_lane", bits_3, pf_su_8_32;
1393 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1394 CstPtrTo Corereg
|]];
1395 Const_valuator
(fun _
-> 0)],
1396 Use_operands
[| Dreg
; CstPtrTo Corereg
; Dreg
; Immed
|],
1397 "vld1_lane", bits_3, [S64
; U64
];
1399 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1400 CstPtrTo Corereg
|]]],
1401 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1402 "vld1Q_lane", bits_3, pf_su_8_32;
1404 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1405 CstPtrTo Corereg
|]]],
1406 Use_operands
[| Qreg
; CstPtrTo Corereg
; Qreg
; Immed
|],
1407 "vld1Q_lane", bits_3, [S64
; U64
];
1410 [Disassembles_as
[Use_operands
[| VecArray
(1, All_elements_of_dreg
);
1411 CstPtrTo Corereg
|]]],
1412 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1415 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1416 CstPtrTo Corereg
|]]],
1417 Use_operands
[| Dreg
; CstPtrTo Corereg
|], "vld1_dup",
1420 [Disassembles_as
[Use_operands
[| VecArray
(2, All_elements_of_dreg
);
1421 CstPtrTo Corereg
|]]],
1422 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1425 [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1426 CstPtrTo Corereg
|]]],
1427 Use_operands
[| Qreg
; CstPtrTo Corereg
|], "vld1Q_dup",
1430 (* VST1 variants. *)
1431 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1433 Use_operands
[| PtrTo Corereg
; Dreg
|], "vst1",
1434 store_1, pf_su_8_64;
1435 Vstx
1, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1437 Use_operands
[| PtrTo Corereg
; Qreg
|], "vst1Q",
1438 store_1, pf_su_8_64;
1441 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1442 CstPtrTo Corereg
|]]],
1443 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1444 "vst1_lane", store_3, pf_su_8_32;
1446 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1447 CstPtrTo Corereg
|]];
1448 Const_valuator
(fun _
-> 0)],
1449 Use_operands
[| PtrTo Corereg
; Dreg
; Immed
|],
1450 "vst1_lane", store_3, [U64
; S64
];
1452 [Disassembles_as
[Use_operands
[| VecArray
(1, Element_of_dreg
);
1453 CstPtrTo Corereg
|]]],
1454 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1455 "vst1Q_lane", store_3, pf_su_8_32;
1457 [Disassembles_as
[Use_operands
[| VecArray
(1, Dreg
);
1458 CstPtrTo Corereg
|]]],
1459 Use_operands
[| PtrTo Corereg
; Qreg
; Immed
|],
1460 "vst1Q_lane", store_3, [U64
; S64
];
1462 (* VLD2 variants. *)
1463 Vldx
2, [], Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1464 "vld2", bits_1, pf_su_8_32;
1465 Vldx
2, [Instruction_name
["vld1"]],
1466 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1467 "vld2", bits_1, [S64
; U64
];
1468 Vldx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1469 CstPtrTo Corereg
|];
1470 Use_operands
[| VecArray
(2, Dreg
);
1471 CstPtrTo Corereg
|]]],
1472 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
|],
1473 "vld2Q", bits_1, pf_su_8_32;
1476 [Disassembles_as
[Use_operands
1477 [| VecArray
(2, Element_of_dreg
);
1478 CstPtrTo Corereg
|]]],
1479 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
;
1480 VecArray
(2, Dreg
); Immed
|],
1481 "vld2_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1483 [Disassembles_as
[Use_operands
1484 [| VecArray
(2, Element_of_dreg
);
1485 CstPtrTo Corereg
|]]],
1486 Use_operands
[| VecArray
(2, Qreg
); CstPtrTo Corereg
;
1487 VecArray
(2, Qreg
); Immed
|],
1488 "vld2Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1491 [Disassembles_as
[Use_operands
1492 [| VecArray
(2, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1493 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1494 "vld2_dup", bits_1, pf_su_8_32;
1496 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1497 [| VecArray
(2, Dreg
); CstPtrTo Corereg
|]]],
1498 Use_operands
[| VecArray
(2, Dreg
); CstPtrTo Corereg
|],
1499 "vld2_dup", bits_1, [S64
; U64
];
1501 (* VST2 variants. *)
1502 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1504 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1505 store_1, pf_su_8_32;
1506 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1508 Instruction_name
["vst1"]],
1509 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
) |], "vst2",
1510 store_1, [S64
; U64
];
1511 Vstx
2, [Disassembles_as
[Use_operands
[| VecArray
(2, Dreg
);
1513 Use_operands
[| VecArray
(2, Dreg
);
1515 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
) |], "vst2Q",
1516 store_1, pf_su_8_32;
1519 [Disassembles_as
[Use_operands
1520 [| VecArray
(2, Element_of_dreg
);
1521 CstPtrTo Corereg
|]]],
1522 Use_operands
[| PtrTo Corereg
; VecArray
(2, Dreg
); Immed
|], "vst2_lane",
1523 store_3, P8
:: P16
:: F32
:: su_8_32;
1525 [Disassembles_as
[Use_operands
1526 [| VecArray
(2, Element_of_dreg
);
1527 CstPtrTo Corereg
|]]],
1528 Use_operands
[| PtrTo Corereg
; VecArray
(2, Qreg
); Immed
|], "vst2Q_lane",
1529 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1531 (* VLD3 variants. *)
1532 Vldx
3, [], Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1533 "vld3", bits_1, pf_su_8_32;
1534 Vldx
3, [Instruction_name
["vld1"]],
1535 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1536 "vld3", bits_1, [S64
; U64
];
1537 Vldx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1538 CstPtrTo Corereg
|];
1539 Use_operands
[| VecArray
(3, Dreg
);
1540 CstPtrTo Corereg
|]]],
1541 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
|],
1542 "vld3Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1545 [Disassembles_as
[Use_operands
1546 [| VecArray
(3, Element_of_dreg
);
1547 CstPtrTo Corereg
|]]],
1548 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
;
1549 VecArray
(3, Dreg
); Immed
|],
1550 "vld3_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1552 [Disassembles_as
[Use_operands
1553 [| VecArray
(3, Element_of_dreg
);
1554 CstPtrTo Corereg
|]]],
1555 Use_operands
[| VecArray
(3, Qreg
); CstPtrTo Corereg
;
1556 VecArray
(3, Qreg
); Immed
|],
1557 "vld3Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1560 [Disassembles_as
[Use_operands
1561 [| VecArray
(3, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1562 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1563 "vld3_dup", bits_1, pf_su_8_32;
1565 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1566 [| VecArray
(3, Dreg
); CstPtrTo Corereg
|]]],
1567 Use_operands
[| VecArray
(3, Dreg
); CstPtrTo Corereg
|],
1568 "vld3_dup", bits_1, [S64
; U64
];
1570 (* VST3 variants. *)
1571 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1573 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1574 store_1, pf_su_8_32;
1575 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1577 Instruction_name
["vst1"]],
1578 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
) |], "vst3",
1579 store_1, [S64
; U64
];
1580 Vstx
3, [Disassembles_as
[Use_operands
[| VecArray
(3, Dreg
);
1582 Use_operands
[| VecArray
(3, Dreg
);
1584 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
) |], "vst3Q",
1585 store_1, pf_su_8_32;
1588 [Disassembles_as
[Use_operands
1589 [| VecArray
(3, Element_of_dreg
);
1590 CstPtrTo Corereg
|]]],
1591 Use_operands
[| PtrTo Corereg
; VecArray
(3, Dreg
); Immed
|], "vst3_lane",
1592 store_3, P8
:: P16
:: F32
:: su_8_32;
1594 [Disassembles_as
[Use_operands
1595 [| VecArray
(3, Element_of_dreg
);
1596 CstPtrTo Corereg
|]]],
1597 Use_operands
[| PtrTo Corereg
; VecArray
(3, Qreg
); Immed
|], "vst3Q_lane",
1598 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1600 (* VLD4/VST4 variants. *)
1601 Vldx
4, [], Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1602 "vld4", bits_1, pf_su_8_32;
1603 Vldx
4, [Instruction_name
["vld1"]],
1604 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1605 "vld4", bits_1, [S64
; U64
];
1606 Vldx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1607 CstPtrTo Corereg
|];
1608 Use_operands
[| VecArray
(4, Dreg
);
1609 CstPtrTo Corereg
|]]],
1610 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
|],
1611 "vld4Q", bits_1, P8
:: P16
:: F32
:: su_8_32;
1614 [Disassembles_as
[Use_operands
1615 [| VecArray
(4, Element_of_dreg
);
1616 CstPtrTo Corereg
|]]],
1617 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
;
1618 VecArray
(4, Dreg
); Immed
|],
1619 "vld4_lane", bits_3, P8
:: P16
:: F32
:: su_8_32;
1621 [Disassembles_as
[Use_operands
1622 [| VecArray
(4, Element_of_dreg
);
1623 CstPtrTo Corereg
|]]],
1624 Use_operands
[| VecArray
(4, Qreg
); CstPtrTo Corereg
;
1625 VecArray
(4, Qreg
); Immed
|],
1626 "vld4Q_lane", bits_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1629 [Disassembles_as
[Use_operands
1630 [| VecArray
(4, All_elements_of_dreg
); CstPtrTo Corereg
|]]],
1631 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1632 "vld4_dup", bits_1, pf_su_8_32;
1634 [Instruction_name
["vld1"]; Disassembles_as
[Use_operands
1635 [| VecArray
(4, Dreg
); CstPtrTo Corereg
|]]],
1636 Use_operands
[| VecArray
(4, Dreg
); CstPtrTo Corereg
|],
1637 "vld4_dup", bits_1, [S64
; U64
];
1639 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1641 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1642 store_1, pf_su_8_32;
1643 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1645 Instruction_name
["vst1"]],
1646 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
) |], "vst4",
1647 store_1, [S64
; U64
];
1648 Vstx
4, [Disassembles_as
[Use_operands
[| VecArray
(4, Dreg
);
1650 Use_operands
[| VecArray
(4, Dreg
);
1652 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
) |], "vst4Q",
1653 store_1, pf_su_8_32;
1656 [Disassembles_as
[Use_operands
1657 [| VecArray
(4, Element_of_dreg
);
1658 CstPtrTo Corereg
|]]],
1659 Use_operands
[| PtrTo Corereg
; VecArray
(4, Dreg
); Immed
|], "vst4_lane",
1660 store_3, P8
:: P16
:: F32
:: su_8_32;
1662 [Disassembles_as
[Use_operands
1663 [| VecArray
(4, Element_of_dreg
);
1664 CstPtrTo Corereg
|]]],
1665 Use_operands
[| PtrTo Corereg
; VecArray
(4, Qreg
); Immed
|], "vst4Q_lane",
1666 store_3, [P16
; F32
; U16
; U32
; S16
; S32
];
1668 (* Logical operations. And. *)
1669 Vand
, [], All
(3, Dreg
), "vand", notype_2, su_8_64;
1670 Vand
, [], All
(3, Qreg
), "vandQ", notype_2, su_8_64;
1673 Vorr
, [], All
(3, Dreg
), "vorr", notype_2, su_8_64;
1674 Vorr
, [], All
(3, Qreg
), "vorrQ", notype_2, su_8_64;
1677 Veor
, [], All
(3, Dreg
), "veor", notype_2, su_8_64;
1678 Veor
, [], All
(3, Qreg
), "veorQ", notype_2, su_8_64;
1680 (* Bic (And-not). *)
1681 Vbic
, [], All
(3, Dreg
), "vbic", notype_2, su_8_64;
1682 Vbic
, [], All
(3, Qreg
), "vbicQ", notype_2, su_8_64;
1685 Vorn
, [], All
(3, Dreg
), "vorn", notype_2, su_8_64;
1686 Vorn
, [], All
(3, Qreg
), "vornQ", notype_2, su_8_64;
1690 let elems = P8
:: P16
:: F32
:: su_8_64 in
1693 let types = List.fold_right
1694 (fun convfrom acc
->
1695 if convfrom
<> convto
then
1696 Cast
(convto
, convfrom
) :: acc
1702 let dconv = Vreinterp
, [No_op
], Use_operands
[| Dreg
; Dreg
|],
1703 "vreinterpret", conv_1, types
1704 and qconv
= Vreinterp
, [No_op
], Use_operands
[| Qreg
; Qreg
|],
1705 "vreinterpretQ", conv_1, types in
1706 dconv :: qconv
:: acc
)
1710 (* Output routines. *)
(* Render an element type as the suffix used in intrinsic names
   (e.g. S8 -> "s8").  Bit-pattern types (B*) carry no leading letter,
   and conversions/casts render as a "from_to" pair. *)
let rec string_of_elt elt =
  match elt with
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
(* Like string_of_elt, but join the two halves of a conversion/cast with
   "." (assembler-style) instead of "_" (intrinsic-name style).
   The "match elt with" line was lost in this copy and is restored here. *)
let string_of_elt_dots elt =
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | _ -> string_of_elt elt
(* Map a vectype to its C-level type name.  [affix] lets recursive cases
   decorate the base name (append "_t", or an "xN" array arity) before any
   outer wrapping such as "*" or "const" is applied.  The immediate and
   builtin-scalar cases bypass [affix] entirely.
   NOTE(review): the arms for T_void, T_ptrto and T_const (and the closing
   "in") were lost in this copy; they are reconstructed here to match the
   upstream GCC source — confirm against the original file. *)
let string_of_vectype vt =
  let rec name affix = function
      T_int8x8 -> affix "int8x8"
    | T_int8x16 -> affix "int8x16"
    | T_int16x4 -> affix "int16x4"
    | T_int16x8 -> affix "int16x8"
    | T_int32x2 -> affix "int32x2"
    | T_int32x4 -> affix "int32x4"
    | T_int64x1 -> affix "int64x1"
    | T_int64x2 -> affix "int64x2"
    | T_uint8x8 -> affix "uint8x8"
    | T_uint8x16 -> affix "uint8x16"
    | T_uint16x4 -> affix "uint16x4"
    | T_uint16x8 -> affix "uint16x8"
    | T_uint32x2 -> affix "uint32x2"
    | T_uint32x4 -> affix "uint32x4"
    | T_uint64x1 -> affix "uint64x1"
    | T_uint64x2 -> affix "uint64x2"
    | T_float32x2 -> affix "float32x2"
    | T_float32x4 -> affix "float32x4"
    | T_poly8x8 -> affix "poly8x8"
    | T_poly8x16 -> affix "poly8x16"
    | T_poly16x4 -> affix "poly16x4"
    | T_poly16x8 -> affix "poly16x8"
    | T_int8 -> affix "int8"
    | T_int16 -> affix "int16"
    | T_int32 -> affix "int32"
    | T_int64 -> affix "int64"
    | T_uint8 -> affix "uint8"
    | T_uint16 -> affix "uint16"
    | T_uint32 -> affix "uint32"
    | T_uint64 -> affix "uint64"
    | T_poly8 -> affix "poly8"
    | T_poly16 -> affix "poly16"
    | T_float32 -> affix "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_arrayof (num, base) ->
        (* The base name is taken bare, then the whole "basexN" string is
           run through the outer affix (so arrays get the "_t" suffix). *)
        let basename = name (fun x -> x) base in
        affix (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        let basename = name affix x in
        Printf.sprintf "%s *" basename
    | T_const x ->
        let basename = name affix x in
        Printf.sprintf "const %s" basename
  in
    name (fun x -> x ^ "_t") vt
(* LLVM LOCAL begin Print builtin type names that include the vector type. *)
(* Builtin type name for each opaque large-integer mode paired with its
   element mode.  The trailing digit in each name is the number of vectors
   the mode bundles together. *)
let string_of_inttype it =
  match it with
  (* TImode: pairs of D registers. *)
  | B_TId8mode -> "__builtin_neon_v8qi2"
  | B_TId16mode -> "__builtin_neon_v4hi2"
  | B_TId32mode -> "__builtin_neon_v2si2"
  | B_TId64mode -> "__builtin_neon_v1di2"
  | B_TIdSFmode -> "__builtin_neon_v2sf2"
  (* EImode: triples of D registers. *)
  | B_EId8mode -> "__builtin_neon_v8qi3"
  | B_EId16mode -> "__builtin_neon_v4hi3"
  | B_EId32mode -> "__builtin_neon_v2si3"
  | B_EId64mode -> "__builtin_neon_v1di3"
  | B_EIdSFmode -> "__builtin_neon_v2sf3"
  (* OImode: quadruples of D registers... *)
  | B_OId8mode -> "__builtin_neon_v8qi4"
  | B_OId16mode -> "__builtin_neon_v4hi4"
  | B_OId32mode -> "__builtin_neon_v2si4"
  | B_OId64mode -> "__builtin_neon_v1di4"
  | B_OIdSFmode -> "__builtin_neon_v2sf4"
  (* ...or pairs of Q registers. *)
  | B_OIq8mode -> "__builtin_neon_v16qi2"
  | B_OIq16mode -> "__builtin_neon_v8hi2"
  | B_OIq32mode -> "__builtin_neon_v4si2"
  | B_OIq64mode -> "__builtin_neon_v2di2"
  | B_OIqSFmode -> "__builtin_neon_v4sf2"
  (* CImode: triples of Q registers. *)
  | B_CIq8mode -> "__builtin_neon_v16qi3"
  | B_CIq16mode -> "__builtin_neon_v8hi3"
  | B_CIq32mode -> "__builtin_neon_v4si3"
  | B_CIq64mode -> "__builtin_neon_v2di3"
  | B_CIqSFmode -> "__builtin_neon_v4sf3"
  (* XImode: quadruples of Q registers. *)
  | B_XIq8mode -> "__builtin_neon_v16qi4"
  | B_XIq16mode -> "__builtin_neon_v8hi4"
  | B_XIq32mode -> "__builtin_neon_v4si4"
  | B_XIq64mode -> "__builtin_neon_v2di4"
  | B_XIqSFmode -> "__builtin_neon_v4sf4"
(* LLVM LOCAL end Print builtin type names that include the vector type. *)
(* Lower-case name of a machine mode, as used in builtin-name suffixes. *)
let string_of_mode m =
  match m with
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
  | V1DI -> "v1di"
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead). *)
(* Intrinsic names are plain ASCII, so lowercase_ascii behaves identically
   to the old String.lowercase here; the latter was deprecated in OCaml 4.03
   and removed in OCaml 5. *)
let intrinsic_name name = String.lowercase_ascii name
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list. *)
(* The builtin name is [name] with every uppercase ASCII letter deleted;
   if the features list contains a Flipped or Builtin_name entry, its
   payload replaces [name] wholesale first (fold_right means the leftmost
   such entry wins).  Several structural lines of this function were lost
   in this copy and are reconstructed here. *)
let builtin_name features name =
  let name = List.fold_right
               (fun el acc ->
                  match el with
                    Flipped x | Builtin_name x -> x
                  | _ -> acc) features name in
  (* A character survives iff lowercasing leaves it unchanged, i.e. it is
     anything except an uppercase letter.  Char.lowercase_ascii replaces
     the String.lowercase-based test, which was removed in OCaml 5. *)
  let islower c = Char.lowercase_ascii c = c
  and buf = Buffer.create (String.length name) in
  String.iter (fun c -> if islower c then Buffer.add_char buf c) name;
  Buffer.contents buf
(* Transform an arity into a list of strings. *)
(* One C type name per operand: the return type first, then each argument
   in order.  The "match a with" line was lost in this copy and is
   restored here. *)
let strings_of_arity a =
  match a with
    Arity0 vt -> [string_of_vectype vt]
  | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2]
  | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1;
                               string_of_vectype vt2;
                               string_of_vectype vt3]
  | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1;
                                    string_of_vectype vt2;
                                    string_of_vectype vt3;
                                    string_of_vectype vt4]
  | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1;
                                         string_of_vectype vt2;
                                         string_of_vectype vt3;
                                         string_of_vectype vt4;
                                         string_of_vectype vt5]
(* Builtin-name suffixes that are dropped when deriving the instruction
   name, but only when they appear immediately after an underscore. *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  This is done by getting the equivalent builtin name and
   stripping any suffixes from the list at the top of this file, unless
   the features list presents with an Instruction_name entry, in which
   case that is used; or unless the features list presents with a Flipped
   entry, in which case that is used.  If both such entries are present,
   the first in the list will be chosen.
   Several structural lines (the try/with skeleton, the Flipped predicate
   arm and the strip base cases) were lost in this copy and are
   reconstructed here. *)
let get_insn_names features name =
  let names = try
    begin
      match List.find (fun feature ->
              match feature with
                Instruction_name _ -> true
              | Flipped _ -> true
              | _ -> false) features
      with
        Instruction_name names -> names
      | Flipped name -> [name]
      (* List.find only returns entries matching the predicate above. *)
      | _ -> assert false
    end
  with Not_found -> [builtin_name features name]
  in
  (* For each candidate, strip a recognized suffix when it is the final
     underscore-separated component; otherwise leave the name alone.
     String.rindex raising Not_found (no underscore) keeps the name. *)
  List.map (fun name' ->
              try
                let underscore = String.rindex name' '_' in
                let our_suffix = String.sub name' (underscore + 1)
                                   ((String.length name') - underscore - 1)
                in
                let rec strip remaining_suffixes =
                  match remaining_suffixes with
                    [] -> name'
                  | s :: _ when our_suffix = s -> String.sub name' 0 underscore
                  | _ :: rest -> strip rest
                in
                strip suffixes_to_strip
              with (Not_found | Invalid_argument _) -> name') names
(* Apply a function to each element of a list and then comma-separate
   the resulting strings. *)
(* [commas f elts acc] prepends [acc] to the ", "-joined images of [elts];
   an empty list yields [acc] unchanged.  The match header and the base
   cases were lost in this copy and are restored here. *)
let rec commas f elts acc =
  match elts with
    [] -> acc
  | [elt] -> acc ^ (f elt)
  | elt :: elts -> commas f elts (acc ^ (f elt) ^ ", ")
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.
   The try/with skeleton and the fall-through arms were lost in this copy
   and are reconstructed here. *)
let analyze_all_shapes features shape f =
  try
    match List.find (fun feature ->
            match feature with
              Disassembles_as _ -> true
            | _ -> false) features
    with
      Disassembles_as shapes -> List.map f shapes
    (* List.find only returns Disassembles_as entries here. *)
    | _ -> assert false
  with Not_found -> [f shape]