Merge from mainline (154736:156693)
[official-gcc/graphite-test-results.git] / gcc / config / arm / neon.ml
blobf77f05cc825537186024ebf552d214a74ab5a9ba
1 (* Common code for ARM NEON header file, documentation and test case
2 generators.
4 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
(* Shorthand names for vector element types.  The leading letter selects
   the element class (see elt_class) and the number is the width in bits
   (see elt_width).  Conv and Cast pair two element types; NoElts means
   that no element type applies.  *)
type elts =
  | S8 | S16 | S32 | S64
  | F32
  | U8 | U16 | U32 | U64
  | P8 | P16
  | I8 | I16 | I32 | I64
  | B8 | B16 | B32 | B64
  | Conv of elts * elts
  | Cast of elts * elts
  | NoElts
(* Class of an element type, independent of its width.  ConvClass pairs
   the classes of the two halves of a Conv/Cast element, and NoType is
   the class of NoElts.  *)
type eltclass =
  | Signed
  | Unsigned
  | Float
  | Poly
  | Int
  | Bits
  | ConvClass of eltclass * eltclass
  | NoType
(* Types appearing in intrinsic prototypes.  The vector types correspond
   directly to C types.  *)
type vectype =
  (* Signed integer vectors.  *)
  | T_int8x8 | T_int8x16
  | T_int16x4 | T_int16x8
  | T_int32x2 | T_int32x4
  | T_int64x1 | T_int64x2
  (* Unsigned integer vectors.  *)
  | T_uint8x8 | T_uint8x16
  | T_uint16x4 | T_uint16x8
  | T_uint32x2 | T_uint32x4
  | T_uint64x1 | T_uint64x2
  (* Floating-point and polynomial vectors.  *)
  | T_float32x2 | T_float32x4
  | T_poly8x8 | T_poly8x16
  | T_poly16x4 | T_poly16x8
  (* Immediate operand; the two integers look like an inclusive
     (low, high) range -- TODO confirm against the generators.  *)
  | T_immediate of int * int
  (* Scalar types.  *)
  | T_int8 | T_int16
  | T_int32 | T_int64
  | T_uint8 | T_uint16
  | T_uint32 | T_uint64
  | T_poly8 | T_poly16
  | T_float32
  (* Type constructors: array of N elements, pointer, const.  *)
  | T_arrayof of int * vectype
  | T_ptrto of vectype
  | T_const of vectype
  | T_void
  | T_intQI
  | T_intHI
  | T_intSI
  | T_intDI
  | T_floatSF
(* Opaque integer modes, one per supported array size.  The meanings of
   the mode names are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
*)

type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* Kind of a single operand within an instruction shape.  *)
type shape_elt =
  | Dreg
  | Qreg
  | Corereg
  | Immed
  | VecArray of int * shape_elt
  | PtrTo of shape_elt
  | CstPtrTo of shape_elt
  (* The remaining constructors are used only in the test generator.  *)
  | Element_of_dreg               (* Used for "lane" variants.  *)
  | Element_of_qreg               (* Likewise.  *)
  | All_elements_of_dreg          (* Used for "dup" variants.  *)
  | Alternatives of shape_elt list  (* Used for multiple valid operands.  *)
(* Overall shape of an instruction: how many operands it has and which
   kind each one is.  regmap gives the per-operand register kind for each
   shape and shapemap the per-operand element-type adjustment.  *)
type shape_form =
  | All of int * shape_elt
  | Long | Long_noreg of shape_elt
  | Wide | Wide_noreg of shape_elt
  | Narrow
  | Long_imm | Narrow_imm
  | Binary_imm of shape_elt
  | Use_operands of shape_elt array
  | By_scalar of shape_elt
  | Unary_scalar of shape_elt
  | Wide_lane | Wide_scalar
  | Pair_result of shape_elt
(* Prototype of an intrinsic: ArityN carries N + 1 types.  The functions
   below put the result type first, followed by the N argument types.  *)
type arity =
  | Arity0 of vectype
  | Arity1 of vectype * vectype
  | Arity2 of vectype * vectype * vectype
  | Arity3 of vectype * vectype * vectype * vectype
  | Arity4 of vectype * vectype * vectype * vectype * vectype
(* Machine modes, as selected by mode_of_elt: D-register vector modes,
   Q-register vector modes, then scalar modes.  *)
type vecmode =
  | V8QI | V4HI | V2SI | V2SF | DI
  | V16QI | V8HI | V4SI | V4SF | V2DI
  | QI | HI | SI | SF
(* Operations for which intrinsics are generated.  Constructors are kept
   in their original order; only the layout differs.  *)
type opcode =
  (* Binary ops.  *)
  | Vadd | Vmul | Vmla | Vmls | Vsub
  | Vceq | Vcge | Vcgt | Vcle | Vclt
  | Vcage | Vcagt | Vcale | Vcalt
  | Vtst
  | Vabd | Vaba
  | Vmax | Vmin
  | Vpadd | Vpada | Vpmax | Vpmin
  | Vrecps | Vrsqrts
  | Vshl | Vshr_n | Vshl_n | Vsra_n | Vsri | Vsli
  (* Logic binops.  *)
  | Vand | Vorr | Veor | Vbic | Vorn | Vbsl
  (* Ops with scalar.  *)
  | Vmul_lane | Vmla_lane | Vmls_lane
  | Vmul_n | Vmla_n | Vmls_n
  | Vmull_n | Vmull_lane
  | Vqdmull_n | Vqdmull_lane
  | Vqdmulh_n | Vqdmulh_lane
  (* Unary ops.  *)
  | Vabs | Vneg | Vcls | Vclz | Vcnt | Vrecpe | Vrsqrte | Vmvn
  (* Vector extract.  *)
  | Vext
  (* Reverse elements.  *)
  | Vrev64 | Vrev32 | Vrev16
  (* Transposition ops.  *)
  | Vtrn | Vzip | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector.  *)
  | Vget_lane | Vset_lane
  (* Initialize vector from bit pattern.  *)
  | Vcreate
  (* Set all lanes to same value.  *)
  | Vdup_n
  | Vmov_n  (* Is this the same?  *)
  (* Duplicate scalar to all lanes of vector.  *)
  | Vdup_lane
  (* Combine vectors.  *)
  | Vcombine
  (* Get quadword high/low parts.  *)
  | Vget_high | Vget_low
  (* Convert vectors.  *)
  | Vcvt | Vcvt_n
  (* Narrow/lengthen vectors.  *)
  | Vmovn | Vmovl
  (* Table lookup.  *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts.  *)
  | Vreinterp
(* Per-intrinsic feature flags.  They are used for documentation, to tell
   some instruction variants apart, and to signal special requirements
   (e.g. swapping arguments).  *)

type features =
  | Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
    (* Builtin name to use with flipped arguments.  *)
  | Flipped of string
    (* Pass an extra word for signage/rounding etc. (always passed for
       All _, Long, Wide, Narrow shape_forms).  *)
  | InfoWord
    (* Pass explicit pointer to return value as first argument.  *)
  | ReturnPtr
    (* The shape of instruction expected upon disassembly, given when it
       differs from the shape used to build the intrinsic prototype.
       More than one entry means the intrinsic expands to more than one
       assembly instruction, each with its own shape listed here.  *)
  | Disassembles_as of shape_form list
    (* Override the name of the builtin.  *)
  | Builtin_name of string
    (* Override the name of the instruction.  If more than one name is
       given, the instruction can have any of those names.  *)
  | Instruction_name of string list
    (* The intrinsic yields no instructions, or expands to yield behavior
       that the test generator cannot test.  *)
  | No_op
    (* The intrinsic has constant arguments that cannot be set to the
       defaults (zero for pointers and one otherwise) in the test cases.
       The function supplied must return the integer to be written into
       the testcase for the (0-based) argument number passed to it.  *)
  | Const_valuator of (int -> int)
  | Fixed_return_reg
(* Raised by elt_width for a Cast element; carries the two element types
   of the cast.  *)
exception MixedMode of elts * elts

(* Width in bits of element type ELT.
   A Conv element yields the common width of its two halves and fails
   with "element width?" when they differ.  A Cast element raises
   MixedMode, and NoElts fails with "No elts".  *)
let rec elt_width elt =
  match elt with
  | S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (first, second) ->
      let first_width = elt_width first
      and second_width = elt_width second in
      if first_width <> second_width then failwith "element width?"
      else first_width
  | Cast (first, second) -> raise (MixedMode (first, second))
  | NoElts -> failwith "No elts"
(* Class of element type ELT, ignoring its width.  Conv and Cast elements
   map to a ConvClass of the classes of their two halves; NoElts maps to
   NoType.  *)
let rec elt_class elt =
  match elt with
  | S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (first, second) | Cast (first, second) ->
      ConvClass (elt_class first, elt_class second)
  | NoElts -> NoType
(* Inverse of elt_class/elt_width for simple elements: return the element
   type of class C that is W bits wide.  Fails with "Bad element type"
   when no such element exists, including for ConvClass and NoType.  *)
let elt_of_class_width c w =
  let bad () = failwith "Bad element type" in
  match c with
  | Signed ->
      (match w with
       | 8 -> S8 | 16 -> S16 | 32 -> S32 | 64 -> S64
       | _ -> bad ())
  | Float ->
      (match w with
       | 32 -> F32
       | _ -> bad ())
  | Unsigned ->
      (match w with
       | 8 -> U8 | 16 -> U16 | 32 -> U32 | 64 -> U64
       | _ -> bad ())
  | Poly ->
      (match w with
       | 8 -> P8 | 16 -> P16
       | _ -> bad ())
  | Int ->
      (match w with
       | 8 -> I8 | 16 -> I16 | 32 -> I32 | 64 -> I64
       | _ -> bad ())
  | Bits ->
      (match w with
       | 8 -> B8 | 16 -> B16 | 32 -> B32 | 64 -> B64
       | _ -> bad ())
  | ConvClass _ | NoType -> bad ()
(* Unsigned integer element with the same width as ELT.  Propagates any
   failure from elt_width.  *)
let unsigned_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Unsigned width
(* Signed integer element with the same width as ELT.  Propagates any
   failure from elt_width.  *)
let signed_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Signed width
(* Untyped "bits" element with the same width as ELT.  Propagates any
   failure from elt_width.  *)
let bits_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Bits width
(* Replace a signed or unsigned integer element by the sign-agnostic Int
   element of the same width.  Any other element is returned as is.  *)
let non_signed_variant elt =
  match elt with
  | S8 | U8 -> I8
  | S16 | U16 -> I16
  | S32 | U32 -> I32
  | S64 | U64 -> I64
  | other -> other
(* Map a polynomial element to the unsigned element of the same width.
   Elements of any other class are rebuilt from their own class and
   width.  *)
let poly_unsigned_variant v =
  let cls =
    match elt_class v with
    | Poly -> Unsigned
    | other -> other
  in
  let width = elt_width v in
  elt_of_class_width cls width
(* Element of the same class as ELT but twice as wide.  Fails (via
   elt_of_class_width) when no such element exists.  *)
let widen_elt elt =
  let width = elt_width elt in
  elt_of_class_width (elt_class elt) (2 * width)
(* Element of the same class as ELT but half as wide.  Fails (via
   elt_of_class_width) when no such element exists.  *)
let narrow_elt elt =
  let width = elt_width elt in
  elt_of_class_width (elt_class elt) (width / 2)
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.

   OPERANDS is scanned from the last entry towards the first.  The result
   is Qreg or Dreg, taken from the first entry found that is a Q/D
   register or an array of them.  Fails with an explicit message when
   OPERANDS contains no such entry (including when it is empty), rather
   than indexing the array at -1.  *)
let find_key_operand operands =
  let rec scan opno =
    if opno < 0 then
      failwith "find_key_operand: no vector operand"
    else
      match operands.(opno) with
      | Qreg | VecArray (_, Qreg) -> Qreg
      | Dreg | VecArray (_, Dreg) -> Dreg
      | _ -> scan (opno - 1)
  in
  scan (Array.length operands - 1)
(* Machine mode for element type ELT used in an instruction of shape
   SHAPE.  The width of ELT selects the column (8, 16, 32 or 64 bits) and
   SHAPE selects the row (D-register, Q-register or scalar modes).  The
   32-bit column is a float mode when ELT is Float or a conversion whose
   second half is Float, except for the Long/Wide/Narrow rows, which are
   always integer.  Use_operands shapes defer to the key operand chosen
   by find_key_operand.
   Fails with "Bad element width" or "invalid shape", and propagates the
   failures of elt_width.  *)
let rec mode_of_elt elt shape =
  let is_float =
    match elt_class elt with
    | Float | ConvClass (_, Float) -> true
    | _ -> false
  in
  let column =
    match elt_width elt with
    | 8 -> 0
    | 16 -> 1
    | 32 -> 2
    | 64 -> 3
    | _ -> failwith "Bad element width"
  in
  let select modes = modes.(column) in
  match shape with
  | All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      select [| V8QI; V4HI; (if is_float then V2SF else V2SI); DI |]
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      select [| V16QI; V8HI; (if is_float then V4SF else V4SI); V2DI |]
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      select [| QI; HI; (if is_float then SF else SI); DI |]
  | Long | Wide | Wide_lane | Wide_scalar | Long_imm ->
      select [| V8QI; V4HI; V2SI; DI |]
  | Narrow | Narrow_imm ->
      select [| V16QI; V8HI; V4SI; V2DI |]
  | Use_operands ops ->
      mode_of_elt elt (All (0, find_key_operand ops))
  | _ -> failwith "invalid shape"
(* Return the function that adjusts an element type for operand number NO
   of an instruction of shape SHAPE.  The result is the identity,
   widen_elt or narrow_elt.  Shapes that treat every operand alike accept
   any NO; for the others NO indexes a fixed per-operand table, so an
   out-of-range NO raises Invalid_argument.  *)

let shapemap shape no =
  let keep elt = elt in
  let per_operand =
    match shape with
    | All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
    | Binary_imm _ ->
        None
    | Long | Long_noreg _ | Wide_scalar | Long_imm ->
        Some [| widen_elt; keep; keep |]
    | Wide | Wide_noreg _ ->
        Some [| widen_elt; widen_elt; keep |]
    | Wide_lane ->
        Some [| widen_elt; keep; keep; keep |]
    | Narrow | Narrow_imm ->
        Some [| narrow_elt; keep; keep |]
  in
  match per_operand with
  | None -> keep
  | Some adjusters -> adjusters.(no)
(* Kind of operand number NO (D/Q register, core register, immediate,
   ...) in an instruction of shape SHAPE.  All, Long_noreg and Wide_noreg
   use the same kind for every operand and ignore NO; the other shapes
   index a fixed table, so an out-of-range NO raises Invalid_argument.  *)

let regmap shape no =
  let pick kinds = kinds.(no) in
  match shape with
  | All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> pick [| Qreg; Dreg; Dreg |]
  | Wide -> pick [| Qreg; Qreg; Dreg |]
  | Narrow -> pick [| Dreg; Qreg; Qreg |]
  | Wide_lane -> pick [| Qreg; Dreg; Dreg; Immed |]
  | Wide_scalar -> pick [| Qreg; Dreg; Corereg |]
  | By_scalar reg -> pick [| reg; reg; Dreg; Immed |]
  | Unary_scalar reg -> pick [| reg; Dreg; Immed |]
  | Pair_result reg -> pick [| VecArray (2, reg); reg; reg |]
  | Binary_imm reg -> pick [| reg; reg; Immed |]
  | Long_imm -> pick [| Qreg; Dreg; Immed |]
  | Narrow_imm -> pick [| Dreg; Qreg; Immed |]
  | Use_operands these -> pick these
(* Return the type of operand number NO of an instruction of shape SHAPE
   operating on element type ELT.  ELT is first adjusted for the operand
   with shapemap, the operand kind is looked up with regmap, and the two
   are combined into a vectype.
   Fails with "Bad elt type" when the adjusted element has no type for a
   D, Q or core register operand (e.g. the Int and Bits elements).  *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
    | Dreg ->
        begin match elt with
        | S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        begin match elt with
        | S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        begin match elt with
        | S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        (* The element type is irrelevant for an immediate.  *)
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
  type_for_reg_elt reg elt
(* Size in bits of a vector type: 64 for the D-register types and 128 for
   the Q-register types.  Raises Not_found for any non-vector type.  *)
let vectype_size vtype =
  match vtype with
  | T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2
  | T_poly8x8 | T_poly16x4 ->
      64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4
  | T_poly8x16 | T_poly16x8 ->
      128
  | _ -> raise Not_found
(* Opaque integer mode large enough to hold an array of NUM vectors of
   type ELTTYPE.  The total size is counted in 32-bit words; sizes other
   than 4, 6, 8, 12 or 16 words fail with "no int type for size N".
   Raises Not_found (from vectype_size) when ELTTYPE is not a vector
   type.  *)
let inttype_for_array num elttype =
  let bits_per_vector = vectype_size elttype in
  let words = num * bits_per_vector / 32 in
  match words with
  | 4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int words)
476 (* These functions return pairs of (internal, external) types, where "internal"
477 types are those seen by GCC, and "external" are those seen by the assembler.
478 These types aren't necessarily the same, since the intrinsics can munge more
479 than one C type into each assembler opcode. *)
(* Wrap the type-generating function FUNC so that the element type it
   returns is made sign-agnostic with non_signed_variant.  The arity is
   passed through unchanged.  *)
let make_sign_invariant func shape elt =
  match func shape elt with
  | arity, result_elt -> arity, non_signed_variant result_elt
(* Don't restrict any types: every operand uses ELT, and ELT is also the
   element type returned.  MAKE_ARITY receives a function from operand
   number to operand type and builds the arity from it.  *)

let elts_same make_arity shape elt =
  let operand_type = type_for_elt shape elt in
  (make_arity operand_type, elt)
(* Sign-sensitive counterparts of the sign_invar_ functions.  The suffix
   names the argument list: "io" variants pass operand 0 both as the
   result and as the first argument; the number is the argument count.  *)

(* Result plus four arguments: operands 0, 1, 2, 3.  *)
let elts_same_io_lane shape elt =
  elts_same (fun ty -> Arity4 (ty 0, ty 0, ty 1, ty 2, ty 3)) shape elt

(* Result plus three arguments: operands 0, 1, 2.  *)
let elts_same_io shape elt =
  elts_same (fun ty -> Arity3 (ty 0, ty 0, ty 1, ty 2)) shape elt

(* Result is operand 0; arguments are operands 1, 2, 3.  *)
let elts_same_2_lane shape elt =
  elts_same (fun ty -> Arity3 (ty 0, ty 1, ty 2, ty 3)) shape elt

(* Same prototype as elts_same_2_lane, under its three-argument name.  *)
let elts_same_3 shape elt = elts_same_2_lane shape elt

(* Result is operand 0; arguments are operands 1, 2.  *)
let elts_same_2 shape elt =
  elts_same (fun ty -> Arity2 (ty 0, ty 1, ty 2)) shape elt

(* Result is operand 0; the argument is operand 1.  *)
let elts_same_1 shape elt =
  elts_same (fun ty -> Arity1 (ty 0, ty 1)) shape elt
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data).  Each one is the matching
   elts_same_ function with its element type made sign-agnostic.  *)

let sign_invar_io_lane shape elt =
  make_sign_invariant elts_same_io_lane shape elt

let sign_invar_io shape elt =
  make_sign_invariant elts_same_io shape elt

let sign_invar_2_lane shape elt =
  make_sign_invariant elts_same_2_lane shape elt

let sign_invar_2 shape elt =
  make_sign_invariant elts_same_2 shape elt

let sign_invar_1 shape elt =
  make_sign_invariant elts_same_1 shape elt
(* Sign-sensitive comparison.  Operands 1 and 2 use ELT; the result
   (operand 0) uses the unsigned element of the same width.  ELT itself
   is the element type returned.  *)

let cmp_sign_matters shape elt =
  let operand_type = type_for_elt shape elt
  and result_type = type_for_elt shape (unsigned_of_elt elt) 0 in
  (Arity2 (result_type, operand_type 1, operand_type 2), elt)
(* Signed/unsigned invariant comparison.  The prototype is the one built
   by cmp_sign_matters; the element type returned is made sign-agnostic,
   and P8 is additionally mapped to I8.  *)

let cmp_sign_invar shape elt =
  let arity, elt' = cmp_sign_matters shape elt in
  let elt'' =
    match non_signed_variant elt' with
    | P8 -> I8
    | x -> x
  in
  arity, elt''
(* Comparison (VTST) where only the element width matters.  The prototype
   is the same as for cmp_sign_matters, but the element type returned is
   the untyped "bits" element of the same width as ELT.  *)

let cmp_bits shape elt =
  let operand_type = type_for_elt shape elt
  and result_type = type_for_elt shape (unsigned_of_elt elt) 0
  and width_only = bits_of_elt elt in
  (Arity2 (result_type, operand_type 1, operand_type 2), width_only)
(* Shift by a register operand.  The result and operand 1 use ELT, while
   operand 2 always uses the signed element of the same width.  ELT is
   the element type returned.  *)
let reg_shift shape elt =
  let operand_type = type_for_elt shape elt
  and shift_type = type_for_elt shape (signed_of_elt elt) 2 in
  (Arity2 (operand_type 0, operand_type 1, shift_type), elt)
(* Genericised constant-shift type-generating function.
   MKIMM builds the type of the immediate from the bit width of operand 2
   (after shapemap has adjusted ELT for that operand).
   ?RESULT, when given, maps ELT to the element type of the result.
   ?ARITY, when given, builds the arity from the result type, operand 1
   and the immediate; the default is Arity2 of those three.
   ELT is the element type returned.  *)

let const_shift mkimm ?arity ?result shape elt =
  let imm_elt = (shapemap shape 2) elt in
  let imm_width = elt_width imm_elt in
  let imm_type = mkimm imm_width
  and source_type = type_for_elt shape elt 1
  and result_elt =
    match result with
    | Some restriction -> restriction elt
    | None -> elt
  in
  let result_type = type_for_elt shape result_elt 0 in
  let build =
    match arity with
    | Some mkarity -> mkarity
    | None -> (fun r a b -> Arity2 (r, a, b))
  in
  (build result_type source_type imm_type, elt)
(* Use for immediate right-shifts.  The immediate is typed
   T_immediate (1, width).  *)

let shift_right shape elt =
  let mkimm width = T_immediate (1, width) in
  const_shift mkimm shape elt

(* Immediate right-shift and accumulate: the result type is also passed
   as the first argument.  *)
let shift_right_acc shape elt =
  let mkimm width = T_immediate (1, width)
  and mkarity dst src imm = Arity3 (dst, dst, src, imm) in
  const_shift mkimm ~arity:mkarity shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness.  *)

let shift_right_sign_invar shape elt =
  make_sign_invariant shift_right shape elt

(* Immediate right-shift; result is unsigned even when operand is
   signed.  *)

let shift_right_to_uns shape elt =
  let mkimm width = T_immediate (1, width) in
  const_shift mkimm ~result:unsigned_of_elt shape elt
(* Immediate left-shift.  The immediate is typed
   T_immediate (0, width - 1).  *)

let shift_left shape elt =
  let mkimm width = T_immediate (0, width - 1) in
  const_shift mkimm shape elt

(* Immediate left-shift, unsigned result.  *)

let shift_left_to_uns shape elt =
  let mkimm width = T_immediate (0, width - 1) in
  const_shift mkimm ~result:unsigned_of_elt shape elt

(* Immediate left-shift, don't care about signs.  *)

let shift_left_sign_invar shape elt =
  make_sign_invariant shift_left shape elt
(* Shift left/right and insert: only element size matters.  The prototype
   passes the result type as the first argument and uses an immediate
   typed T_immediate (1, width); the element type returned is the "bits"
   element of the same width as ELT.  *)

let shift_insert shape elt =
  let mkimm width = T_immediate (1, width)
  and mkarity dst src imm = Arity3 (dst, dst, src, imm) in
  match const_shift mkimm ~arity:mkarity shape elt with
  | arity, shifted_elt -> arity, bits_of_elt shifted_elt
(* Get lane.  All operands use ELT.  The element type returned maps P8 to
   U8, P16 to U16 and every 32-bit element (S32, U32, F32) to B32; any
   other element is returned unchanged.  *)

let get_lane shape elt =
  let lane_elt =
    match elt with
    | P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | other -> other
  in
  let operand_type = type_for_elt shape elt in
  (Arity2 (operand_type 0, operand_type 1, operand_type 2), lane_elt)
(* Set lane.  All operands use ELT; the element type returned is the
   "bits" element of the same width.  *)
let set_lane shape elt =
  let operand_type = type_for_elt shape elt in
  ( Arity3 (operand_type 0, operand_type 1, operand_type 2, operand_type 3),
    bits_of_elt elt )
(* Same prototype as set_lane, but no element type is returned
   (NoElts).  *)
let set_lane_notype shape elt =
  let operand_type = type_for_elt shape elt in
  ( Arity3 (operand_type 0, operand_type 1, operand_type 2, operand_type 3),
    NoElts )
(* Create a vector from a bit pattern: the single argument is operand 1
   typed as U64, and the result is operand 0 typed as ELT.  ELT is the
   element type returned.  *)
let create_vector shape elt =
  let pattern_type = type_for_elt shape U64 1
  and result_type = type_for_elt shape elt 0 in
  (Arity1 (result_type, pattern_type), elt)
(* Conversions.  ELT must be Conv (dest, src) or Cast (dest, src): the
   result is typed from DEST and the arguments from SRC.  MAKE_ARITY
   receives the result type and a function from operand number to
   argument type.  ELT is returned unchanged.  Fails with "Non-conversion
   element in conversion" for any other element.  *)
let conv make_arity shape elt =
  match elt with
  | Conv (dest, src) | Cast (dest, src) ->
      let arg_type = type_for_elt shape src
      and result_type = type_for_elt shape dest 0 in
      (make_arity result_type arg_type, elt)
  | _ -> failwith "Non-conversion element in conversion"

(* Conversion taking one argument (operand 1).  *)
let conv_1 shape elt =
  conv (fun result arg -> Arity1 (result, arg 1)) shape elt

(* Conversion taking two arguments (operands 1 and 2).  *)
let conv_2 shape elt =
  conv (fun result arg -> Arity2 (result, arg 1, arg 2)) shape elt
(* Operation has an unsigned result even if operands are signed.  The
   result uses the unsigned element of the same width as ELT; MAKE_ARITY
   receives that result type and a function from operand number to
   argument type.  ELT is the element type returned.  *)

let dst_unsign make_arity shape elt =
  let arg_type = type_for_elt shape elt
  and result_type = type_for_elt shape (unsigned_of_elt elt) 0 in
  (make_arity result_type arg_type, elt)

(* Unsigned-result operation taking one argument (operand 1).  *)
let dst_unsign_1 shape elt =
  dst_unsign (fun result arg -> Arity1 (result, arg 1)) shape elt
(* Wrap the type-generating function FUNC so that the element type it
   returns is replaced by the "bits" element of the same width.  The
   arity is passed through unchanged.  *)
let make_bits_only func shape elt =
  match func shape elt with
  | arity, result_elt -> arity, bits_of_elt result_elt
(* Extend operation.  The result is operand 0 and the arguments are
   operands 1, 2 and 3, all typed from ELT; the element type returned is
   the "bits" element of the same width.  *)

let extend shape elt =
  let operand_type = type_for_elt shape elt in
  ( Arity3 (operand_type 0, operand_type 1, operand_type 2, operand_type 3),
    bits_of_elt elt )
(* Table look-up operations.  Operand 2 is signed/unsigned for
   signed/unsigned integer ops respectively, or unsigned for polynomial
   ops.  MKARITY receives a function from operand number to type and the
   type of operand 2.  The element type returned is the "bits" element of
   the same width as ELT.  *)

let table mkarity shape elt =
  let operand_type = type_for_elt shape elt in
  let index_type = type_for_elt shape (poly_unsigned_variant elt) 2 in
  (mkarity operand_type index_type, bits_of_elt elt)

(* Look-up with two arguments: operand 1 and the index operand.  *)
let table_2 shape elt =
  table (fun ty index -> Arity2 (ty 0, ty 1, index)) shape elt

(* Look-up whose result type is also passed as the first argument.  *)
let table_io shape elt =
  table (fun ty index -> Arity3 (ty 0, ty 0, ty 1, index)) shape elt
(* Operations where only bits matter: the elts_same_ prototypes with the
   element type reduced to its width.  *)

let bits_1 shape elt = make_bits_only elts_same_1 shape elt
let bits_2 shape elt = make_bits_only elts_same_2 shape elt
let bits_3 shape elt = make_bits_only elts_same_3 shape elt
(* Store insns.  *)

(* Store with two arguments (operands 0 and 1) and a void result.  The
   element type returned is the "bits" element of the same width as
   ELT.  *)
let store_1 shape elt =
  let operand_type = type_for_elt shape elt in
  (Arity2 (T_void, operand_type 0, operand_type 1), bits_of_elt elt)
(* Store with three arguments (operands 0, 1 and 2) and a void result.
   The element type returned is the "bits" element of the same width as
   ELT.  *)
let store_3 shape elt =
  let operand_type = type_for_elt shape elt in
  ( Arity3 (T_void, operand_type 0, operand_type 1, operand_type 2),
    bits_of_elt elt )
(* Wrap the type-generating function FUNC so that no element type is
   returned (NoElts).  The arity is passed through unchanged.  *)
let make_notype func shape elt =
  match func shape elt with
  | arity, _ -> arity, NoElts
(* The elts_same_ prototypes with the element type dropped.  *)
let notype_1 shape elt = make_notype elts_same_1 shape elt
let notype_2 shape elt = make_notype elts_same_2 shape elt
let notype_3 shape elt = make_notype elts_same_3 shape elt
(* Bit-select operations (first operand is unsigned int).  Operand 1 uses
   the unsigned element of the same width as ELT; the result and operands
   2 and 3 use ELT.  No element type is returned (NoElts).  *)

let bit_select shape elt =
  let operand_type = type_for_elt shape elt
  and mask_type = type_for_elt shape (unsigned_of_elt elt) in
  ( Arity3 (operand_type 0, mask_type 1, operand_type 2, operand_type 3),
    NoElts )
(* Common lists of supported element types.  In the names, "s" and "u"
   stand for the signed and unsigned integer elements, "p" for the
   polynomial elements and "f" for F32; the numbers give the range of
   widths covered.  *)

let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = [S64; U64] @ su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = [P8; P16; F32] @ su_8_32
let pf_su_8_64 = [P8; P16; F32] @ su_8_64
709 let ops =
711 (* Addition. *)
712 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64;
713 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
714 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
715 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
716 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
717 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
718 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
719 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
720 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
721 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
722 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
723 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
724 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
725 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
726 Narrow, "vRaddhn", sign_invar_2, su_16_64;
728 (* Multiplication. *)
729 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
730 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
731 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
732 elts_same_2, [S16; S32];
733 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
734 elts_same_2, [S16; S32];
735 Vmul,
736 [Saturating; Rounding; Doubling; High_half;
737 Instruction_name ["vqrdmulh"]],
738 All (3, Dreg), "vqRdmulh",
739 elts_same_2, [S16; S32];
740 Vmul,
741 [Saturating; Rounding; Doubling; High_half;
742 Instruction_name ["vqrdmulh"]],
743 All (3, Qreg), "vqRdmulhQ",
744 elts_same_2, [S16; S32];
745 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
746 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
748 (* Multiply-accumulate. *)
749 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
750 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
751 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
752 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
754 (* Multiply-subtract. *)
755 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
756 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
757 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
758 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
760 (* Subtraction. *)
761 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64;
762 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
763 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
764 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
765 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
766 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
767 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
768 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
769 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
770 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
771 Narrow, "vRsubhn", sign_invar_2, su_16_64;
773 (* Comparison, equal. *)
774 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
775 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
777 (* Comparison, greater-than or equal. *)
778 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
779 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
781 (* Comparison, less-than or equal. *)
782 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
783 F32 :: su_8_32;
784 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
785 All (3, Qreg), "vcleQ", cmp_sign_matters,
786 F32 :: su_8_32;
788 (* Comparison, greater-than. *)
789 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
790 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
792 (* Comparison, less-than. *)
793 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
794 F32 :: su_8_32;
795 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
796 All (3, Qreg), "vcltQ", cmp_sign_matters,
797 F32 :: su_8_32;
799 (* Compare absolute greater-than or equal. *)
800 Vcage, [Instruction_name ["vacge"]],
801 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
802 Vcage, [Instruction_name ["vacge"]],
803 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
805 (* Compare absolute less-than or equal. *)
806 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
807 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
808 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
809 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
811 (* Compare absolute greater-than. *)
812 Vcagt, [Instruction_name ["vacgt"]],
813 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
814 Vcagt, [Instruction_name ["vacgt"]],
815 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
817 (* Compare absolute less-than. *)
818 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
819 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
820 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
821 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
823 (* Test bits. *)
824 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
825 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
827 (* Absolute difference. *)
828 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
829 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
830 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
832 (* Absolute difference and accumulate. *)
833 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
834 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
835 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
837 (* Max. *)
838 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
839 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
841 (* Min. *)
842 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
843 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
845 (* Pairwise add. *)
846 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
847 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
848 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
850 (* Pairwise add, widen and accumulate. *)
851 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
852 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
854 (* Folding maximum, minimum. *)
855 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
856 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
858 (* Reciprocal step. *)
859 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
860 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
861 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
862 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
864 (* Vector shift left. *)
865 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
866 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
867 Vshl, [Instruction_name ["vrshl"]; Rounding],
868 All (3, Dreg), "vRshl", reg_shift, su_8_64;
869 Vshl, [Instruction_name ["vrshl"]; Rounding],
870 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
871 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
872 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
873 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
874 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
875 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
876 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
878 (* Vector shift right by constant. *)
879 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
880 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
881 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
882 "vRshr_n", shift_right, su_8_64;
883 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
884 "vRshrQ_n", shift_right, su_8_64;
885 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
886 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
887 shift_right_sign_invar, su_16_64;
888 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
889 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
890 "vqRshrn_n", shift_right, su_16_64;
891 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
892 shift_right_to_uns, [S16; S32; S64];
893 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
894 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
896 (* Vector shift left by constant. *)
897 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
898 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
899 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
900 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
901 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
902 shift_left_to_uns, [S8; S16; S32; S64];
903 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
904 shift_left_to_uns, [S8; S16; S32; S64];
905 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
907 (* Vector shift right by constant and accumulate. *)
908 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
909 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
910 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
911 "vRsra_n", shift_right_acc, su_8_64;
912 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
913 "vRsraQ_n", shift_right_acc, su_8_64;
915 (* Vector shift right and insert. *)
916 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
917 P8 :: P16 :: su_8_64;
918 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
919 P8 :: P16 :: su_8_64;
921 (* Vector shift left and insert. *)
922 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
923 P8 :: P16 :: su_8_64;
924 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
925 P8 :: P16 :: su_8_64;
927 (* Absolute value. *)
928 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
929 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
930 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
931 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
933 (* Negate. *)
934 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
935 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
936 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
937 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
939 (* Bitwise not. *)
940 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
941 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
943 (* Count leading sign bits. *)
944 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
945 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
947 (* Count leading zeros. *)
948 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
949 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
951 (* Count number of set bits. *)
952 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
953 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
955 (* Reciprocal estimate. *)
956 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
957 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
959 (* Reciprocal square-root estimate. *)
960 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
961 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
963 (* Get lanes from a vector. *)
964 Vget_lane,
965 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
966 Instruction_name ["vmov"]],
967 Use_operands [| Corereg; Dreg; Immed |],
968 "vget_lane", get_lane, pf_su_8_32;
969 Vget_lane,
970 [InfoWord;
971 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
972 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
973 Use_operands [| Corereg; Dreg; Immed |],
974 "vget_lane", notype_2, [S64; U64];
975 Vget_lane,
976 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
977 Instruction_name ["vmov"]],
978 Use_operands [| Corereg; Qreg; Immed |],
979 "vgetQ_lane", get_lane, pf_su_8_32;
980 Vget_lane,
981 [InfoWord;
982 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
983 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
984 Use_operands [| Corereg; Qreg; Immed |],
985 "vgetQ_lane", notype_2, [S64; U64];
987 (* Set lanes in a vector. *)
988 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
989 Instruction_name ["vmov"]],
990 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
991 set_lane, pf_su_8_32;
992 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
993 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
994 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
995 set_lane_notype, [S64; U64];
996 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
997 Instruction_name ["vmov"]],
998 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
999 set_lane, pf_su_8_32;
1000 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1001 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1002 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1003 set_lane_notype, [S64; U64];
1005 (* Create vector from literal bit pattern. *)
1006 Vcreate,
1007 [No_op], (* Not really, but it can yield various things that are too
1008 hard for the test generator at this time. *)
1009 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1010 pf_su_8_64;
1012 (* Set all lanes to the same value. *)
1013 Vdup_n,
1014 [Disassembles_as [Use_operands [| Dreg;
1015 Alternatives [ Corereg;
1016 Element_of_dreg ] |]]],
1017 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1018 pf_su_8_32;
1019 Vdup_n,
1020 [Instruction_name ["vmov"];
1021 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1022 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1023 [S64; U64];
1024 Vdup_n,
1025 [Disassembles_as [Use_operands [| Qreg;
1026 Alternatives [ Corereg;
1027 Element_of_dreg ] |]]],
1028 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1029 pf_su_8_32;
1030 Vdup_n,
1031 [Instruction_name ["vmov"];
1032 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1033 Use_operands [| Dreg; Corereg; Corereg |]]],
1034 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1035 [S64; U64];
1037 (* These are just aliases for the above. *)
1038 Vmov_n,
1039 [Builtin_name "vdup_n";
1040 Disassembles_as [Use_operands [| Dreg;
1041 Alternatives [ Corereg;
1042 Element_of_dreg ] |]]],
1043 Use_operands [| Dreg; Corereg |],
1044 "vmov_n", bits_1, pf_su_8_32;
1045 Vmov_n,
1046 [Builtin_name "vdup_n";
1047 Instruction_name ["vmov"];
1048 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1049 Use_operands [| Dreg; Corereg |],
1050 "vmov_n", notype_1, [S64; U64];
1051 Vmov_n,
1052 [Builtin_name "vdupQ_n";
1053 Disassembles_as [Use_operands [| Qreg;
1054 Alternatives [ Corereg;
1055 Element_of_dreg ] |]]],
1056 Use_operands [| Qreg; Corereg |],
1057 "vmovQ_n", bits_1, pf_su_8_32;
1058 Vmov_n,
1059 [Builtin_name "vdupQ_n";
1060 Instruction_name ["vmov"];
1061 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1062 Use_operands [| Dreg; Corereg; Corereg |]]],
1063 Use_operands [| Qreg; Corereg |],
1064 "vmovQ_n", notype_1, [S64; U64];
1066 (* Duplicate, lane version. We can't use Use_operands here because the
1067 rightmost register (always Dreg) would be picked up by find_key_operand,
1068 when we want the leftmost register to be used in this case (otherwise
1069 the modes are indistinguishable in neon.md, etc.). *)
1070 Vdup_lane,
1071 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1072 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1073 Vdup_lane,
1074 [No_op; Const_valuator (fun _ -> 0)],
1075 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1076 Vdup_lane,
1077 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1078 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1079 Vdup_lane,
1080 [No_op; Const_valuator (fun _ -> 0)],
1081 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1083 (* Combining vectors. *)
1084 Vcombine, [No_op],
1085 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1086 pf_su_8_64;
1088 (* Splitting vectors. *)
1089 Vget_high, [No_op],
1090 Use_operands [| Dreg; Qreg |], "vget_high",
1091 notype_1, pf_su_8_64;
1092 Vget_low, [Instruction_name ["vmov"];
1093 Disassembles_as [Use_operands [| Dreg; Dreg |]];
1094 Fixed_return_reg],
1095 Use_operands [| Dreg; Qreg |], "vget_low",
1096 notype_1, pf_su_8_32;
1097 Vget_low, [No_op],
1098 Use_operands [| Dreg; Qreg |], "vget_low",
1099 notype_1, [S64; U64];
1101 (* Conversions. *)
1102 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1103 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1104 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1105 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1106 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1107 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1108 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1109 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1111 (* Move, narrowing. *)
1112 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1113 Narrow, "vmovn", sign_invar_1, su_16_64;
1114 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1115 Narrow, "vqmovn", elts_same_1, su_16_64;
1116 Vmovn,
1117 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1118 Narrow, "vqmovun", dst_unsign_1,
1119 [S16; S32; S64];
1121 (* Move, long. *)
1122 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1123 Long, "vmovl", elts_same_1, su_8_32;
1125 (* Table lookup. *)
1126 Vtbl 1,
1127 [Instruction_name ["vtbl"];
1128 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1129 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1130 Vtbl 2, [Instruction_name ["vtbl"]],
1131 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1132 [U8; S8; P8];
1133 Vtbl 3, [Instruction_name ["vtbl"]],
1134 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1135 [U8; S8; P8];
1136 Vtbl 4, [Instruction_name ["vtbl"]],
1137 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1138 [U8; S8; P8];
1140 (* Extended table lookup. *)
1141 Vtbx 1,
1142 [Instruction_name ["vtbx"];
1143 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1144 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1145 Vtbx 2, [Instruction_name ["vtbx"]],
1146 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1147 [U8; S8; P8];
1148 Vtbx 3, [Instruction_name ["vtbx"]],
1149 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1150 [U8; S8; P8];
1151 Vtbx 4, [Instruction_name ["vtbx"]],
1152 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1153 [U8; S8; P8];
1155 (* Multiply, lane. (note: these were undocumented at the time of
1156 writing). *)
1157 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1158 [S16; S32; U16; U32; F32];
1159 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1160 [S16; S32; U16; U32; F32];
1162 (* Multiply-accumulate, lane. *)
1163 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1164 [S16; S32; U16; U32; F32];
1165 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1166 [S16; S32; U16; U32; F32];
1167 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1168 [S16; S32; U16; U32];
1169 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1170 elts_same_io_lane, [S16; S32];
1172 (* Multiply-subtract, lane. *)
1173 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1174 [S16; S32; U16; U32; F32];
1175 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1176 [S16; S32; U16; U32; F32];
1177 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1178 [S16; S32; U16; U32];
1179 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1180 elts_same_io_lane, [S16; S32];
1182 (* Long multiply, lane. *)
1183 Vmull_lane, [],
1184 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1186 (* Saturating doubling long multiply, lane. *)
1187 Vqdmull_lane, [Saturating; Doubling],
1188 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1190 (* Saturating doubling long multiply high, lane. *)
1191 Vqdmulh_lane, [Saturating; Halving],
1192 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1193 Vqdmulh_lane, [Saturating; Halving],
1194 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1195 Vqdmulh_lane, [Saturating; Halving; Rounding;
1196 Instruction_name ["vqrdmulh"]],
1197 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1198 Vqdmulh_lane, [Saturating; Halving; Rounding;
1199 Instruction_name ["vqrdmulh"]],
1200 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1202 (* Vector multiply by scalar. *)
1203 Vmul_n, [InfoWord;
1204 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1205 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1206 sign_invar_2, [S16; S32; U16; U32; F32];
1207 Vmul_n, [InfoWord;
1208 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1209 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1210 sign_invar_2, [S16; S32; U16; U32; F32];
1212 (* Vector long multiply by scalar. *)
1213 Vmull_n, [Instruction_name ["vmull"];
1214 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1215 Wide_scalar, "vmull_n",
1216 elts_same_2, [S16; S32; U16; U32];
1218 (* Vector saturating doubling long multiply by scalar. *)
1219 Vqdmull_n, [Saturating; Doubling;
1220 Disassembles_as [Use_operands [| Qreg; Dreg;
1221 Element_of_dreg |]]],
1222 Wide_scalar, "vqdmull_n",
1223 elts_same_2, [S16; S32];
1225 (* Vector saturating doubling long multiply high by scalar. *)
1226 Vqdmulh_n,
1227 [Saturating; Halving; InfoWord;
1228 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1229 Use_operands [| Qreg; Qreg; Corereg |],
1230 "vqdmulhQ_n", elts_same_2, [S16; S32];
1231 Vqdmulh_n,
1232 [Saturating; Halving; InfoWord;
1233 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1234 Use_operands [| Dreg; Dreg; Corereg |],
1235 "vqdmulh_n", elts_same_2, [S16; S32];
1236 Vqdmulh_n,
1237 [Saturating; Halving; Rounding; InfoWord;
1238 Instruction_name ["vqrdmulh"];
1239 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1240 Use_operands [| Qreg; Qreg; Corereg |],
1241 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1242 Vqdmulh_n,
1243 [Saturating; Halving; Rounding; InfoWord;
1244 Instruction_name ["vqrdmulh"];
1245 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1246 Use_operands [| Dreg; Dreg; Corereg |],
1247 "vqRdmulh_n", elts_same_2, [S16; S32];
1249 (* Vector multiply-accumulate by scalar. *)
1250 Vmla_n, [InfoWord;
1251 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1252 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1253 sign_invar_io, [S16; S32; U16; U32; F32];
1254 Vmla_n, [InfoWord;
1255 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1256 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1257 sign_invar_io, [S16; S32; U16; U32; F32];
1258 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1259 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1260 [S16; S32];
1262 (* Vector multiply subtract by scalar. *)
1263 Vmls_n, [InfoWord;
1264 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1265 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1266 sign_invar_io, [S16; S32; U16; U32; F32];
1267 Vmls_n, [InfoWord;
1268 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1269 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1270 sign_invar_io, [S16; S32; U16; U32; F32];
1271 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1272 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1273 [S16; S32];
1275 (* Vector extract. *)
1276 Vext, [Const_valuator (fun _ -> 0)],
1277 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1278 pf_su_8_64;
1279 Vext, [Const_valuator (fun _ -> 0)],
1280 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1281 pf_su_8_64;
1283 (* Reverse elements. *)
1284 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1285 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1286 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1287 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1288 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1289 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1291 (* Bit selection. *)
1292 Vbsl,
1293 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1294 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1295 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1296 pf_su_8_64;
1297 Vbsl,
1298 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1299 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1300 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1301 pf_su_8_64;
1303 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1304 generating good code for intrinsics which return structure types --
1305 builtins work well by themselves (and understand that the values being
1306 stored on e.g. the stack also reside in registers, so can optimise the
1307 stores away entirely if the results are used immediately), but
1308 intrinsics are very much less efficient. Maybe something can be improved
1309 re: inlining, or tweaking the ABI used for intrinsics (a special call
1310 attribute?). *)
1312 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
1313 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1315 (* Zip elements. *)
1316 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
1317 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1319 (* Unzip elements. *)
1320 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
1321 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
1323 (* Element/structure loads. VLD1 variants. *)
1324 Vldx 1,
1325 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1326 CstPtrTo Corereg |]]],
1327 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1328 pf_su_8_64;
1329 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1330 CstPtrTo Corereg |]]],
1331 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1332 pf_su_8_64;
1334 Vldx_lane 1,
1335 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1336 CstPtrTo Corereg |]]],
1337 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1338 "vld1_lane", bits_3, pf_su_8_32;
1339 Vldx_lane 1,
1340 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1341 CstPtrTo Corereg |]];
1342 Const_valuator (fun _ -> 0)],
1343 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1344 "vld1_lane", bits_3, [S64; U64];
1345 Vldx_lane 1,
1346 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1347 CstPtrTo Corereg |]]],
1348 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1349 "vld1Q_lane", bits_3, pf_su_8_32;
1350 Vldx_lane 1,
1351 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1352 CstPtrTo Corereg |]]],
1353 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1354 "vld1Q_lane", bits_3, [S64; U64];
1356 Vldx_dup 1,
1357 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1358 CstPtrTo Corereg |]]],
1359 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1360 bits_1, pf_su_8_32;
1361 Vldx_dup 1,
1362 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1363 CstPtrTo Corereg |]]],
1364 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1365 bits_1, [S64; U64];
1366 Vldx_dup 1,
1367 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1368 CstPtrTo Corereg |]]],
1369 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1370 bits_1, pf_su_8_32;
1371 Vldx_dup 1,
1372 [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1373 CstPtrTo Corereg |]]],
1374 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1375 bits_1, [S64; U64];
1377 (* VST1 variants. *)
1378 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1379 PtrTo Corereg |]]],
1380 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1381 store_1, pf_su_8_64;
1382 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1383 PtrTo Corereg |]]],
1384 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1385 store_1, pf_su_8_64;
1387 Vstx_lane 1,
1388 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1389 CstPtrTo Corereg |]]],
1390 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1391 "vst1_lane", store_3, pf_su_8_32;
1392 Vstx_lane 1,
1393 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1394 CstPtrTo Corereg |]];
1395 Const_valuator (fun _ -> 0)],
1396 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1397 "vst1_lane", store_3, [U64; S64];
1398 Vstx_lane 1,
1399 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1400 CstPtrTo Corereg |]]],
1401 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1402 "vst1Q_lane", store_3, pf_su_8_32;
1403 Vstx_lane 1,
1404 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1405 CstPtrTo Corereg |]]],
1406 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1407 "vst1Q_lane", store_3, [U64; S64];
1409 (* VLD2 variants. *)
1410 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1411 "vld2", bits_1, pf_su_8_32;
1412 Vldx 2, [Instruction_name ["vld1"]],
1413 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1414 "vld2", bits_1, [S64; U64];
1415 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1416 CstPtrTo Corereg |];
1417 Use_operands [| VecArray (2, Dreg);
1418 CstPtrTo Corereg |]]],
1419 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1420 "vld2Q", bits_1, pf_su_8_32;
1422 Vldx_lane 2,
1423 [Disassembles_as [Use_operands
1424 [| VecArray (2, Element_of_dreg);
1425 CstPtrTo Corereg |]]],
1426 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1427 VecArray (2, Dreg); Immed |],
1428 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1429 Vldx_lane 2,
1430 [Disassembles_as [Use_operands
1431 [| VecArray (2, Element_of_dreg);
1432 CstPtrTo Corereg |]]],
1433 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1434 VecArray (2, Qreg); Immed |],
1435 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1437 Vldx_dup 2,
1438 [Disassembles_as [Use_operands
1439 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1440 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1441 "vld2_dup", bits_1, pf_su_8_32;
1442 Vldx_dup 2,
1443 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1444 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1445 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1446 "vld2_dup", bits_1, [S64; U64];
1448 (* VST2 variants. *)
1449 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1450 PtrTo Corereg |]]],
1451 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1452 store_1, pf_su_8_32;
1453 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1454 PtrTo Corereg |]];
1455 Instruction_name ["vst1"]],
1456 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1457 store_1, [S64; U64];
1458 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1459 PtrTo Corereg |];
1460 Use_operands [| VecArray (2, Dreg);
1461 PtrTo Corereg |]]],
1462 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1463 store_1, pf_su_8_32;
1465 Vstx_lane 2,
1466 [Disassembles_as [Use_operands
1467 [| VecArray (2, Element_of_dreg);
1468 CstPtrTo Corereg |]]],
1469 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1470 store_3, P8 :: P16 :: F32 :: su_8_32;
1471 Vstx_lane 2,
1472 [Disassembles_as [Use_operands
1473 [| VecArray (2, Element_of_dreg);
1474 CstPtrTo Corereg |]]],
1475 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1476 store_3, [P16; F32; U16; U32; S16; S32];
1478 (* VLD3 variants. *)
1479 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1480 "vld3", bits_1, pf_su_8_32;
1481 Vldx 3, [Instruction_name ["vld1"]],
1482 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1483 "vld3", bits_1, [S64; U64];
1484 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1485 CstPtrTo Corereg |];
1486 Use_operands [| VecArray (3, Dreg);
1487 CstPtrTo Corereg |]]],
1488 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1489 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1491 Vldx_lane 3,
1492 [Disassembles_as [Use_operands
1493 [| VecArray (3, Element_of_dreg);
1494 CstPtrTo Corereg |]]],
1495 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1496 VecArray (3, Dreg); Immed |],
1497 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1498 Vldx_lane 3,
1499 [Disassembles_as [Use_operands
1500 [| VecArray (3, Element_of_dreg);
1501 CstPtrTo Corereg |]]],
1502 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1503 VecArray (3, Qreg); Immed |],
1504 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1506 Vldx_dup 3,
1507 [Disassembles_as [Use_operands
1508 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1509 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1510 "vld3_dup", bits_1, pf_su_8_32;
1511 Vldx_dup 3,
1512 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1513 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1514 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1515 "vld3_dup", bits_1, [S64; U64];
1517 (* VST3 variants. *)
1518 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1519 PtrTo Corereg |]]],
1520 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1521 store_1, pf_su_8_32;
1522 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1523 PtrTo Corereg |]];
1524 Instruction_name ["vst1"]],
1525 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1526 store_1, [S64; U64];
1527 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1528 PtrTo Corereg |];
1529 Use_operands [| VecArray (3, Dreg);
1530 PtrTo Corereg |]]],
1531 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1532 store_1, pf_su_8_32;
1534 Vstx_lane 3,
1535 [Disassembles_as [Use_operands
1536 [| VecArray (3, Element_of_dreg);
1537 CstPtrTo Corereg |]]],
1538 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1539 store_3, P8 :: P16 :: F32 :: su_8_32;
1540 Vstx_lane 3,
1541 [Disassembles_as [Use_operands
1542 [| VecArray (3, Element_of_dreg);
1543 CstPtrTo Corereg |]]],
1544 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1545 store_3, [P16; F32; U16; U32; S16; S32];
1547 (* VLD4/VST4 variants. *)
1548 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1549 "vld4", bits_1, pf_su_8_32;
1550 Vldx 4, [Instruction_name ["vld1"]],
1551 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1552 "vld4", bits_1, [S64; U64];
1553 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1554 CstPtrTo Corereg |];
1555 Use_operands [| VecArray (4, Dreg);
1556 CstPtrTo Corereg |]]],
1557 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1558 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1560 Vldx_lane 4,
1561 [Disassembles_as [Use_operands
1562 [| VecArray (4, Element_of_dreg);
1563 CstPtrTo Corereg |]]],
1564 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1565 VecArray (4, Dreg); Immed |],
1566 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1567 Vldx_lane 4,
1568 [Disassembles_as [Use_operands
1569 [| VecArray (4, Element_of_dreg);
1570 CstPtrTo Corereg |]]],
1571 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1572 VecArray (4, Qreg); Immed |],
1573 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1575 Vldx_dup 4,
1576 [Disassembles_as [Use_operands
1577 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1578 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1579 "vld4_dup", bits_1, pf_su_8_32;
1580 Vldx_dup 4,
1581 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1582 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1583 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1584 "vld4_dup", bits_1, [S64; U64];
1586 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1587 PtrTo Corereg |]]],
1588 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1589 store_1, pf_su_8_32;
1590 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1591 PtrTo Corereg |]];
1592 Instruction_name ["vst1"]],
1593 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1594 store_1, [S64; U64];
1595 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1596 PtrTo Corereg |];
1597 Use_operands [| VecArray (4, Dreg);
1598 PtrTo Corereg |]]],
1599 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1600 store_1, pf_su_8_32;
1602 Vstx_lane 4,
1603 [Disassembles_as [Use_operands
1604 [| VecArray (4, Element_of_dreg);
1605 CstPtrTo Corereg |]]],
1606 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1607 store_3, P8 :: P16 :: F32 :: su_8_32;
1608 Vstx_lane 4,
1609 [Disassembles_as [Use_operands
1610 [| VecArray (4, Element_of_dreg);
1611 CstPtrTo Corereg |]]],
1612 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1613 store_3, [P16; F32; U16; U32; S16; S32];
1615 (* Logical operations. And. *)
1616 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64;
1617 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1619 (* Or. *)
1620 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64;
1621 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1623 (* Eor. *)
1624 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64;
1625 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1627 (* Bic (And-not). *)
1628 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64;
1629 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1631 (* Or-not. *)
1632 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64;
1633 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
(* Table entries for the vreinterpret intrinsics.  For every element type
   [convto] in [elems] this produces one D-register entry and one Q-register
   entry.  Their type list holds a [Cast (convto, convfrom)] for each other
   element type [convfrom], in the order the types appear in [elems].  A type
   is never cast to itself.  *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  List.fold_right
    (fun convto acc ->
      (* All casts whose destination is [convto].  *)
      let types =
        List.fold_right
          (fun convfrom acc ->
            if convfrom <> convto then
              Cast (convto, convfrom) :: acc
            else
              acc)
          elems
          []
      in
      let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
                    "vreinterpret", conv_1, types
      and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
                    "vreinterpretQ", conv_1, types in
      dconv :: qconv :: acc)
    elems
    []
1657 (* Output routines. *)
(* Render an element type as the lowercase suffix used in names, e.g. [S8]
   gives "s8" and [B16] gives "16".  The two halves of a [Conv] or [Cast]
   are rendered recursively and joined with an underscore.  Fails with
   "No elts" when given [NoElts].  *)
let rec string_of_elt elt =
  match elt with
  | NoElts -> failwith "No elts"
  | Conv (a, b) | Cast (a, b) ->
      String.concat "_" [string_of_elt a; string_of_elt b]
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
(* Like string_of_elt, except that the two halves of a top-level [Conv] or
   [Cast] are separated by a dot instead of an underscore.  *)
let string_of_elt_dots = function
  | Conv (src, dst) | Cast (src, dst) ->
      String.concat "." [string_of_elt src; string_of_elt dst]
  | simple -> string_of_elt simple
(* Return the C spelling of a vector/scalar type.

   The local [name] takes an [affix] function that decorates a bare base
   name.  The top-level call passes an affix that appends "_t", so
   [T_int8x8] becomes "int8x8_t".  Immediates, void and the
   "__builtin_neon_*" types are returned as fixed strings and ignore the
   affix.  *)
let string_of_vectype vt =
  let rec name affix = function
      T_int8x8 -> affix "int8x8"
    | T_int8x16 -> affix "int8x16"
    | T_int16x4 -> affix "int16x4"
    | T_int16x8 -> affix "int16x8"
    | T_int32x2 -> affix "int32x2"
    | T_int32x4 -> affix "int32x4"
    | T_int64x1 -> affix "int64x1"
    | T_int64x2 -> affix "int64x2"
    | T_uint8x8 -> affix "uint8x8"
    | T_uint8x16 -> affix "uint8x16"
    | T_uint16x4 -> affix "uint16x4"
    | T_uint16x8 -> affix "uint16x8"
    | T_uint32x2 -> affix "uint32x2"
    | T_uint32x4 -> affix "uint32x4"
    | T_uint64x1 -> affix "uint64x1"
    | T_uint64x2 -> affix "uint64x2"
    | T_float32x2 -> affix "float32x2"
    | T_float32x4 -> affix "float32x4"
    | T_poly8x8 -> affix "poly8x8"
    | T_poly8x16 -> affix "poly8x16"
    | T_poly16x4 -> affix "poly16x4"
    | T_poly16x8 -> affix "poly16x8"
    | T_int8 -> affix "int8"
    | T_int16 -> affix "int16"
    | T_int32 -> affix "int32"
    | T_int64 -> affix "int64"
    | T_uint8 -> affix "uint8"
    | T_uint16 -> affix "uint16"
    | T_uint32 -> affix "uint32"
    | T_uint64 -> affix "uint64"
    | T_poly8 -> affix "poly8"
    | T_poly16 -> affix "poly16"
    | T_float32 -> affix "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        (* Render the element type undecorated, then decorate the whole
           "<base>x<num>" name once.  *)
        let basename = name (fun x -> x) base in
        affix (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        let basename = name affix x in
        Printf.sprintf "%s *" basename
    | T_const x ->
        let basename = name affix x in
        Printf.sprintf "const %s" basename
  in
  name (fun x -> x ^ "_t") vt
(* Map each opaque integer mode to the name of its "__builtin_neon_*"
   type.  *)
let string_of_inttype inttype =
  match inttype with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Spell a machine mode in lowercase, e.g. [V8QI] gives "v8qi".  *)
let string_of_mode mode =
  match mode with
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).  The intrinsic name is NAME with every ASCII letter
   lowercased; all other characters are returned unchanged.

   String.lowercase_ascii is used because String.lowercase is deprecated and
   is no longer provided by current OCaml standard libraries.  *)
let intrinsic_name name = String.lowercase_ascii name
(* Compute the builtin name for NAME.  The name may be overridden by things
   (e.g. Flipped or Builtin_name) found in the FEATURES list; when several
   such entries are present, the one nearest the head of the list wins.
   ASCII uppercase letters are then dropped from the result, and every other
   character (lowercase letters, digits, underscores, ...) is kept.  *)
let builtin_name features name =
  let name =
    List.fold_right
      (fun el name ->
        match el with
        | Flipped x | Builtin_name x -> x
        | _ -> name)
      features name
  in
  (* A character is kept when ASCII lowercasing leaves it unchanged.
     Char.lowercase_ascii replaces a test built on the deprecated
     String.lowercase and avoids allocating a one-character string for each
     input character.  *)
  let is_kept c = Char.lowercase_ascii c = c in
  let buf = Buffer.create (String.length name) in
  String.iter (fun c -> if is_kept c then Buffer.add_char buf c) name;
  Buffer.contents buf
(* Transform an arity into a list of strings: one string_of_vectype result
   per vector type carried by the arity, in the order they appear.  *)
let strings_of_arity a =
  let vectypes =
    match a with
    | Arity0 vt -> [vt]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vectypes
(* Trailing components of a builtin name that are removed to obtain the name
   used as an instruction.  A component is removed only when it is the text
   following the last underscore of the name; the underscore is removed
   with it.  *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.

   The candidate names come from the first Instruction_name or Flipped entry
   in FEATURES, whichever appears earlier in the list: an Instruction_name
   entry supplies its whole list of names, a Flipped entry supplies its single
   name.  If FEATURES has neither, the only candidate is
   [builtin_name features name].

   Each candidate then has the text after its last underscore removed,
   together with that underscore, when that text is one of the entries of
   suffixes_to_strip.  Candidates without an underscore, or whose last
   component is not in that list, are returned unchanged.  *)
let get_insn_names features name =
  (* Walk FEATURES directly instead of List.find followed by a second match;
     this removes the need for an unreachable "assert false" arm.  *)
  let rec candidates = function
    | [] -> [builtin_name features name]
    | Instruction_name names :: _ -> names
    | Flipped flipped :: _ -> [flipped]
    | _ :: rest -> candidates rest
  in
  let strip_suffix insn =
    try
      (* String.rindex raises Not_found when INSN has no underscore.  *)
      let underscore = String.rindex insn '_' in
      let suffix =
        String.sub insn (underscore + 1)
          (String.length insn - underscore - 1)
      in
      if List.mem suffix suffixes_to_strip
      then String.sub insn 0 underscore
      else insn
    with Not_found | Invalid_argument _ -> insn
  in
  List.map strip_suffix (candidates features)
(* Apply F to each element of ELTS, comma-separate the resulting strings
   with ", " and append the result to ACC.  An empty ELTS returns ACC
   unchanged; no separator is written after the last element.

   String.concat builds the joined string in one pass, instead of repeatedly
   re-copying a growing accumulator with "^".  *)
let commas f elts acc = acc ^ String.concat ", " (List.map f elts)
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops", and the result is the
   one-element list [f shape].  If the features list contains a
   Disassembles_as entry, F is instead applied to each shape contained in
   that entry and the results are returned in a list.  If there is more than
   one Disassembles_as entry, only the first is used.

   The features list is searched directly rather than through List.find
   inside a "try ... with Not_found".  That handler also enclosed the calls
   to F, so a Not_found raised by F on a Disassembles_as shape was silently
   turned into the default result; such an exception now propagates to the
   caller.  *)
let analyze_all_shapes features shape f =
  let rec search = function
    | [] -> [f shape]
    | Disassembles_as shapes :: _ -> List.map f shapes
    | _ :: rest -> search rest
  in
  search features