(* official-gcc.git: gcc/config/arm/neon.ml
   blob 677468876af00078af7c6b5fdd2e40a32f7d502e
   Related ChangeLog entry:
   * neon.md (neon_vget_lanev2di): Use gen_lowpart and gen_highpart.  *)
(* Common code for ARM NEON header file, documentation and test case
   generators.

   Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  *)
(* Shorthand types for vector elements.  S/U/F/P are signed, unsigned, float
   and polynomial elements of the given bit width; I and B are untyped
   "int" and "bits" elements used when signedness is irrelevant.  Conv and
   Cast pair a destination and source element for conversions/reinterprets.
   Constructor order is significant only in that it must stay stable for
   structural comparison.  *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts

(* The class (signedness/kind) of an element type, with ConvClass pairing
   the classes of a conversion's destination and source.  *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
(* These vector types correspond directly to C types.  The NxM vector types
   map to arm_neon.h vector typedefs; T_immediate carries an inclusive
   range for a constant operand; the scalar, array, pointer and const
   variants build up the remaining C operand types.  *)
type vectype = T_int8x8 | T_int8x16
             | T_int16x4 | T_int16x8
             | T_int32x2 | T_int32x4
             | T_int64x1 | T_int64x2
             | T_uint8x8 | T_uint8x16
             | T_uint16x4 | T_uint16x8
             | T_uint32x2 | T_uint32x4
             | T_uint64x1 | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8 | T_poly8x16
             | T_poly16x4 | T_poly16x8
             | T_immediate of int * int
             | T_int8 | T_int16
             | T_int32 | T_int64
             | T_uint8 | T_uint16
             | T_uint32 | T_uint64
             | T_poly8 | T_poly16
             | T_float32 | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void | T_intQI
             | T_intHI | T_intSI
             | T_intDI | T_floatSF
(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
   These are the opaque integer modes used for multi-register (structure)
   values such as the results of vld2/vld3/vld4.  *)

type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* One operand slot of an instruction shape: a D or Q NEON register, an ARM
   core register, an immediate, an array of registers (vldN/vstN), or a
   (possibly const) pointer.  The trailing constructors exist only for the
   test generator.  *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator.  *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands *)

(* The overall shape of an instruction: either an explicit per-operand
   description (All/Use_operands), or one of the common widening/narrowing/
   immediate/scalar patterns, where the operand registers are implied.  *)
type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt
(* Arity of an intrinsic: the first vectype is always the return type,
   followed by zero to four argument types.  *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype
(* GCC machine modes used for NEON operands: 64-bit vectors, 128-bit
   vectors, and the scalar modes.  *)
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF
(* One constructor per intrinsic family; the int argument of the
   load/store/table constructors is the structure count (vld2 -> Vldx 2,
   vtbl3 -> Vtbl 3, etc.).  *)
type opcode =
  (* Binary ops.  *)
    Vadd
  | Vmul
  | Vmla
  | Vmls
  | Vsub
  | Vceq
  | Vcge
  | Vcgt
  | Vcle
  | Vclt
  | Vcage
  | Vcagt
  | Vcale
  | Vcalt
  | Vtst
  | Vabd
  | Vaba
  | Vmax
  | Vmin
  | Vpadd
  | Vpada
  | Vpmax
  | Vpmin
  | Vrecps
  | Vrsqrts
  | Vshl
  | Vshr_n
  | Vshl_n
  | Vsra_n
  | Vsri
  | Vsli
  (* Logic binops.  *)
  | Vand
  | Vorr
  | Veor
  | Vbic
  | Vorn
  | Vbsl
  (* Ops with scalar.  *)
  | Vmul_lane
  | Vmla_lane
  | Vmls_lane
  | Vmul_n
  | Vmla_n
  | Vmls_n
  | Vmull_n
  | Vmull_lane
  | Vqdmull_n
  | Vqdmull_lane
  | Vqdmulh_n
  | Vqdmulh_lane
  (* Unary ops.  *)
  | Vabs
  | Vneg
  | Vcls
  | Vclz
  | Vcnt
  | Vrecpe
  | Vrsqrte
  | Vmvn
  (* Vector extract.  *)
  | Vext
  (* Reverse elements.  *)
  | Vrev64
  | Vrev32
  | Vrev16
  (* Transposition ops.  *)
  | Vtrn
  | Vzip
  | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector.  *)
  | Vget_lane
  | Vset_lane
  (* Initialize vector from bit pattern.  *)
  | Vcreate
  (* Set all lanes to same value.  *)
  | Vdup_n
  | Vmov_n  (* Is this the same?  *)
  (* Duplicate scalar to all lanes of vector.  *)
  | Vdup_lane
  (* Combine vectors.  *)
  | Vcombine
  (* Get quadword high/low parts.  *)
  | Vget_high
  | Vget_low
  (* Convert vectors.  *)
  | Vcvt
  | Vcvt_n
  (* Narrow/lengthen vectors.  *)
  | Vmovn
  | Vmovl
  (* Table lookup.  *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts.  *)
  | Vreinterp
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments).  *)

type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  | Flipped of string  (* Builtin name to use with flipped arguments.  *)
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                 for All _, Long, Wide, Narrow shape_forms.  *)
  | ReturnPtr  (* Pass explicit pointer to return value as first argument.  *)
    (* A specification as to the shape of instruction expected upon
       disassembly, used if it differs from the shape used to build the
       intrinsic prototype.  Multiple entries in the constructor's argument
       indicate that the intrinsic expands to more than one assembly
       instruction, each with a corresponding shape specified here.  *)
  | Disassembles_as of shape_form list
  | Builtin_name of string  (* Override the name of the builtin.  *)
    (* Override the name of the instruction.  If more than one name
       is specified, it means that the instruction can have any of those
       names.  *)
  | Instruction_name of string list
    (* Mark that the intrinsic yields no instructions, or expands to yield
       behavior that the test generator cannot test.  *)
  | No_op
    (* Mark that the intrinsic has constant arguments that cannot be set
       to the defaults (zero for pointers and one otherwise) in the test
       cases.  The function supplied must return the integer to be written
       into the testcase for the argument number (0-based) supplied to it.  *)
  | Const_valuator of (int -> int)
  | Fixed_vector_reg
  | Fixed_core_reg
(* Raised when an element is a Cast whose two sides cannot be treated as a
   single mode (e.g. when asking for the width of a reinterpret).  *)
exception MixedMode of elts * elts

(* Width in bits of a vector element.  For a Conv the two sides must agree;
   a Cast has no single width and raises MixedMode.  *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (a, b) ->
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else failwith "element width?"
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"

(* Class (signedness/kind) of a vector element; conversions map to a
   ConvClass of the two component classes.  *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType
(* Build an element type from a class and a width in bits.  Inverse of
   (elt_class, elt_width) for the combinations that exist; fails for
   nonexistent ones (e.g. Float, 8 or Poly, 32).  *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"

(* Return unsigned integer element the same width as argument.  *)
let unsigned_of_elt elt =
  elt_of_class_width Unsigned (elt_width elt)

(* Return signed integer element the same width as argument.  *)
let signed_of_elt elt =
  elt_of_class_width Signed (elt_width elt)

(* Return untyped bits element the same width as argument.  *)
let bits_of_elt elt =
  elt_of_class_width Bits (elt_width elt)
(* Strip signedness: map signed and unsigned integer elements to the
   untyped Int elements of the same width; leave everything else alone.  *)
let non_signed_variant = function
    S8 -> I8
  | S16 -> I16
  | S32 -> I32
  | S64 -> I64
  | U8 -> I8
  | U16 -> I16
  | U32 -> I32
  | U64 -> I64
  | x -> x

(* Map polynomial elements to the unsigned element of the same width;
   other classes are preserved.  *)
let poly_unsigned_variant v =
  let elclass = match elt_class v with
    Poly -> Unsigned
  | x -> x in
  elt_of_class_width elclass (elt_width v)

(* Same class, double the width (e.g. S16 -> S32).  *)
let widen_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w * 2)

(* Same class, half the width (e.g. S32 -> S16).  *)
let narrow_elt elt =
  let w = elt_width elt
  and c = elt_class elt in
  elt_of_class_width c (w / 2)
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.
   Scans OPERANDS from the end; fails (index out of bounds) if no D/Q
   register operand is present.  *)
let find_key_operand operands =
  let rec scan opno =
    match operands.(opno) with
      Qreg -> Qreg
    | Dreg -> Dreg
    | VecArray (_, Qreg) -> Qreg
    | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno-1)
  in
    scan ((Array.length operands) - 1)
(* Machine mode for element type ELT under instruction shape SHAPE: D-register
   shapes map to 64-bit vector modes, Q-register shapes to 128-bit ones,
   core-register/pointer shapes to scalar modes.  Float elements (or
   conversions to float) select the float mode of the same width.  *)
let rec mode_of_elt elt shape =
  let flt = match elt_class elt with
    Float | ConvClass(_, Float) -> true | _ -> false in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
   operand number.  Returns the function (widen/narrow/identity) to apply to
   the nominal element type to obtain operand NO's element type.  Operand 0
   is the result.  *)

let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
(* Register type (D/Q) of an operand, based on shape and operand number.
   Operand 0 is the result.  *)

let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
(* C vector/scalar type for operand NO of an instruction with shape SHAPE and
   nominal element type ELT.  The element type is first adjusted per operand
   by SHAPEMAP; the register class from REGMAP then selects the D-vector,
   Q-vector or core-register C type, recursing through arrays and
   pointers.  *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
    type_for_reg_elt reg elt
(* Return size of a vector type, in bits.  Raises Not_found for non-vector
   types.  *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found

(* Opaque integer mode wide enough to hold NUM vectors of type ELTTYPE
   (e.g. an array of 2 Q-vectors occupies 8 words -> OImode).  *)
let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  match numwords with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode.  *)

(* Wrap FUNC so that the returned element type has its signedness
   stripped (signed and unsigned variants share one builtin).  *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'

(* Don't restrict any types.  All operands/results use the same element
   type; MAKE_ARITY assembles the arity from the per-operand type
   function.  *)
let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt

(* As sign_invar_*, but when sign matters.  *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))

(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data).  *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison.  The result is always the unsigned element of
   the operand width.  *)

let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt

(* Signed/unsigned invariant comparison.  *)

let cmp_sign_invar shape elt =
  let shape', elt' = cmp_sign_matters shape elt in
  let elt'' =
    match non_signed_variant elt' with
      P8 -> I8
    | x -> x
  in
    shape', elt''

(* Comparison (VTST) where only the element width matters.  *)

let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only

(* Register shift (VSHL and friends): the shift-count operand is always the
   signed element of the data width.  *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt
(* Genericised constant-shift type-generating function.  MKIMM builds the
   immediate-range type from the element width; ?arity overrides the default
   two-operand arity (e.g. for accumulating shifts); ?result restricts the
   result element type (e.g. to unsigned).  *)

let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt

(* Use for immediate right-shifts.  Valid shift counts are 1..width.  *)

let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

(* Immediate right-shift and accumulate (VSRA): destination is also the
   first source.  *)
let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness.  *)

let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed.  *)

let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift.  Valid shift counts are 0..width-1.  *)

let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result.  *)

let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs.  *)

let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert: only element size matters.  *)

let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
(* Get/set lane.  *)

(* vget_lane: poly results are exposed as unsigned of the same width;
   32-bit results use the untyped "bits" element.  *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
  (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

(* vset_lane: element type reduced to width-only bits.  *)
let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* As set_lane, but with no element type recorded at all.  *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts

(* vcreate: build a vector from a 64-bit (U64) bit pattern.  *)
let create_vector shape elt =
  let vtype = type_for_elt shape U64 1
  and rtype = type_for_elt shape elt 0 in
  Arity1 (rtype, vtype), elt
(* Conversions/casts: the element must be a Conv or Cast carrying the
   destination and source element types.  *)
let conv make_arity shape elt =
  let edest, esrc = match elt with
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
  | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))

(* Operation has an unsigned result even if operands are signed.  *)

let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))

(* Wrap FUNC so that the returned element type is reduced to width-only
   bits.  *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'

(* Extend operation.  *)

let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* Table look-up operations.  Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops.  *)

let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))

(* Operations where only bits matter.  *)

let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Store insns.  Stores return void; only the element width matters.  *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt

let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt

(* Wrap FUNC, discarding the element type entirely.  *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3

(* Bit-select operations (first operand is unsigned int).  *)

let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts

(* Common lists of supported element types.  *)

let s_8_32 = [S8; S16; S32]
let u_8_32 = [U8; U16; U32]
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
712 let ops =
714 (* Addition. *)
715 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32;
716 Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64];
717 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
718 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
719 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
720 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
721 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
722 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
723 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
724 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
725 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
726 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
727 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
728 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
729 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
730 Narrow, "vRaddhn", sign_invar_2, su_16_64;
732 (* Multiplication. *)
733 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
734 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
735 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
736 elts_same_2, [S16; S32];
737 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
738 elts_same_2, [S16; S32];
739 Vmul,
740 [Saturating; Rounding; Doubling; High_half;
741 Instruction_name ["vqrdmulh"]],
742 All (3, Dreg), "vqRdmulh",
743 elts_same_2, [S16; S32];
744 Vmul,
745 [Saturating; Rounding; Doubling; High_half;
746 Instruction_name ["vqrdmulh"]],
747 All (3, Qreg), "vqRdmulhQ",
748 elts_same_2, [S16; S32];
749 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
750 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
752 (* Multiply-accumulate. *)
753 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
754 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
755 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
756 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
758 (* Multiply-subtract. *)
759 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
760 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
761 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
762 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
764 (* Subtraction. *)
765 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
766 Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
767 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
768 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
769 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
770 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
771 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
772 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
773 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
774 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
775 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
776 Narrow, "vRsubhn", sign_invar_2, su_16_64;
778 (* Comparison, equal. *)
779 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
780 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
782 (* Comparison, greater-than or equal. *)
783 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
784 Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
785 All (3, Dreg), "vcge", cmp_sign_matters,
786 u_8_32;
787 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
788 Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"],
789 All (3, Qreg), "vcgeQ", cmp_sign_matters,
790 u_8_32;
792 (* Comparison, less-than or equal. *)
793 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
794 F32 :: s_8_32;
795 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeu"],
796 All (3, Dreg), "vcle", cmp_sign_matters,
797 u_8_32;
798 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
799 All (3, Qreg), "vcleQ", cmp_sign_matters,
800 F32 :: s_8_32;
801 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
802 All (3, Qreg), "vcleQ", cmp_sign_matters,
803 u_8_32;
805 (* Comparison, greater-than. *)
806 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
807 Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
808 All (3, Dreg), "vcgt", cmp_sign_matters,
809 u_8_32;
810 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
811 Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"],
812 All (3, Qreg), "vcgtQ", cmp_sign_matters,
813 u_8_32;
815 (* Comparison, less-than. *)
816 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
817 F32 :: s_8_32;
818 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtu"],
819 All (3, Dreg), "vclt", cmp_sign_matters,
820 u_8_32;
821 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
822 All (3, Qreg), "vcltQ", cmp_sign_matters,
823 F32 :: s_8_32;
824 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
825 All (3, Qreg), "vcltQ", cmp_sign_matters,
826 u_8_32;
828 (* Compare absolute greater-than or equal. *)
829 Vcage, [Instruction_name ["vacge"]],
830 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
831 Vcage, [Instruction_name ["vacge"]],
832 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
834 (* Compare absolute less-than or equal. *)
835 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
836 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
837 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
838 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
840 (* Compare absolute greater-than or equal. *)
841 Vcagt, [Instruction_name ["vacgt"]],
842 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
843 Vcagt, [Instruction_name ["vacgt"]],
844 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
846 (* Compare absolute less-than or equal. *)
847 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
848 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
849 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
850 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
852 (* Test bits. *)
853 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
854 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
856 (* Absolute difference. *)
857 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
858 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
859 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
861 (* Absolute difference and accumulate. *)
862 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
863 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
864 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
866 (* Max. *)
867 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
868 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
870 (* Min. *)
871 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
872 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
874 (* Pairwise add. *)
875 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
876 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
877 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
879 (* Pairwise add, widen and accumulate. *)
880 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
881 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
883 (* Folding maximum, minimum. *)
884 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
885 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
887 (* Reciprocal step. *)
888 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
889 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
890 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
891 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
893 (* Vector shift left. *)
894 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
895 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
896 Vshl, [Instruction_name ["vrshl"]; Rounding],
897 All (3, Dreg), "vRshl", reg_shift, su_8_64;
898 Vshl, [Instruction_name ["vrshl"]; Rounding],
899 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
900 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
901 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
902 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
903 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
904 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
905 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
907 (* Vector shift right by constant. *)
908 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
909 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
910 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
911 "vRshr_n", shift_right, su_8_64;
912 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
913 "vRshrQ_n", shift_right, su_8_64;
914 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
915 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
916 shift_right_sign_invar, su_16_64;
917 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
918 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
919 "vqRshrn_n", shift_right, su_16_64;
920 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
921 shift_right_to_uns, [S16; S32; S64];
922 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
923 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
925 (* Vector shift left by constant. *)
926 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
927 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
928 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
929 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
930 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
931 shift_left_to_uns, [S8; S16; S32; S64];
932 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
933 shift_left_to_uns, [S8; S16; S32; S64];
934 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
936 (* Vector shift right by constant and accumulate. *)
937 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
938 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
939 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
940 "vRsra_n", shift_right_acc, su_8_64;
941 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
942 "vRsraQ_n", shift_right_acc, su_8_64;
944 (* Vector shift right and insert. *)
945 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
946 P8 :: P16 :: su_8_64;
947 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
948 P8 :: P16 :: su_8_64;
950 (* Vector shift left and insert. *)
951 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
952 P8 :: P16 :: su_8_64;
953 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
954 P8 :: P16 :: su_8_64;
956 (* Absolute value. *)
957 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
958 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
959 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
960 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
962 (* Negate. *)
963 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
964 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
965 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
966 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
968 (* Bitwise not. *)
969 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
970 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
972 (* Count leading sign bits. *)
973 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
974 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
976 (* Count leading zeros. *)
977 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
978 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
980 (* Count number of set bits. *)
981 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
982 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
984 (* Reciprocal estimate. *)
985 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
986 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
988 (* Reciprocal square-root estimate. *)
989 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
990 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
992 (* Get lanes from a vector. *)
993 Vget_lane,
994 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
995 Instruction_name ["vmov"]],
996 Use_operands [| Corereg; Dreg; Immed |],
997 "vget_lane", get_lane, pf_su_8_32;
998 Vget_lane,
999 [No_op;
1000 InfoWord;
1001 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1002 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1003 Use_operands [| Corereg; Dreg; Immed |],
1004 "vget_lane", notype_2, [S64; U64];
1005 Vget_lane,
1006 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
1007 Instruction_name ["vmov"]],
1008 Use_operands [| Corereg; Qreg; Immed |],
1009 "vgetQ_lane", get_lane, pf_su_8_32;
1010 Vget_lane,
1011 [InfoWord;
1012 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
1013 Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0);
1014 Fixed_core_reg],
1015 Use_operands [| Corereg; Qreg; Immed |],
1016 "vgetQ_lane", notype_2, [S64; U64];
1018 (* Set lanes in a vector. *)
1019 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1020 Instruction_name ["vmov"]],
1021 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1022 set_lane, pf_su_8_32;
1023 Vset_lane, [No_op;
1024 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1025 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1026 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
1027 set_lane_notype, [S64; U64];
1028 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
1029 Instruction_name ["vmov"]],
1030 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1031 set_lane, pf_su_8_32;
1032 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
1033 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1034 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1035 set_lane_notype, [S64; U64];
1037 (* Create vector from literal bit pattern. *)
1038 Vcreate,
1039 [No_op], (* Not really, but it can yield various things that are too
1040 hard for the test generator at this time. *)
1041 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1042 pf_su_8_64;
1044 (* Set all lanes to the same value. *)
1045 Vdup_n,
1046 [Disassembles_as [Use_operands [| Dreg;
1047 Alternatives [ Corereg;
1048 Element_of_dreg ] |]]],
1049 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1050 pf_su_8_32;
1051 Vdup_n,
1052 [No_op;
1053 Instruction_name ["vmov"];
1054 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1055 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1056 [S64; U64];
1057 Vdup_n,
1058 [Disassembles_as [Use_operands [| Qreg;
1059 Alternatives [ Corereg;
1060 Element_of_dreg ] |]]],
1061 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1062 pf_su_8_32;
1063 Vdup_n,
1064 [No_op;
1065 Instruction_name ["vmov"];
1066 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1067 Use_operands [| Dreg; Corereg; Corereg |]]],
1068 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1069 [S64; U64];
1071 (* These are just aliases for the above. *)
1072 Vmov_n,
1073 [Builtin_name "vdup_n";
1074 Disassembles_as [Use_operands [| Dreg;
1075 Alternatives [ Corereg;
1076 Element_of_dreg ] |]]],
1077 Use_operands [| Dreg; Corereg |],
1078 "vmov_n", bits_1, pf_su_8_32;
1079 Vmov_n,
1080 [No_op;
1081 Builtin_name "vdup_n";
1082 Instruction_name ["vmov"];
1083 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1084 Use_operands [| Dreg; Corereg |],
1085 "vmov_n", notype_1, [S64; U64];
1086 Vmov_n,
1087 [Builtin_name "vdupQ_n";
1088 Disassembles_as [Use_operands [| Qreg;
1089 Alternatives [ Corereg;
1090 Element_of_dreg ] |]]],
1091 Use_operands [| Qreg; Corereg |],
1092 "vmovQ_n", bits_1, pf_su_8_32;
1093 Vmov_n,
1094 [No_op;
1095 Builtin_name "vdupQ_n";
1096 Instruction_name ["vmov"];
1097 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1098 Use_operands [| Dreg; Corereg; Corereg |]]],
1099 Use_operands [| Qreg; Corereg |],
1100 "vmovQ_n", notype_1, [S64; U64];
1102 (* Duplicate, lane version. We can't use Use_operands here because the
1103 rightmost register (always Dreg) would be picked up by find_key_operand,
1104 when we want the leftmost register to be used in this case (otherwise
1105 the modes are indistinguishable in neon.md, etc. *)
1106 Vdup_lane,
1107 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1108 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1109 Vdup_lane,
1110 [No_op; Const_valuator (fun _ -> 0)],
1111 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1112 Vdup_lane,
1113 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1114 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1115 Vdup_lane,
1116 [No_op; Const_valuator (fun _ -> 0)],
1117 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1119 (* Combining vectors. *)
1120 Vcombine, [No_op],
1121 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1122 pf_su_8_64;
1124 (* Splitting vectors. *)
1125 Vget_high, [No_op],
1126 Use_operands [| Dreg; Qreg |], "vget_high",
1127 notype_1, pf_su_8_64;
1128 Vget_low, [Instruction_name ["vmov"];
1129 Disassembles_as [Use_operands [| Dreg; Dreg |]];
1130 Fixed_vector_reg],
1131 Use_operands [| Dreg; Qreg |], "vget_low",
1132 notype_1, pf_su_8_32;
1133 Vget_low, [No_op],
1134 Use_operands [| Dreg; Qreg |], "vget_low",
1135 notype_1, [S64; U64];
1137 (* Conversions. *)
1138 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1139 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1140 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1141 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1142 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1143 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1144 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1145 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1147 (* Move, narrowing. *)
1148 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1149 Narrow, "vmovn", sign_invar_1, su_16_64;
1150 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1151 Narrow, "vqmovn", elts_same_1, su_16_64;
1152 Vmovn,
1153 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1154 Narrow, "vqmovun", dst_unsign_1,
1155 [S16; S32; S64];
1157 (* Move, long. *)
1158 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1159 Long, "vmovl", elts_same_1, su_8_32;
1161 (* Table lookup. *)
1162 Vtbl 1,
1163 [Instruction_name ["vtbl"];
1164 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1165 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1166 Vtbl 2, [Instruction_name ["vtbl"]],
1167 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1168 [U8; S8; P8];
1169 Vtbl 3, [Instruction_name ["vtbl"]],
1170 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1171 [U8; S8; P8];
1172 Vtbl 4, [Instruction_name ["vtbl"]],
1173 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1174 [U8; S8; P8];
1176 (* Extended table lookup. *)
1177 Vtbx 1,
1178 [Instruction_name ["vtbx"];
1179 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1180 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1181 Vtbx 2, [Instruction_name ["vtbx"]],
1182 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1183 [U8; S8; P8];
1184 Vtbx 3, [Instruction_name ["vtbx"]],
1185 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1186 [U8; S8; P8];
1187 Vtbx 4, [Instruction_name ["vtbx"]],
1188 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1189 [U8; S8; P8];
1191 (* Multiply, lane. (note: these were undocumented at the time of
1192 writing). *)
1193 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1194 [S16; S32; U16; U32; F32];
1195 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1196 [S16; S32; U16; U32; F32];
1198 (* Multiply-accumulate, lane. *)
1199 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1200 [S16; S32; U16; U32; F32];
1201 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1202 [S16; S32; U16; U32; F32];
1203 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1204 [S16; S32; U16; U32];
1205 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1206 elts_same_io_lane, [S16; S32];
1208 (* Multiply-subtract, lane. *)
1209 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1210 [S16; S32; U16; U32; F32];
1211 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1212 [S16; S32; U16; U32; F32];
1213 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1214 [S16; S32; U16; U32];
1215 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1216 elts_same_io_lane, [S16; S32];
1218 (* Long multiply, lane. *)
1219 Vmull_lane, [],
1220 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1222 (* Saturating doubling long multiply, lane. *)
1223 Vqdmull_lane, [Saturating; Doubling],
1224 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1226 (* Saturating doubling long multiply high, lane. *)
1227 Vqdmulh_lane, [Saturating; Halving],
1228 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1229 Vqdmulh_lane, [Saturating; Halving],
1230 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1231 Vqdmulh_lane, [Saturating; Halving; Rounding;
1232 Instruction_name ["vqrdmulh"]],
1233 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1234 Vqdmulh_lane, [Saturating; Halving; Rounding;
1235 Instruction_name ["vqrdmulh"]],
1236 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1238 (* Vector multiply by scalar. *)
1239 Vmul_n, [InfoWord;
1240 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1241 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1242 sign_invar_2, [S16; S32; U16; U32; F32];
1243 Vmul_n, [InfoWord;
1244 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1245 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1246 sign_invar_2, [S16; S32; U16; U32; F32];
1248 (* Vector long multiply by scalar. *)
1249 Vmull_n, [Instruction_name ["vmull"];
1250 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1251 Wide_scalar, "vmull_n",
1252 elts_same_2, [S16; S32; U16; U32];
1254 (* Vector saturating doubling long multiply by scalar. *)
1255 Vqdmull_n, [Saturating; Doubling;
1256 Disassembles_as [Use_operands [| Qreg; Dreg;
1257 Element_of_dreg |]]],
1258 Wide_scalar, "vqdmull_n",
1259 elts_same_2, [S16; S32];
1261 (* Vector saturating doubling long multiply high by scalar. *)
1262 Vqdmulh_n,
1263 [Saturating; Halving; InfoWord;
1264 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1265 Use_operands [| Qreg; Qreg; Corereg |],
1266 "vqdmulhQ_n", elts_same_2, [S16; S32];
1267 Vqdmulh_n,
1268 [Saturating; Halving; InfoWord;
1269 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1270 Use_operands [| Dreg; Dreg; Corereg |],
1271 "vqdmulh_n", elts_same_2, [S16; S32];
1272 Vqdmulh_n,
1273 [Saturating; Halving; Rounding; InfoWord;
1274 Instruction_name ["vqrdmulh"];
1275 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1276 Use_operands [| Qreg; Qreg; Corereg |],
1277 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1278 Vqdmulh_n,
1279 [Saturating; Halving; Rounding; InfoWord;
1280 Instruction_name ["vqrdmulh"];
1281 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1282 Use_operands [| Dreg; Dreg; Corereg |],
1283 "vqRdmulh_n", elts_same_2, [S16; S32];
1285 (* Vector multiply-accumulate by scalar. *)
1286 Vmla_n, [InfoWord;
1287 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1288 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1289 sign_invar_io, [S16; S32; U16; U32; F32];
1290 Vmla_n, [InfoWord;
1291 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1292 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1293 sign_invar_io, [S16; S32; U16; U32; F32];
1294 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1295 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1296 [S16; S32];
1298 (* Vector multiply subtract by scalar. *)
1299 Vmls_n, [InfoWord;
1300 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1301 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1302 sign_invar_io, [S16; S32; U16; U32; F32];
1303 Vmls_n, [InfoWord;
1304 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1305 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1306 sign_invar_io, [S16; S32; U16; U32; F32];
1307 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1308 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1309 [S16; S32];
1311 (* Vector extract. *)
1312 Vext, [Const_valuator (fun _ -> 0)],
1313 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1314 pf_su_8_64;
1315 Vext, [Const_valuator (fun _ -> 0)],
1316 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1317 pf_su_8_64;
1319 (* Reverse elements. *)
1320 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1321 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1322 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1323 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1324 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1325 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1327 (* Bit selection. *)
1328 Vbsl,
1329 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1330 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1331 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1332 pf_su_8_64;
1333 Vbsl,
1334 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1335 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1336 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1337 pf_su_8_64;
1339 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1340 generating good code for intrinsics which return structure types --
1341 builtins work well by themselves (and understand that the values being
1342 stored on e.g. the stack also reside in registers, so can optimise the
1343 stores away entirely if the results are used immediately), but
1344 intrinsics are very much less efficient. Maybe something can be improved
1345 re: inlining, or tweaking the ABI used for intrinsics (a special call
1346 attribute?).
1348 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
1349 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1351 (* Zip elements. *)
1352 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
1353 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1355 (* Unzip elements. *)
1356 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
1357 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
1359 (* Element/structure loads. VLD1 variants. *)
1360 Vldx 1,
1361 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1362 CstPtrTo Corereg |]]],
1363 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1364 pf_su_8_64;
1365 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1366 CstPtrTo Corereg |]]],
1367 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1368 pf_su_8_64;
1370 Vldx_lane 1,
1371 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1372 CstPtrTo Corereg |]]],
1373 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1374 "vld1_lane", bits_3, pf_su_8_32;
1375 Vldx_lane 1,
1376 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1377 CstPtrTo Corereg |]];
1378 Const_valuator (fun _ -> 0)],
1379 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1380 "vld1_lane", bits_3, [S64; U64];
1381 Vldx_lane 1,
1382 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1383 CstPtrTo Corereg |]]],
1384 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1385 "vld1Q_lane", bits_3, pf_su_8_32;
1386 Vldx_lane 1,
1387 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1388 CstPtrTo Corereg |]]],
1389 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1390 "vld1Q_lane", bits_3, [S64; U64];
1392 Vldx_dup 1,
1393 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1394 CstPtrTo Corereg |]]],
1395 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1396 bits_1, pf_su_8_32;
1397 Vldx_dup 1,
1398 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1399 CstPtrTo Corereg |]]],
1400 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1401 bits_1, [S64; U64];
1402 Vldx_dup 1,
1403 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1404 CstPtrTo Corereg |]]],
1405 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1406 bits_1, pf_su_8_32;
1407 Vldx_dup 1,
1408 [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1409 CstPtrTo Corereg |]]],
1410 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1411 bits_1, [S64; U64];
1413 (* VST1 variants. *)
1414 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1415 PtrTo Corereg |]]],
1416 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1417 store_1, pf_su_8_64;
1418 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1419 PtrTo Corereg |]]],
1420 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1421 store_1, pf_su_8_64;
1423 Vstx_lane 1,
1424 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1425 CstPtrTo Corereg |]]],
1426 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1427 "vst1_lane", store_3, pf_su_8_32;
1428 Vstx_lane 1,
1429 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1430 CstPtrTo Corereg |]];
1431 Const_valuator (fun _ -> 0)],
1432 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1433 "vst1_lane", store_3, [U64; S64];
1434 Vstx_lane 1,
1435 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1436 CstPtrTo Corereg |]]],
1437 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1438 "vst1Q_lane", store_3, pf_su_8_32;
1439 Vstx_lane 1,
1440 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1441 CstPtrTo Corereg |]]],
1442 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1443 "vst1Q_lane", store_3, [U64; S64];
1445 (* VLD2 variants. *)
1446 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1447 "vld2", bits_1, pf_su_8_32;
1448 Vldx 2, [Instruction_name ["vld1"]],
1449 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1450 "vld2", bits_1, [S64; U64];
1451 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1452 CstPtrTo Corereg |];
1453 Use_operands [| VecArray (2, Dreg);
1454 CstPtrTo Corereg |]]],
1455 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1456 "vld2Q", bits_1, pf_su_8_32;
1458 Vldx_lane 2,
1459 [Disassembles_as [Use_operands
1460 [| VecArray (2, Element_of_dreg);
1461 CstPtrTo Corereg |]]],
1462 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1463 VecArray (2, Dreg); Immed |],
1464 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1465 Vldx_lane 2,
1466 [Disassembles_as [Use_operands
1467 [| VecArray (2, Element_of_dreg);
1468 CstPtrTo Corereg |]]],
1469 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1470 VecArray (2, Qreg); Immed |],
1471 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1473 Vldx_dup 2,
1474 [Disassembles_as [Use_operands
1475 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1476 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1477 "vld2_dup", bits_1, pf_su_8_32;
1478 Vldx_dup 2,
1479 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1480 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1481 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1482 "vld2_dup", bits_1, [S64; U64];
1484 (* VST2 variants. *)
1485 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1486 PtrTo Corereg |]]],
1487 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1488 store_1, pf_su_8_32;
1489 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1490 PtrTo Corereg |]];
1491 Instruction_name ["vst1"]],
1492 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1493 store_1, [S64; U64];
1494 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1495 PtrTo Corereg |];
1496 Use_operands [| VecArray (2, Dreg);
1497 PtrTo Corereg |]]],
1498 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1499 store_1, pf_su_8_32;
1501 Vstx_lane 2,
1502 [Disassembles_as [Use_operands
1503 [| VecArray (2, Element_of_dreg);
1504 CstPtrTo Corereg |]]],
1505 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1506 store_3, P8 :: P16 :: F32 :: su_8_32;
1507 Vstx_lane 2,
1508 [Disassembles_as [Use_operands
1509 [| VecArray (2, Element_of_dreg);
1510 CstPtrTo Corereg |]]],
1511 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1512 store_3, [P16; F32; U16; U32; S16; S32];
1514 (* VLD3 variants. *)
1515 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1516 "vld3", bits_1, pf_su_8_32;
1517 Vldx 3, [Instruction_name ["vld1"]],
1518 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1519 "vld3", bits_1, [S64; U64];
1520 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1521 CstPtrTo Corereg |];
1522 Use_operands [| VecArray (3, Dreg);
1523 CstPtrTo Corereg |]]],
1524 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1525 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1527 Vldx_lane 3,
1528 [Disassembles_as [Use_operands
1529 [| VecArray (3, Element_of_dreg);
1530 CstPtrTo Corereg |]]],
1531 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1532 VecArray (3, Dreg); Immed |],
1533 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1534 Vldx_lane 3,
1535 [Disassembles_as [Use_operands
1536 [| VecArray (3, Element_of_dreg);
1537 CstPtrTo Corereg |]]],
1538 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1539 VecArray (3, Qreg); Immed |],
1540 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1542 Vldx_dup 3,
1543 [Disassembles_as [Use_operands
1544 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1545 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1546 "vld3_dup", bits_1, pf_su_8_32;
1547 Vldx_dup 3,
1548 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1549 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1550 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1551 "vld3_dup", bits_1, [S64; U64];
1553 (* VST3 variants. *)
1554 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1555 PtrTo Corereg |]]],
1556 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1557 store_1, pf_su_8_32;
1558 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1559 PtrTo Corereg |]];
1560 Instruction_name ["vst1"]],
1561 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1562 store_1, [S64; U64];
1563 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1564 PtrTo Corereg |];
1565 Use_operands [| VecArray (3, Dreg);
1566 PtrTo Corereg |]]],
1567 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1568 store_1, pf_su_8_32;
1570 Vstx_lane 3,
1571 [Disassembles_as [Use_operands
1572 [| VecArray (3, Element_of_dreg);
1573 CstPtrTo Corereg |]]],
1574 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1575 store_3, P8 :: P16 :: F32 :: su_8_32;
1576 Vstx_lane 3,
1577 [Disassembles_as [Use_operands
1578 [| VecArray (3, Element_of_dreg);
1579 CstPtrTo Corereg |]]],
1580 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1581 store_3, [P16; F32; U16; U32; S16; S32];
1583 (* VLD4/VST4 variants. *)
1584 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1585 "vld4", bits_1, pf_su_8_32;
1586 Vldx 4, [Instruction_name ["vld1"]],
1587 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1588 "vld4", bits_1, [S64; U64];
1589 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1590 CstPtrTo Corereg |];
1591 Use_operands [| VecArray (4, Dreg);
1592 CstPtrTo Corereg |]]],
1593 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1594 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1596 Vldx_lane 4,
1597 [Disassembles_as [Use_operands
1598 [| VecArray (4, Element_of_dreg);
1599 CstPtrTo Corereg |]]],
1600 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1601 VecArray (4, Dreg); Immed |],
1602 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1603 Vldx_lane 4,
1604 [Disassembles_as [Use_operands
1605 [| VecArray (4, Element_of_dreg);
1606 CstPtrTo Corereg |]]],
1607 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1608 VecArray (4, Qreg); Immed |],
1609 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1611 Vldx_dup 4,
1612 [Disassembles_as [Use_operands
1613 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1614 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1615 "vld4_dup", bits_1, pf_su_8_32;
1616 Vldx_dup 4,
1617 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1618 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1619 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1620 "vld4_dup", bits_1, [S64; U64];
1622 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1623 PtrTo Corereg |]]],
1624 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1625 store_1, pf_su_8_32;
1626 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1627 PtrTo Corereg |]];
1628 Instruction_name ["vst1"]],
1629 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1630 store_1, [S64; U64];
1631 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1632 PtrTo Corereg |];
1633 Use_operands [| VecArray (4, Dreg);
1634 PtrTo Corereg |]]],
1635 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1636 store_1, pf_su_8_32;
1638 Vstx_lane 4,
1639 [Disassembles_as [Use_operands
1640 [| VecArray (4, Element_of_dreg);
1641 CstPtrTo Corereg |]]],
1642 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1643 store_3, P8 :: P16 :: F32 :: su_8_32;
1644 Vstx_lane 4,
1645 [Disassembles_as [Use_operands
1646 [| VecArray (4, Element_of_dreg);
1647 CstPtrTo Corereg |]]],
1648 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1649 store_3, [P16; F32; U16; U32; S16; S32];
1651 (* Logical operations. And. *)
1652 Vand, [], All (3, Dreg), "vand", notype_2, su_8_32;
1653 Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64];
1654 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1656 (* Or. *)
1657 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32;
1658 Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64];
1659 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1661 (* Eor. *)
1662 Veor, [], All (3, Dreg), "veor", notype_2, su_8_32;
1663 Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64];
1664 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1666 (* Bic (And-not). *)
1667 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_32;
1668 Vbic, [No_op], All (3, Dreg), "vbic", notype_2, [S64; U64];
1669 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1671 (* Or-not. *)
1672 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_32;
1673 Vorn, [No_op], All (3, Dreg), "vorn", notype_2, [S64; U64];
1674 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
1677 let reinterp =
1678 let elems = P8 :: P16 :: F32 :: su_8_64 in
1679 List.fold_right
1680 (fun convto acc ->
1681 let types = List.fold_right
1682 (fun convfrom acc ->
1683 if convfrom <> convto then
1684 Cast (convto, convfrom) :: acc
1685 else
1686 acc)
1687 elems
1690 let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
1691 "vreinterpret", conv_1, types
1692 and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
1693 "vreinterpretQ", conv_1, types in
1694 dconv :: qconv :: acc)
1695 elems
1698 (* Output routines. *)
(* Render an element type as the suffix string used in intrinsic and
   builtin names, e.g. S8 -> "s8", B32 -> "32" (bit-width only).
   A Conv/Cast pair is rendered as the two suffixes joined by '_', first
   component first.  Raises Failure on NoElts.  *)
let rec string_of_elt elt =
  match elt with
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
  | Conv (a, b) | Cast (a, b) ->
      Printf.sprintf "%s_%s" (string_of_elt a) (string_of_elt b)
  | NoElts -> failwith "No elts"
(* Like string_of_elt, except that a Conv/Cast pair is joined with '.'
   instead of '_'.  All other element types render identically.  *)
let string_of_elt_dots = function
  | Conv (a, b) | Cast (a, b) ->
      Printf.sprintf "%s.%s" (string_of_elt a) (string_of_elt b)
  | elt -> string_of_elt elt
(* C-level name of a vector/scalar type, e.g. T_int8x8 -> "int8x8_t".
   [decorate] is applied to the base name of user-visible types (adding
   the "_t" suffix at top level); immediates, void, the raw builtin
   scalar types, and the pointer/const wrappers bypass it.  An array
   type decorates only the composite "<base>x<n>" name, not the element
   type inside it.  *)
let string_of_vectype vt =
  let rec name decorate ty =
    match ty with
    | T_int8x8 -> decorate "int8x8"
    | T_int8x16 -> decorate "int8x16"
    | T_int16x4 -> decorate "int16x4"
    | T_int16x8 -> decorate "int16x8"
    | T_int32x2 -> decorate "int32x2"
    | T_int32x4 -> decorate "int32x4"
    | T_int64x1 -> decorate "int64x1"
    | T_int64x2 -> decorate "int64x2"
    | T_uint8x8 -> decorate "uint8x8"
    | T_uint8x16 -> decorate "uint8x16"
    | T_uint16x4 -> decorate "uint16x4"
    | T_uint16x8 -> decorate "uint16x8"
    | T_uint32x2 -> decorate "uint32x2"
    | T_uint32x4 -> decorate "uint32x4"
    | T_uint64x1 -> decorate "uint64x1"
    | T_uint64x2 -> decorate "uint64x2"
    | T_float32x2 -> decorate "float32x2"
    | T_float32x4 -> decorate "float32x4"
    | T_poly8x8 -> decorate "poly8x8"
    | T_poly8x16 -> decorate "poly8x16"
    | T_poly16x4 -> decorate "poly16x4"
    | T_poly16x8 -> decorate "poly16x8"
    | T_int8 -> decorate "int8"
    | T_int16 -> decorate "int16"
    | T_int32 -> decorate "int32"
    | T_int64 -> decorate "int64"
    | T_uint8 -> decorate "uint8"
    | T_uint16 -> decorate "uint16"
    | T_uint32 -> decorate "uint32"
    | T_uint64 -> decorate "uint64"
    | T_poly8 -> decorate "poly8"
    | T_poly16 -> decorate "poly16"
    | T_float32 -> decorate "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        (* The element type's base name is taken undecorated; only the
           composite array name receives the suffix.  *)
        let basename = name (fun x -> x) base in
        decorate (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        Printf.sprintf "%s *" (name decorate x)
    | T_const x ->
        Printf.sprintf "const %s" (name decorate x)
  in
  name (fun x -> x ^ "_t") vt
(* Builtin type names for the large (multi-register) integer modes.  *)
let string_of_inttype = function
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
(* Lower-case printable form of a machine mode.  *)
let string_of_mode = function
  | V8QI -> "v8qi" | V16QI -> "v16qi"
  | V4HI -> "v4hi" | V8HI -> "v8hi"
  | V2SI -> "v2si" | V4SI -> "v4si"
  | V2SF -> "v2sf" | V4SF -> "v4sf"
  | DI -> "di" | V2DI -> "v2di"
  | QI -> "qi" | HI -> "hi" | SI -> "si" | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).  The user-visible intrinsic name is simply the
   lower-cased form of the table name.
   NOTE(review): String.lowercase is deprecated since OCaml 4.03 in
   favour of String.lowercase_ascii -- confirm the minimum supported
   OCaml version before modernizing.  *)
let intrinsic_name = String.lowercase
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list.  *)
let builtin_name features name =
  (* fold_right evaluates the rightmost element innermost, so the
     earliest Flipped/Builtin_name entry in FEATURES wins over both
     NAME and any later entries.  *)
  let chosen =
    List.fold_right
      (fun feature acc ->
         match feature with
           Flipped replacement | Builtin_name replacement -> replacement
         | _ -> acc)
      features name in
  (* Drop every upper-case letter: a character is kept iff lower-casing
     leaves it unchanged.  *)
  let buf = Buffer.create (String.length chosen) in
  String.iter
    (fun c ->
       let s = String.make 1 c in
       if String.lowercase s = s then Buffer.add_char buf c)
    chosen;
  Buffer.contents buf
(* Transform an arity into a list of strings, one per constituent
   vector type (return type first, then arguments).  *)
let strings_of_arity a =
  let vectypes =
    match a with
    | Arity0 vt1 -> [vt1]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vectypes
(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore.  *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  This is done by getting the equivalent builtin name and
   stripping any suffixes from the list at the top of this file, unless
   the features list presents with an Instruction_name entry, in which
   case that is used; or unless the features list presents with a Flipped
   entry, in which case that is used.  If both such entries are present,
   the first in the list will be chosen.  *)
let get_insn_names features name =
  (* List.find returns the first matching feature, giving the
     "first entry wins" behaviour described above.  *)
  let names =
    try
      (match List.find
               (fun feature ->
                  match feature with
                    Instruction_name _ | Flipped _ -> true
                  | _ -> false)
               features with
         Instruction_name names -> names
       | Flipped name -> [name]
       | _ -> assert false)
    with Not_found -> [builtin_name features name] in
  (* Strip a trailing suffix from suffixes_to_strip (e.g. "_n", "_lane",
     "_dup") from each candidate; names without an underscore, or whose
     suffix is not in the list, pass through unchanged.  *)
  List.map
    (fun name' ->
       try
         let underscore = String.rindex name' '_' in
         let our_suffix =
           String.sub name' (underscore + 1)
                      (String.length name' - underscore - 1) in
         if List.mem our_suffix suffixes_to_strip then
           String.sub name' 0 underscore
         else
           name'
       with Not_found | Invalid_argument _ -> name')
    names
(* Apply a function to each element of a list and then comma-separate
   the resulting strings, appending everything to the accumulator ACC.  *)
let rec commas f elts acc =
  match elts with
  | [] -> acc
  | first :: rest ->
      (* Apply F in list order so any side effects occur left to right,
         and only emit ", " when another element follows.  *)
      let piece = acc ^ f first in
      if rest = [] then piece
      else commas f rest (piece ^ ", ")
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.  *)
let analyze_all_shapes features shape f =
  try
    (* List.find yields the first Disassembles_as entry, or raises
       Not_found when there is none.  *)
    (match List.find
             (fun feature ->
                match feature with
                  Disassembles_as _ -> true
                | _ -> false)
             features with
       Disassembles_as shapes -> List.map f shapes
     | _ -> assert false)
  with Not_found -> [f shape]